Case study n. 3: Integration of methylation and expression for ACC
开始接触TCGA数据,想学习如何下载、处理、分析这些数据。在目前的常用分析包中选中了R包TCGAbiolinks。 下面就记录过程,实际上本身TCGAbiolinks的官方教程就比较完整,我就按照官方教程学习如何处理然后整合weinfo提供的docker封装环境进行的分析。
# 1
library(TCGAbiolinks)
library(SummarizedExperiment)
#-----------------------------------
# STEP 1: Search, download, prepare |
#-----------------------------------
# 1.1 - DNA methylation
# ----------------------------------
query.met <- GDCquery(project = "TCGA-ACC",
legacy = TRUE,
data.category = "DNA methylation",
platform = "Illumina Human Methylation 450")
GDCdownload(query.met)
acc.met <- GDCprepare(query = query.met,
save = TRUE,
save.filename = "accDNAmet.rda",
summarizedExperiment = TRUE)
#-----------------------------------
# 1.2 - RNA expression
# ----------------------------------
query.exp <- GDCquery(project = "TCGA-ACC",
legacy = TRUE,
data.category = "Gene expression",
data.type = "Gene expression quantification",
platform = "Illumina HiSeq",
file.type = "results")
GDCdownload(query.exp)
acc.exp <- GDCprepare(query = query.exp, save = TRUE, save.filename = "accExp.rda")
# 2
# na.omit
acc.met <- subset(acc.met,subset = (rowSums(is.na(assay(acc.met))) == 0))
# Volcano plot
acc.met <- TCGAanalyze_DMR(acc.met, groupCol = "subtype_MethyLevel",
group1 = "CIMP-high",
group2="CIMP-low",
p.cut = 10^-5,
diffmean.cut = 0.25,
legend = "State",
plot.filename = "CIMP-highvsCIMP-low_metvolcano.png")
# 3
#-------------------------------------------------
# 2.3 - DEA - Expression analysis - volcano plot
# ------------------------------------------------
acc.exp.aux <- subset(acc.exp,
select = colData(acc.exp)$subtype_MethyLevel %in% c("CIMP-high","CIMP-low"))
idx <- colData(acc.exp.aux)$subtype_MethyLevel %in% c("CIMP-high")
idx2 <- colData(acc.exp.aux)$subtype_MethyLevel %in% c("CIMP-low")
dataPrep <- TCGAanalyze_Preprocessing(object = acc.exp.aux, cor.cut = 0.6)
dataNorm <- TCGAanalyze_Normalization(tabDF = dataPrep,
geneInfo = geneInfo,
method = "gcContent")
dataFilt <- TCGAanalyze_Filtering(tabDF = dataNorm,
qnt.cut = 0.25,
method='quantile')
dataDEGs <- TCGAanalyze_DEA(mat1 = dataFilt[,idx],
mat2 = dataFilt[,idx2],
Cond1type = "CIMP-high",
Cond2type = "CIMP-low",
method = "glmLRT")
TCGAVisualize_volcano(x = dataDEGs$logFC,
y = dataDEGs$FDR,
filename = "Case3_volcanoexp.png",
x.cut = 3,
y.cut = 10^-5,
names = rownames(dataDEGs),
color = c("black","red","darkgreen"),
names.size = 2,
xlab = " Gene expression fold change (Log2)",
legend = "State",
title = "Volcano plot (CIMP-high vs CIMP-low)",
width = 10)
# 4 这部分很可惜作者也已经指出不再支持了
#------------------------------------------
# 2.4 - Starburst plot
# -----------------------------------------
# If true the argument names of the geenes in circle
# (biologically significant genes, has a change in gene
# expression and DNA methylation and respects all the thresholds)
# will be shown
# these genes are returned by the function see starburst object after the function is executed
starburst <- TCGAvisualize_starburst(met = acc.met,
exp = dataDEGs,
genome = "hg19",
group1 = "CIMP-high",
group2 = "CIMP-low",
filename = "startburst.png",
met.platform = "450k",
met.p.cut = 0.00005,
exp.p.cut = 0.00005,
diffmean.cut = 0.25,
logFC.cut = 3,
names = FALSE,
height = 10,
width = 15,
dpi = 300)
请关注“恒诺新知”微信公众号,感谢“R语言“,”数据那些事儿“,”老俊俊的生信笔记“,”冷🈚️思“,“珞珈R”,“生信星球”的支持!