TCGAbiolinks-Case study n. 4: Elmer pipeline – KIRC
开始接触TCGA数据,想学习如何下载、处理、分析这些数据。在目前的常用分析包中选中了R包TCGAbiolinks。 下面就记录过程,实际上本身TCGAbiolinks的官方教程就比较完整,我就按照官方教程学习如何处理然后整合weinfo提供的docker封装环境进行的分析。
# 1
library(TCGAbiolinks)
library(SummarizedExperiment)
library(ELMER)
library(parallel)
#-----------------------------------
# STEP 1: Search, download, prepare |
#-----------------------------------
# 1.1 - DNA methylation
# ----------------------------------
query.met <- GDCquery(project = "TCGA-KIRC",
data.category = "DNA Methylation",
platform = "Illumina Human Methylation 450")
GDCdownload(query.met)
kirc.met <- GDCprepare(query = query.met,
save = TRUE,
save.filename = "kircDNAmet.rda",
summarizedExperiment = TRUE)
# 2
# Step 1.2 download expression data
#-----------------------------------
# 1.2 - RNA expression
# ----------------------------------
query.exp <- GDCquery(project = "TCGA-KIRC",
data.category = "Transcriptome Profiling",
data.type = "Gene Expression Quantification",
workflow.type = "HTSeq - FPKM-UQ")
GDCdownload(query.exp)
kirc.exp <- GDCprepare(query = query.exp,
save = TRUE,
save.filename = "kircExp.rda")
kirc.exp <- kirc.exp
# 3
distal.probes <- get.feature.probe(genome = "hg38", met.platform = "450K")
# 4
library(MultiAssayExperiment)
mae <- createMAE(exp = kirc.exp,
met = kirc.met,
save = TRUE,
linearize.exp = TRUE,
filter.probes = distal.probes,
save.filename = "mae_kirc.rda",
met.platform = "450K",
genome = "hg38",
TCGA = TRUE)
# Remove FFPE samples
mae <- mae[,!mae$is_ffpe]
# 5
group.col <- "definition"
group1 <- "Primary solid Tumor"
group2 <- "Solid Tissue Normal"
direction <- "hypo"
dir.out <- file.path("kirc",direction)
dir.create(dir.out, recursive = TRUE)
#--------------------------------------
# STEP 3: Analysis |
#--------------------------------------
# Step 3.1: Get diff methylated probes |
#--------------------------------------
sig.diff <- get.diff.meth(data = mae,
group.col = group.col,
group1 = group1,
group2 = group2,
minSubgroupFrac = 0.2,
sig.dif = 0.3,
diff.dir = direction, # Search for hypomethylated probes in group 1
cores = 1,
dir.out = dir.out,
pvalue = 0.01)
#-------------------------------------------------------------
# Step 3.2: Identify significant probe-gene pairs |
#-------------------------------------------------------------
# Collect nearby 20 genes for Sig.probes
nearGenes <- GetNearGenes(data = mae,
probes = sig.diff$probe,
numFlankingGenes = 20)
pair <- get.pair(data = mae,
group.col = group.col,
group1 = group1,
group2 = group2,
nearGenes = nearGenes,
minSubgroupFrac = 0.4, # % of samples to use in to create groups U/M
permu.dir = file.path(dir.out,"permu"),
permu.size = 100, # Please set to 100000 to get significant results
raw.pvalue = 0.05,
Pe = 0.01, # Please set to 0.001 to get significant results
filter.probes = TRUE, # See preAssociationProbeFiltering function
filter.percentage = 0.05,
filter.portion = 0.3,
dir.out = dir.out,
cores = 4,
label = direction)
# Identify enriched motif for significantly hypomethylated probes which
# have putative target genes.
enriched.motif <- get.enriched.motif(data = mae,
probes = pair$Probe,
dir.out = dir.out,
label = direction,
min.incidence = 10,
lower.OR = 1.1)
TF <- get.TFs(data = mae,
group.col = group.col,
group1 = group1,
group2 = group2,
minSubgroupFrac = 0.4,
enriched.motif = enriched.motif,
dir.out = dir.out,
cores = 4,
label = direction)
# 6
scatter.plot(data = mae,
byProbe = list(probe = sig.diff$probe[1], numFlankingGenes = 20),
category = "definition",
dir.out = "plots",
lm = TRUE, # Draw linear regression curve
save = TRUE)
# 7
scatter.plot(data = mae,
byTF = list(TF = c("RUNX1","RUNX2","RUNX3"),
probe = enriched.motif[[names(enriched.motif)[10]]]),
category = "definition",
dir.out = "plots",
save = TRUE,
lm_line = TRUE)
# 8 以下过程并没有运行成功,报错: filename = "heatmap.pdf")
# Ordering groups
# Error in hclust(., method = "average") :
# must have n >= 2 objects to cluster
heatmapPairs(data = mae,
group.col = "definition",
group1 = "Primary solid Tumor",
annotation.col = c("gender"),
group2 = "Solid Tissue Normal",
pairs = pair,
filename = "heatmap.pdf")
请关注“恒诺新知”微信公众号,感谢“R语言“,”数据那些事儿“,”老俊俊的生信笔记“,”冷🈚️思“,“珞珈R”,“生信星球”的支持!