ggcyto 优雅的可视化流式细胞数据
不忘初心,方得始终

1引言
ggcyto 是一个围绕 ggplot 和图形语法(grammar of graphics)范式构建的流式细胞数据可视化工具。该软件扩展了许多数据科学家已经熟悉的 ggplot2 框架,使其能够识别 Bioconductor 的核心流式细胞术数据结构,从而可以直接绘制已设门(gated)和已注释(annotated)的流式细胞数据。
它简化了流式数据的可视化和绘制,便于生成达到发表质量的图形。有三种方法可以构建 ggcyto 图,每一种都代表了不同程度的复杂性和灵活性。它们满足各种绘图场景的需要,因此适合不同编码水平的用户。
三种方法:
使用 autoplot
快速绘图。
ggcyto + flowSet
/ggcyto + GatingSet
灵活绘图。
ggplot + flowSet1d
/ggplot + flowSet2d
/ggplot + flowSet + gate
/ggplot + flowSet + overlay
更加多方面的灵活绘图和控制。
2安装
# Install ggcyto together with every package the examples below rely on.
# BiocManager is not part of base R, so bootstrap it from CRAN first.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install(c(
  "ggcyto",
  "openCyto",
  "flowWorkspaceData",
  "flowStats", # used below via flowStats::lymphGate()
  "ggridges"   # used below for geom_density_ridges()
))
3特点介绍
有三种水平来进行绘图。
# Load ggcyto and locate the "extdata" directory shipped with flowWorkspaceData
library(ggcyto)
dataDir <- system.file("extdata",package="flowWorkspaceData")
ggplot 低级水平
GatingSet 数据:
# Load the archived GatingSet whose directory name matches "gs_manual".
# `full.names` is spelled out in full; `full = TRUE` only worked through
# partial argument matching.
gs <- load_gs(list.files(dataDir, pattern = "gs_manual", full.names = TRUE))
# Tag the GatingSet with the population ("CD3+") that ggplot() should draw.
attr(gs, "subset") <- "CD3+"
# 2d hexbin of the two channels, filled with a 9-step grey gradient.
ggplot(gs, aes(x = `<B710-A>`, y = `<R780-A>`)) +
  geom_hex(bins = 128) +
  scale_fill_gradientn(colours = gray.colors(9))

flowSet/ncdfFlowSet/flowFrame, 密度图:
# flowSet / ncdfFlowSet / flowFrame input:
# extract the "CD3+" events from the GatingSet first.
fs <- gs_pop_get_data(gs, "CD3+")
# 1d density of a single channel, semi-transparent blue fill.
ggplot(fs, aes(x = `<B710-A>`)) +
  geom_density(fill = "blue", alpha = 0.5)

gates:
# Collect the "CD8" gates of the GatingSet into a filterList.
gates <- filterList(gs_pop_get_gate(gs, "CD8"))
# Hexbin plot with the gates drawn on top as unfilled purple polygons.
ggplot(gs, aes(x = `<B710-A>`, y = `<R780-A>`)) +
  geom_hex(bins = 128) +
  geom_polygon(data = gates, fill = NA, col = "purple")

ggcyto 中级水平
# ggcyto() lets the axes be given by marker name and the gate by node name.
ggcyto(gs, aes(x = CD4, y = CD8)) +
  geom_hex(bins = 128) +
  geom_gate("CD8")

autoplot 高级水平
一维和二维:
# 1d: a single marker
autoplot(fs, "CD4")
# 2d: two markers, hexbin with 64 bins
autoplot(fs, "CD4", "CD8", bins = 64)

# GatingSet input: plot the "CD4" and "CD8" gates
autoplot(gs, c("CD4", "CD8"), bins = 64)

不同的轴尺度缩放
原始数据:
# Take the first frame of the GvHD example data and plot one channel.
data(GvHD)
fr <- GvHD[[1]]
p <- autoplot(fr, "FL1-H")
# raw (untransformed) scale
p

三种不同的缩放样式:
# flowCore logicle scale
p + scale_x_logicle()
# flowJo fasinh scale
p + scale_x_flowJo_fasinh()
# flowJo biexponential scale
p + scale_x_flowJo_biexp()

通用的 geom_gate 图层
垂直:
# Base 2d plot of the first sample, with limits set to "instrument".
fr <- fs[[1]]
p <- autoplot(fr, "CD4", "CD8") +
  ggcyto_par_set(limits = "instrument")
# 1d gate on the x channel, drawn as a vertical line
gate_1d_v <- openCyto::gate_mindensity(fr, "<B710-A>")
p + geom_gate(gate_1d_v)

水平:
# 1d gate on the y channel, drawn as a horizontal line
gate_1d_h <- openCyto::gate_mindensity(fr, "<R780-A>")
p + geom_gate(gate_1d_h)

二维矩形:
# 2d rectangle gate: from each 1d gate's `min` slot up to 4e3 on both channels
gate_rect <- rectangleGate(
  "<B710-A>" = c(gate_1d_v@min, 4e3),
  "<R780-A>" = c(gate_1d_h@min, 4e3)
)
p + geom_gate(gate_rect)

椭圆形:
# Ellipsoid gate: reuse the "CD4" gate stored in the first GatingHierarchy
gate_ellip <- gh_pop_get_gate(gs[[1]], "CD4")
p + geom_gate(gate_ellip)

geom_stats 统计图层
# Hexbin of CD4 vs CD8 with `subset = "CD3+"` selecting the plotted population.
p <- ggcyto(gs, aes(x = "CD4", y = "CD8"), subset = "CD3+") +
  geom_hex()
# Overlay the "CD4" gate and annotate it with geom_stats() defaults.
p + geom_gate("CD4") + geom_stats()

展示细胞数量:
p + geom_gate("CD4") + geom_stats(type = "count") #display cell counts

ggcyto_layout 分面
在绘制 GatingHierarchy 时,多个细胞群及其相关的通道可以被绘制在同一图的不同面板中:
# Work on the first GatingHierarchy of the GatingSet.
gh <- gs[[1]]
# Pick a subset of population paths by position (elements 3-9 and 14).
nodes <- gs_get_pop_paths(gh, path = "auto")[c(3:9, 14)]
# Print the selected node names.
nodes
[1] "singlets" "CD3+" "CD4" "CD4/38- DR+" "CD4/38+ DR+" "CD4/38+ DR-"
[7] "CD4/38- DR-" "CD8"
p <- autoplot(gh, nodes, bins = 64)
p

绘制在一行:
gt <- ggcyto_arrange(p, nrow = 1)
plot(gt)

组合 density:
# Some density plots: columns 5:8 of the "CD3+" data.
p2 <- autoplot(gh_pop_get_data(gh, "CD3+")[, 5:8])
# Clear the default title.
p2@arrange.main <- ""
gt2 <- ggcyto_arrange(p2, nrow = 1)
# Row-bind the two arranged tables (`gt` from the previous step) and draw them.
gt3 <- gridExtra::gtable_rbind(gt, gt2)
plot(gt3)

还有其它功能,大家可以去学习,这里就不一一介绍了。
4autoplot 快速绘图
flowSet:
一维密度图:
library(ggcyto)
dataDir <- system.file("extdata", package = "flowWorkspaceData")
# `full.names` is spelled out in full; `full = TRUE` only worked through
# partial argument matching.
gs <- load_gs(list.files(dataDir, pattern = "gs_bcell_auto", full.names = TRUE))
data(GvHD)
# Keep the GvHD samples of patient 5 at visits 5 and 6.
fs <- GvHD[
  subset(pData(GvHD), Patient %in% 5 & Visit %in% 5:6)[["name"]]
]
# 1d density of the FSC-H channel, one panel per sample.
autoplot(fs, x = "FSC-H")

二维:
autoplot(fs, x = 'FSC-H', y = 'SSC-H', bins = 64)

不指定 x 时,绘制所有通道:
autoplot(fs[[1]]) + labs_cyto("marker")

GatingSet:
autoplot(gs, "CD3", bins = 64)

同时绘制多个细胞群:
autoplot(gs, c("CD3", "CD19"), bins = 64)

GatingHierarchy:
多个细胞群及其相关的通道可以绘制在同一图的不同面板上:
# Work on the first GatingHierarchy of the GatingSet.
gh <- gs[[1]]
# Pick population paths 3 to 6 by position.
nodes <- gs_get_pop_paths(gh, path = "auto")[c(3:6)]
# Print the selected node names.
nodes
[1] "lymph" "Live" "CD20" "CD19"
autoplot(gh, nodes, bins = 64)

5ggcyto 绘图
直方图:
library(ggcyto)
data(GvHD)
# Keep the GvHD samples of patients 5 to 7 at visits 5 and 6.
fs <- GvHD[
  subset(pData(GvHD), Patient %in% 5:7 & Visit %in% c(5:6))[["name"]]
]
fr <- fs[[1]]
# Base ggcyto object on FSC-H; layers are added to it below.
p <- ggcyto(fs, aes(x = `FSC-H`))
# Histogram layer
p1 <- p + geom_histogram()
p1

设置分面:
p1 + facet_grid(Patient~Visit)

密度图:
# Default density, then the same with a black fill
p + geom_density()
p + geom_density(fill = "black")

# Map the fill colour to the sample name
ggcyto(fs, aes(x = `FSC-H`, fill = name)) +
  geom_density(alpha = 0.2)
# Same mapping through plain ggplot(), drawing the samples together
ggplot(fs, aes(x = `FSC-H`, fill = name)) +
  geom_density(alpha = 0.2)

山脊图:
# The ggridges package can display stacked density plots.
# library() stops with an error when the package is missing, whereas
# require() would only return FALSE and let the calls below fail later.
library(ggridges)
# Stack by FCS file ("name").
# facet_null() removes the default facet_wrap (by the "name" column).
p + geom_density_ridges(aes(y = name)) + facet_null()
# Or stack by Visit and facet by Patient.
p + geom_density_ridges(aes(y = Visit)) + facet_grid(~Patient)

散点图:
# 2d hexbin: FSC-H against SSC-H with 128 bins
p <- ggcyto(fs, aes(x = `FSC-H`, y = `SSC-H`))
p <- p + geom_hex(bins = 128)
p

限制轴范围:
p <- p + ylim(c(10,9e2)) + xlim(c(10,9e2))
p

更改颜色:
# Rainbow palette on a square-root-transformed fill scale
p + scale_fill_gradientn(colours = rainbow(7), trans = "sqrt")
# Grey-to-black gradient on the same transformed scale
p + scale_fill_gradient(trans = "sqrt", low = "gray", high = "black")

添加 geom_gate 和 geom_stats 图层
# Estimate a lymphGate (which is an ellipsoidGate) for each sample.
lg <- flowStats::lymphGate(fs, channels = c("FSC-H", "SSC-H"), scale = 0.6)
# Apply the ellipsoidGates to their corresponding samples.
fres <- filter(fs, lg)
# Draw the per-sample gates on top of the hexbin plot.
p + geom_gate(lg)

矩形:
# One rectangleGate on FSC-H / SSC-H, reused for every sample.
rect.g <- rectangleGate(list("FSC-H" = c(300, 500), "SSC-H" = c(50, 200)))
# Build a list of gates named by sample. simplify = FALSE pins the result to a
# named list instead of depending on sapply()'s input-dependent simplification.
rect.gates <- sapply(sampleNames(fs), function(sn) rect.g, simplify = FALSE)
p + geom_gate(rect.gates)

添加统计图层:
p + geom_gate(rect.gates) + geom_stats(size = 3)

添加基于密度最小值(gate_mindensity)的一维门控线(竖直或水平):
# 1d gate_mindensity gate on the x channel (FSC-H), computed per sample
den.gates.x <- fsApply(
  fs, openCyto::gate_mindensity,
  channel = "FSC-H",
  gate_range = c(100, 300),
  adjust = 1
)
p + geom_gate(den.gates.x) + geom_stats()
# 1d gate_mindensity gate on the y channel (SSC-H), with positive = FALSE
den.gates.y <- fsApply(
  fs, openCyto::gate_mindensity,
  channel = "SSC-H",
  gate_range = c(100, 500),
  adjust = 1,
  positive = FALSE
)
# `value` receives a fixed 0.1 per sample (one entry per element of rect.gates)
p + geom_gate(den.gates.y) +
  geom_stats(value = lapply(rect.gates, function(g) 0.1))

密度曲线里添加:
# Scaled 1d density with the FSC-H gates and their event counts on top.
# NOTE(review): `..scaled..` is ggplot2's legacy dot-dot notation; recent
# ggplot2 releases recommend after_stat(scaled) -- confirm before switching.
ggcyto(fs, aes(x = `FSC-H`)) +
  geom_density(fill = "black", aes(y = ..scaled..)) +
  geom_gate(den.gates.x) +
  geom_stats(type = "count")

6结尾

这里我只简单地介绍了前面三部分,内容太多,剩下的大家就自己去好好学习一下吧!加油!

欢迎加入生信交流群。加我微信,我也可以拉你进微信群聊“老俊俊生信交流群”哦。
数据代码已上传至 QQ 群,欢迎加入下载。
群二维码:

老俊俊微信:
知识星球:
所以今天你学习了吗?
欢迎小伙伴留言评论!
今天的分享就到这里了,敬请期待下一篇!
最后欢迎大家分享转发,您的点赞是对我的鼓励和肯定!
如果觉得对您帮助很大,赏杯快乐水喝喝吧!
往期回顾
◀python 学习之 python 里也能用 dplyr?
◀python 学习之 提取 Ensembl,Gencode 和 Ucsc 基因 TSS 位点
◀python 学习之 R and Python: 循环函数
◀python 学习之 fasta/fastq 处理利器––pyfastx
◀...
请关注“恒诺新知”微信公众号,感谢“R语言”、“数据那些事儿”、“老俊俊的生信笔记”、“冷🈚️思”、“珞珈R”、“生信星球”的支持!