ggforestplot 绘制美丽的森林图
点击上方关注 “公众号”

1引言
介绍一个基于 ggplot 基础绘图的 R 包 ggforestplot ,绘制森林图,希望大家能派上用场。疑者勿问,嘿嘿,问也不懂。
2安装
# install.packages("devtools")
devtools::install_github("NightingaleHealth/ggforestplot")
3加载 R 包
# Load and attach the package
library(ggforestplot)
# Load and attach other useful packages
# install.packages("tidyverse")
library(tidyverse)
4绘制基础森林图
测试数据:
# Filter only associations to BMI for the first 30 biomarkers of the example
# dataset
df <-
ggforestplot::df_linear_associations %>%
filter(
trait == "BMI",
dplyr::row_number() <= 30
)
head(df,3)
# A tibble: 3 x 5
name trait beta se pvalue
<chr> <chr> <dbl> <dbl> <dbl>
1 Isoleucine BMI 0.339 0.00945 1.11e-281
2 Leucine BMI 0.343 0.00951 1.25e-285
3 Valine BMI 0.287 0.00951 7.94e-200
绘图:
# Draw a forestplot of cross-sectional, linear associations
ggforestplot::forestplot(
df = df,
name = name,
estimate = beta,
se = se
)

总的测试数据,包含 BMI,HOMA-IR,fasting glucose 三个指标:
# Linear associations of blood biomarkers to Body Mass Index (BMI), insulin
# resistance (log(HOMA-IR)) and fasting glucose
ggforestplot::df_linear_associations %>%
print()
# A tibble: 681 x 5
name trait beta se pvalue
<chr> <chr> <dbl> <dbl> <dbl>
1 Isoleucine BMI 0.339 0.00945 1.11e-281
2 Leucine BMI 0.343 0.00951 1.25e-285
3 Valine BMI 0.287 0.00951 7.94e-200
4 Phenylalanine BMI 0.343 0.00862 0
5 Tyrosine BMI 0.261 0.00900 6.65e-185
6 Alanine BMI 0.179 0.00890 8.62e- 90
7 Glutamine BMI -0.134 0.00945 7.68e- 46
8 Glycine BMI -0.0296 0.00937 1.56e- 3
9 Histidine BMI 0.0364 0.00917 7.25e- 5
10 Lactate BMI 0.131 0.00911 9.20e- 47
# ... with 671 more rows
列说明:

添加显著性:
psignif 参数控制:
# Draw a forestplot of cross-sectional, linear associations
# Add a more descriptive x-label, a title and the threshold for statistical
# significance with Bonferroni correction.
# (Note that the df variable names 'name', 'beta' and 'se' do not really have to be
# explictly defined, as these are the default input values of these parameters.)
ggforestplot::forestplot(
df = df,
estimate = beta,
pvalue = pvalue,
psignif = 0.002,
xlab = "1-SD increment in BMInper 1-SD increment in biomarker concentration",
title = "Associations of blood biomarkers to BMI"
)

分组标记:
# Extract the biomarker names
selected_bmrs <- df %>% pull(name)
# Filter the demo dataset for the biomarkers above and all three traits:
# BMI, HOMA-IR and fasting glucose
df_compare_traits <-
ggforestplot::df_linear_associations %>%
filter(name %in% selected_bmrs) %>%
# Set class to factor to set order of display.
mutate(
trait = factor(
trait,
levels = c("BMI", "HOMA-IR", "Fasting glucose")
)
)
# Draw a forestplot of cross-sectional, linear associations
# Notice how the df variable 'trait' is used here to color the points
ggforestplot::forestplot(
df = df_compare_traits,
estimate = beta,
pvalue = pvalue,
psignif = 0.002,
xlab = "1-SD increment in cardiometabolic traitnper 1-SD increment in biomarker concentration",
title = "Biomarker associations to metabolic traits",
colour = trait
)

对 biomarker 分组分面绘图:
# Install and attach the ggforce library
# install.packages("ggforce")
library(ggforce)
# Filter df_NG_biomarker_metadata, that contain the groups, for only the 30
# biomarkers under discussion
df_grouping <-
df_NG_biomarker_metadata %>%
filter(name %in% df_compare_traits$name)
# Join the association data frame df_compare_traits with group data
df_compare_traits_groups <-
df_compare_traits %>%
# use right_join, with df_grouping on the right, to preserve the order of
# biomarkers it specifies.
dplyr::right_join(., df_grouping, by = "name") %>%
dplyr::mutate(
group = factor(.data$group, levels = unique(.data$group))
)
# Draw a forestplot of cross-sectional, linear associations.
forestplot(
df = df_compare_traits_groups,
estimate = beta,
pvalue = pvalue,
psignif = 0.002,
xlab = "1-SD increment in cardiometabolic traitnper 1-SD increment in biomarker concentration",
colour = trait
) +
ggforce::facet_col(
facets = ~group,
scales = "free_y",
space = "free"
)

5绘制风险比率
测试数据:
# Odds Ratios of Blood Biomarkers with Incident Type 2 Diabetes
ggforestplot::df_logodds_associations %>%
print()
# A tibble: 1,135 x 6
name study beta se pvalue n
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 Isoleucine Meta-analysis 0.285 0.0500 0.0000000126 11777
2 Leucine Meta-analysis 0.286 0.0506 0.0000000165 11783
3 Valine Meta-analysis 0.182 0.0558 0.00110 11777
4 Phenylalanine Meta-analysis 0.271 0.0538 0.000000453 11782
5 Tyrosine Meta-analysis 0.168 0.0541 0.00189 11779
6 Alanine Meta-analysis 0.122 0.0540 0.0237 11784
7 Glutamine Meta-analysis -0.172 0.0575 0.00284 11744
8 Glycine Meta-analysis -0.0773 0.0617 0.211 11496
9 Histidine Meta-analysis 0.0473 0.0553 0.393 11777
10 Lactate Meta-analysis 0.119 0.0529 0.0245 11781
# ... with 1,125 more rows
绘图:
# Filter df_NG_biomarker_metadata for only amino acids
df_grouping <-
df_NG_biomarker_metadata %>%
filter(group %in% "Amino acids")
# Join the association data frame with group data
df <-
df_logodds_associations %>%
# Set the study variable to a factor to preserve order of appearance
mutate(
study = factor(
study,
levels = c("Meta-analysis", "NFBC-1997", "DILGOM", "FINRISK-1997", "YFS")
)
) %>%
# use right_join, with df_grouping on the right, to preserve the order of
# biomarkers it specifies.
dplyr::right_join(., df_grouping, by = "name") %>%
tidyr::drop_na(.data$beta)
# Draw a forestplot of odds ratios
ggforestplot::forestplot(
df = df,
name = name,
estimate = beta,
se = se,
pvalue = pvalue,
psignif = 0.002,
colour = study,
xlab = "Odds ratio for incident type 2 diabetes (95% CI)nper 1−SD increment in metabolite concentration",
title = "Amino acids",
logodds = TRUE
)

scale_shape_manual()改变形状:
# Draw a forestplot of odds ratios
ggforestplot::forestplot(
df = df,
name = name,
estimate = beta,
se = se,
pvalue = pvalue,
psignif = 0.002,
colour = study,
shape = study,
xlab = "Odds ratio for incident type 2 diabetes (95% CI)nper 1−SD increment in metabolite concentration",
title = "Amino acids",
logodds = TRUE
) +
ggplot2::scale_shape_manual(
values = c(23L, 21L, 21L, 21L, 21L),
labels = c("Meta-analysis", "NFBC-1997", "DILGOM", "FINRISK-1997", "YFS")
)

plot_all_NG_biomarkers()函数支持输出所有 biomarkers 的图在一张 pdf 里:
# Join the built-in association demo dataset with a variable that contains
# the machine readable names of Nightingale biomarkers. (Note: if you
# have built your association data frame using the Nightingale CSV result file,
# then your data frame should already contain machine readable names.)
df <-
df_linear_associations %>%
left_join(
select(
df_NG_biomarker_metadata,
name,
machine_readable_name
),
by = "name"
)
# Print effect sizes for all Nightingale biomarkers in a custom 2-page pdf
p <- plot_all_NG_biomarkers(
df = df,
machine_readable_name = machine_readable_name,
estimate = beta,
se = se,
pvalue = pvalue,
colour = trait,
layout = "2016",
xlab = "1-SD increment in cardiometabolic trait
per 1-SD increment in biomarker concentration"
)
# Results are missing for glucose, remove NA from legend
p <- purrr::map(
.x = p,
.f = ~ .x + ggforestplot::scale_colour_ng_d(na.translate = FALSE)
)
grDevices::pdf("biomarker_linear_associations.pdf", 15, sqrt(2) * 15)
p
grDevices::dev.off()

6结尾
如果有什么问题就不要问我了,这玩意我也不懂,单纯分享好的东西而已,希望大家用得上,哈哈哈!

欢迎加入生信交流群。加我微信我也拉你进 微信群聊 老俊俊生信交流群
哦,数据代码已上传至QQ群,欢迎加入下载。
群二维码:

老俊俊微信:
知识星球:
所以今天你学习了吗?
欢迎小伙伴留言评论!
今天的分享就到这里了,敬请期待下一篇!
最后欢迎大家分享转发,您的点赞是对我的鼓励和肯定!
如果觉得对您帮助很大,赏杯快乐水喝喝吧!
往期回顾
◀python 学习之 python 里也能用 dplyr?
◀python 学习之 提取 Ensembl,Gencode 和 Ucsc 基因 TSS 位点
◀python 学习之 R and Python: 循环函数
◀python 学习之 fasta/fastq 处理利器––pyfastx
◀python 学习之处理 fasta 和 fastq 文件
◀...
请关注“恒诺新知”微信公众号,感谢“R语言“,”数据那些事儿“,”老俊俊的生信笔记“,”冷🈚️思“,“珞珈R”,“生信星球”的支持!