ggplot2包|散点图
专题介绍:R是一种广泛用于数据分析和统计计算的强大语言,于上世纪90年代开始发展起来。得益于全世界众多爱好者的不懈努力,社区又开发出了RStudio——一个基于R、比R自带的基本文本编辑器用户体验更好的集成开发环境。也正是由于全世界越来越多的数据科学社区和用户对R包的慷慨贡献,R语言在全球范围内越来越流行。其中一些R包,例如MASS、SparkR、ggplot2,使数据操作、可视化和计算功能越来越强大。R是用于统计分析、绘图的语言和操作环境,是属于GNU系统的一个自由、免费、源代码开放的软件,也是一个用于统计计算和统计制图的优秀工具。R作为一种统计分析软件,集统计分析与图形显示于一体。它可以运行于UNIX、Windows和Macintosh操作系统上,而且嵌入了一个非常方便实用的帮助系统。相比于其他统计分析软件,R的学术性开发比较早,适合生物学和医学等学科的科研人员使用。
【R语言】开通了R语言群,大家相互学习和交流,请扫描下方二维码,备注:R群,我会邀请你入群,一起进步和成长。
朋友们,实际工作中,你们用散点图吗?散点图一般用来做什么?散点图的变体有哪些?请大家留言。
本文介绍如何用ggplot2包绘制基本的散点图。散点图用于表示两个连续变量之间的关系:通过这种可视化图形,可以直观感受两个变量之间潜藏着什么关系,为后续的分析和建模提供指导。利用ggplot2包绘制散点图,步骤如下:
首先,R包管理
# Make sure pacman is installed, then use it to install (when missing) and
# attach every package used in this tutorial.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
# library() stops with a clear error if pacman still cannot be loaded, whereas
# require() would only return FALSE and let the script fail later at p_load().
library(pacman)
p_load(ggplot2, ggthemes, dplyr, scales, grid, showtext)
其次,数据获取和整理
采用R语言自带的airquality数据集。
# Load the built-in airquality data set into the workspace, then inspect it:
# a compact column-by-column preview followed by per-column summaries.
data(list = "airquality")
glimpse(x = airquality)
summary(object = airquality)
第三,逐步绘制散点图
1)基础的散点图
使用geom_point()函数
# Keep only the July-September rows and turn the numeric month into a labelled
# factor (used as a fill grouping in later steps).
aq_trim <- mutate(
  filter(airquality, Month %in% c(7, 8, 9)),
  Month = factor(Month, labels = c("July", "August", "September"))
)

# Basic scatter plot: day of the month on x, ozone on y.
p1 <- ggplot(data = aq_trim, mapping = aes(x = Day, y = Ozone)) +
  geom_point()
p1
2)修改点的形状
使用shape参数
# Same scatter plot, drawn with point shape 21 (later steps give this shape
# both an outline colour and a fill).
p2 <- ggplot(data = aq_trim, mapping = aes(x = Day, y = Ozone)) +
  geom_point(shape = 21)
p2
3)调整x轴的刻度
使用scale_x_continuous()函数
# Place x-axis breaks at day 1, 6, 11, ... 31.
p3 <- p2 + scale_x_continuous(breaks = seq(from = 1, to = 31, by = 5))
p3
4)调整轴的标签和标题
使用labs()函数
# Add the plot title and both axis labels in a single labs() call.
p4 <- p3 +
  labs(
    title = "Air Quality in New York by Day",
    x = "Day of the month",
    y = "Ozone (ppb)"
  )
p4
5)配置调色板
# Scatter plot with one fixed outline colour and one fixed fill colour for all
# points, plus title, subtitle, axis labels and x-axis breaks.
p5 <- ggplot(data = aq_trim, mapping = aes(x = Day, y = Ozone)) +
  geom_point(shape = 21, fill = "springgreen", colour = "mediumvioletred") +
  scale_x_continuous(breaks = seq(from = 1, to = 31, by = 5)) +
  labs(
    title = "Air Quality in New York by Day",
    subtitle = "Source: New York State Department of Conservation",
    x = "Day of the month",
    y = "Ozone (ppb)"
  )
p5
或者
# Map the point fill to Month so each month gets its own colour.
p5 <- ggplot(
  data = aq_trim,
  mapping = aes(x = Day, y = Ozone, fill = Month)
) +
  geom_point(shape = 21) +
  scale_x_continuous(breaks = seq(from = 1, to = 31, by = 5)) +
  labs(
    title = "Air Quality in New York by Day",
    subtitle = "Source: New York State Department of Conservation",
    x = "Day of the month",
    y = "Ozone (ppb)"
  )
p5

# Replace the default fill colours with a hand-picked palette, one colour per
# month.
fill <- c("steelblue", "yellowgreen", "violetred1")
p5 <- p5 + scale_fill_manual(values = fill)
p5
6)图例管理
# Move the legend below the plot and lay its keys out in a row.
p6 <- p5 +
  theme(legend.direction = "horizontal", legend.position = "bottom")
p6
7)字体设置
# Register the Tahoma font with showtext so the plots below can request
# family = "Tahoma". font_add() stops with an error when "Tahoma.ttf" cannot be
# found on the font search path (typical outside Windows), so downgrade that to
# a warning instead of aborting the rest of the tutorial.
tryCatch(
  font_add("Tahoma", "Tahoma.ttf"),
  error = function(e) {
    warning(
      "Could not register font 'Tahoma': ", conditionMessage(e),
      call. = FALSE
    )
  }
)
# Turn on showtext rendering for subsequently drawn plots.
showtext_auto()
8)配置主题风格
a) 使用《经济学人》杂志主题
# Scatter plot of ozone by day, filled by month, styled with the ggthemes
# Economist theme and its matching fill palette.
p8_1 <- ggplot(aq_trim, aes(x = Day, y = Ozone, fill = Month)) +
  geom_point(shape = 21) +
  labs(
    title = "Air Quality in New York by Day",
    subtitle = "Source: New York State Department of Conservation",
    x = "Day of the month",
    y = "Ozone (ppb)",
    fill = "Months "
  ) +
  scale_x_continuous(breaks = seq(1, 31, 5)) +
  # No size aesthetic is mapped in this plot, so the former scale_size() call
  # had nothing to act on and has been dropped.
  theme_economist() +
  scale_fill_economist() +
  # These overrides are added after theme_economist() so they take precedence.
  # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line elements in
  # favour of `linewidth`; `size` is kept so older installs still run.
  theme(
    axis.line.x = element_line(size = .5, colour = "black"),
    axis.title = element_text(size = 12),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.text = element_text(size = 10),
    plot.title = element_text(family = "Tahoma"),
    text = element_text(family = "Tahoma")
  )
p8_1
b) 使用自定义主题
# Manual fill palette, one colour per month.
fill <- c("#56B4E9", "#F0E442", "violetred1")

# Scatter plot of ozone by day, filled by month, with a hand-built minimal
# theme: black axis lines and tick labels, no grid, no panel background.
p8_2 <- ggplot(aq_trim, aes(x = Day, y = Ozone, fill = Month)) +
  geom_point(shape = 21) +
  labs(
    title = "Air Quality in New York by Day",
    subtitle = "Source: New York State Department of Conservation",
    x = "Day of the month",
    y = "Ozone (ppb)",
    fill = "Months "
  ) +
  scale_x_continuous(breaks = seq(1, 31, 5)) +
  scale_fill_manual(values = fill) +
  # No size aesthetic is mapped in this plot, so the former scale_size() call
  # had nothing to act on and has been dropped.
  # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line elements in
  # favour of `linewidth`; `size` is kept so older installs still run.
  theme(
    axis.line.x = element_line(size = .5, colour = "black"),
    axis.line.y = element_line(size = .5, colour = "black"),
    axis.text.x = element_text(colour = "black", size = 10),
    axis.text.y = element_text(colour = "black", size = 10),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(family = "Tahoma"),
    text = element_text(family = "Tahoma")
  )
p8_2
我做数据工作,主要用到编程语言是R语言和Python语言,我会充分地考虑彼此的优势,在实际的数据问题和工作环境中,选择合适的编程语言,以高效地工作。Python语言的资料、学习和应用,可以关注数据科学与人工智能公众号。
若是你想找数据相关的工作,或者你们团队需要招聘数据人才,可以关注数据人才公众号。
附录:完整代码
#################
#散点图
#2021-03-21
#################
# 1 Package management
# Make sure pacman is installed, then use it to install (when missing) and
# attach every package used in this script.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
# library() stops with a clear error if pacman still cannot be loaded, whereas
# require() would only return FALSE and let the script fail later at p_load().
library(pacman)
p_load(ggplot2, ggthemes, dplyr, scales, grid, showtext)
# 2 Data loading and exploration
# Load the built-in airquality data set, then show a compact column preview
# followed by per-column summaries.
data(list = "airquality")
glimpse(x = airquality)
summary(object = airquality)
# 3 Build the scatter plot step by step
# 1) Basic scatter plot
# Select the data to plot: keep only the July-September rows and turn the
# numeric month into a labelled factor.
aq_trim <- mutate(
  filter(airquality, Month %in% c(7, 8, 9)),
  Month = factor(Month, labels = c("July", "August", "September"))
)

# Day of the month on x, ozone on y.
p1 <- ggplot(data = aq_trim, mapping = aes(x = Day, y = Ozone)) +
  geom_point()
p1
# 2) Change the point shape
p2 <- ggplot(data = aq_trim, mapping = aes(x = Day, y = Ozone)) +
  geom_point(shape = 21)
p2

# 3) Adjust the x-axis breaks (day 1, 6, 11, ... 31)
p3 <- p2 + scale_x_continuous(breaks = seq(from = 1, to = 31, by = 5))
p3

# 4) Set the title and the axis labels in a single labs() call
p4 <- p3 +
  labs(
    title = "Air Quality in New York by Day",
    x = "Day of the month",
    y = "Ozone (ppb)"
  )
p4
# 5) Adjust the colour palette
# Variant A: fixed outline and fill colours given by name.
p5 <- ggplot(data = aq_trim, mapping = aes(x = Day, y = Ozone)) +
  geom_point(shape = 21, fill = "springgreen", colour = "mediumvioletred") +
  scale_x_continuous(breaks = seq(from = 1, to = 31, by = 5)) +
  labs(
    title = "Air Quality in New York by Day",
    subtitle = "Source: New York State Department of Conservation",
    x = "Day of the month",
    y = "Ozone (ppb)"
  )
p5

# Variant B: the same plot with fixed colours given as hex codes.
p5 <- ggplot(data = aq_trim, mapping = aes(x = Day, y = Ozone)) +
  geom_point(shape = 21, fill = "#40b8d0", colour = "#000000") +
  scale_x_continuous(breaks = seq(from = 1, to = 31, by = 5)) +
  labs(
    title = "Air Quality in New York by Day",
    subtitle = "Source: New York State Department of Conservation",
    x = "Day of the month",
    y = "Ozone (ppb)"
  )
p5

# Variant C: map the fill to Month so each month gets its own colour.
p5 <- ggplot(
  data = aq_trim,
  mapping = aes(x = Day, y = Ozone, fill = Month)
) +
  geom_point(shape = 21) +
  scale_x_continuous(breaks = seq(from = 1, to = 31, by = 5)) +
  labs(
    title = "Air Quality in New York by Day",
    subtitle = "Source: New York State Department of Conservation",
    x = "Day of the month",
    y = "Ozone (ppb)"
  )
p5

# Replace the default fill colours with a hand-picked palette, one colour per
# month.
fill <- c("steelblue", "yellowgreen", "violetred1")
p5 <- p5 + scale_fill_manual(values = fill)
p5
# 6) Legend placement
# Move the legend below the plot and lay its keys out in a row.
p6 <- p5 +
  theme(legend.direction = "horizontal", legend.position = "bottom")
p6
# 7) Font setup
# Register the Tahoma font with showtext so the plots below can request
# family = "Tahoma". font_add() stops with an error when "Tahoma.ttf" cannot be
# found on the font search path (typical outside Windows), so downgrade that to
# a warning instead of aborting the rest of the script.
tryCatch(
  font_add("Tahoma", "Tahoma.ttf"),
  error = function(e) {
    warning(
      "Could not register font 'Tahoma': ", conditionMessage(e),
      call. = FALSE
    )
  }
)
# Turn on showtext rendering for subsequently drawn plots.
showtext_auto()
# 8) Theme styling
# 8.1) Use The Economist magazine theme from ggthemes, with its matching fill
# palette.
p8_1 <- ggplot(aq_trim, aes(x = Day, y = Ozone, fill = Month)) +
  geom_point(shape = 21) +
  labs(
    title = "Air Quality in New York by Day",
    subtitle = "Source: New York State Department of Conservation",
    x = "Day of the month",
    y = "Ozone (ppb)",
    fill = "Months "
  ) +
  scale_x_continuous(breaks = seq(1, 31, 5)) +
  # No size aesthetic is mapped in this plot, so the former scale_size() call
  # had nothing to act on and has been dropped.
  theme_economist() +
  scale_fill_economist() +
  # These overrides are added after theme_economist() so they take precedence.
  # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line elements in
  # favour of `linewidth`; `size` is kept so older installs still run.
  theme(
    axis.line.x = element_line(size = .5, colour = "black"),
    axis.title = element_text(size = 12),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.text = element_text(size = 10),
    plot.title = element_text(family = "Tahoma"),
    text = element_text(family = "Tahoma")
  )
p8_1
# 8.2) Custom theme
# Manual fill palette, one colour per month.
fill <- c("#56B4E9", "#F0E442", "violetred1")

# Scatter plot of ozone by day, filled by month, with a hand-built minimal
# theme: black axis lines and tick labels, no grid, no panel background.
p8_2 <- ggplot(aq_trim, aes(x = Day, y = Ozone, fill = Month)) +
  geom_point(shape = 21) +
  labs(
    title = "Air Quality in New York by Day",
    subtitle = "Source: New York State Department of Conservation",
    x = "Day of the month",
    y = "Ozone (ppb)",
    fill = "Months "
  ) +
  scale_x_continuous(breaks = seq(1, 31, 5)) +
  scale_fill_manual(values = fill) +
  # No size aesthetic is mapped in this plot, so the former scale_size() call
  # had nothing to act on and has been dropped.
  # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line elements in
  # favour of `linewidth`; `size` is kept so older installs still run.
  theme(
    axis.line.x = element_line(size = .5, colour = "black"),
    axis.line.y = element_line(size = .5, colour = "black"),
    axis.text.x = element_text(colour = "black", size = 10),
    axis.text.y = element_text(colour = "black", size = 10),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(family = "Tahoma"),
    text = element_text(family = "Tahoma")
  )
p8_2
好书推荐
3 推断统计与数据科学,moderndive和tidyverse包
4 R for machine learning,从经典的机器学习算法入手
5 R for everyone,人人都可学R和用R,以发现数据里的价值
请关注“恒诺新知”微信公众号,感谢“R语言”,“数据那些事儿”,“老俊俊的生信笔记”,“冷🈚️思”,“珞珈R”,“生信星球”的支持!