在 ggplot 中绘制带置信区间的月平均温度
Plotting mean monthly temperature in ggplot with confidence intervals
我需要绘制月平均温度并在 x 轴上缩写月份,我需要添加 95% 置信区间但不确定如何添加。 CI 的任何视觉效果都很好。
然后我需要绘制
我已将 Date...Time 拆分成单独的列,但无法让 ggplot 的 x 轴显示 month.abb 中的月份缩写。
我有以下数据集(为了在 Stack Overflow 上提问已做精简):
# Example data (shortened for the question): hourly water-quality readings.
# Timestamps are kept as character strings here and parsed in a later step.
CleanTempSal <- data.frame(
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned, which would silently change this argument.
  stringsAsFactors = FALSE,
  Date...Time = c(
    "1/31/2017 20:00",
    "1/31/2017 21:00",
    "1/31/2017 22:00",
    "1/31/2017 23:00",
    "2/1/2017 0:00",
    "2/1/2017 1:00",
    "2/1/2017 2:00",
    "2/1/2017 3:00",
    "3/21/2017 10:00",
    "3/21/2017 11:00",
    "3/21/2017 12:00",
    "3/21/2017 13:00"
  ),
  Temp..C. = c(
    14.87, 14.77, 15.08, 15.08,
    14.96, 14.87, 15.05, 15.05,
    18.87, 19.32, 19.97, 20.44
  ),
  Salinity.psu. = c(
    14.58, 14.52, 14.44, 14.46,
    14.56, 14.67, 14.78, 14.88,
    18.78, 18.81, 19.41, 19.16
  ),
  Conduc.mS.cm. = c(
    19.33, 19.21, 19.26, 19.28,
    19.34, 19.44, 19.66, 19.78,
    26.67, 26.96, 28.14, 28.09
  )
)
Date...Time Temp..C. Salinity.psu. Conduc.mS.cm.
1/31/2017 20:00 14.87 14.58 19.33
1/31/2017 21:00 14.77 14.52 19.21
1/31/2017 22:00 15.08 14.44 19.26
1/31/2017 23:00 15.08 14.46 19.28
2/1/2017 0:00 14.96 14.56 19.34
2/1/2017 1:00 14.87 14.67 19.44
2/1/2017 2:00 15.05 14.78 19.66
2/1/2017 3:00 15.05 14.88 19.78
3/21/2017 10:00 18.87 18.78 26.67
3/21/2017 11:00 19.32 18.81 26.96
3/21/2017 12:00 19.97 19.41 28.14
3/21/2017 13:00 20.44 19.16 28.09
和代码。
library(tidyverse)
library(ggplot2)
library(lubridate)
# Parse the timestamp strings into POSIXct.
# The raw values carry a four-digit year (e.g. "1/31/2017 20:00"), so the
# format needs %Y. With %y (two-digit year) only "20" is consumed as the
# year, the rest of the string no longer matches and the result is NA.
CleanTempSal$Date...Time <- as.POSIXct(
  CleanTempSal$Date...Time,
  format = "%m/%d/%Y %H:%M"
)

# Add calendar components as separate columns.
# mutate() evaluates its arguments in order, so month2 can use the month
# column created just before it.
CleanTempSal <- CleanTempSal %>%
  mutate(
    month = month(Date...Time),
    month2 = month.abb[month],
    year = year(Date...Time),
    hour = hour(Date...Time)
  )
# Mean water temperature per month: one row per value of `month`
a <- summarise(
  group_by(CleanTempSal, month),
  month_mean = mean(Temp..C.)
)
# Plot the monthly means as points joined by a line; both layers are
# coloured by the mean temperature on a blue-to-red gradient.
ggplot(a, aes(month, month_mean)) +
  geom_point(aes(color = month_mean)) +
  geom_line(aes(color = month_mean)) +
  scale_color_gradient("Temp", low = "blue", high = "red4") +
  # Label spelling fixed ("Tempearture", "Monthy")
  labs(
    x = "Month of 2017",
    y = "Water Temperature (C)",
    title = "Monthly Mean Water Temperature",
    subtitle = "NCBS Dock - Cedar Key, FL"
  )
给我这个
所提供的数据不会生成相同的图,因为我为了简洁把它缩短了。它只包含前三个月,均值也会有所不同,但要实现的目标是一样的。
这是解决此问题的一种方法:
为了得到月份缩写,我会考虑把月份保留为 POSIXct 类型。使用 floor_date 可以得到每个时间点所在的月份,并以所需的格式存储。绘图时,可以使用 scale_x_datetime 并指定 x 轴上要使用的标签;在这种情况下,%b 会给出月份缩写。
要计算 95% 置信区间,可以考虑不同的方法。一种方法是手动计算 95% CI。请注意,这里做了一个假设(基于 Student t 分布)。在本例中,我使用带一定透明度(alpha = 0.2)的 geom_ribbon 来显示各点之间的区间。除此之外,也可以使用 stat_summary,它会计算均值和 95% CI 并在 ggplot 中显示出来。
# Per-month summary of water temperature: mean, standard deviation and
# sample size, followed by the standard error and a t-based 95% interval.
# Each timestamp is floored to the start of its month to form the group key.
a <- CleanTempSal %>%
  group_by(month = floor_date(Date...Time, unit = "month")) %>%
  summarise(
    month_mean = mean(Temp..C.),
    sd = sd(Temp..C.),
    n = n()
  ) %>%
  mutate(
    se = sd / sqrt(n),
    # two-sided 95% interval: t quantile with n - 1 degrees of freedom
    lower.ci = month_mean - se * qt(1 - (.05 / 2), n - 1),
    upper.ci = month_mean + se * qt(1 - (.05 / 2), n - 1)
  )
# Plot the monthly means with the 95% confidence interval drawn as a
# semi-transparent ribbon between lower.ci and upper.ci.
ggplot(a, aes(x = month, y = month_mean)) +
  geom_point(aes(color = month_mean)) +
  geom_line(aes(color = month_mean)) +
  geom_ribbon(aes(ymin = lower.ci, ymax = upper.ci), alpha = 0.2) +
  scale_color_gradient("Temp", low = "blue", high = "red4") +
  # One break per month, labelled with the abbreviated month name (%b)
  scale_x_datetime(date_breaks = "1 month", date_labels = "%b") +
  # Label spelling fixed ("Tempearture", "Monthy")
  labs(
    x = "Month of 2017",
    y = "Water Temperature (C)",
    title = "Monthly Mean Water Temperature",
    subtitle = "NCBS Dock - Cedar Key, FL"
  )
图
编辑 (4/16/20):
如果你有多年的数据,在计算SD和SE时你应该同时按月和年分组:
group_by(month = floor_date(Date...Time, unit = "month"), year)
此外,我修改了 ggplot 代码,改为显示误差线而不是带状区域。为了让误差线的宽度正常显示,做了一些小改动,包括使用 as.Date(month) 和 scale_x_date。
# Per-month, per-year summary of water temperature: mean, standard
# deviation and sample size, followed by the standard error and a t-based
# 95% interval.
a <- CleanTempSal %>%
  group_by(month = floor_date(Date...Time, unit = "month"), year) %>%
  summarise(
    month_mean = mean(Temp..C.),
    sd = sd(Temp..C.),
    n = n()
  ) %>%
  mutate(
    se = sd / sqrt(n),
    # two-sided 95% interval: t quantile with n - 1 degrees of freedom
    lower.ci = month_mean - qt(1 - (.05 / 2), n - 1) * se,
    upper.ci = month_mean + qt(1 - (.05 / 2), n - 1) * se
  ) %>%
  # summarise() only removes the last grouping level (year), so the result
  # would otherwise stay grouped by month; return a plain, ungrouped table.
  ungroup()
# Plot the monthly means with error bars for the 95% confidence interval.
# The x aesthetic is converted to Date so that scale_x_date() applies and
# the error-bar width is expressed in days.
ggplot(a, aes(x = as.Date(month), y = month_mean)) +
  geom_point(aes(color = month_mean)) +
  geom_line(aes(color = month_mean)) +
  # Alternative display of the same interval:
  # geom_ribbon(aes(ymin = lower.ci, ymax = upper.ci), alpha = 0.2) +
  # Use the computed 95% CI bounds; mean +/- se would only show one
  # standard error, which is narrower than the requested interval.
  geom_errorbar(aes(ymin = lower.ci, ymax = upper.ci), width = 1) +
  scale_color_gradient("Temp", low = "blue", high = "red4") +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
  # Label spelling fixed ("Tempearture", "Monthy")
  labs(
    x = "Month",
    y = "Water Temperature (C)",
    title = "Monthly Mean Water Temperature",
    subtitle = "NCBS Dock - Cedar Key, FL"
  )
图
数据
# Example data with the timestamp already parsed and the derived calendar
# columns (month, month2, year, hour) included.
CleanTempSal <- structure(
  list(
    # POSIXct values in seconds: two runs of consecutive hourly readings
    Date...Time = structure(
      c(1485914400 + 3600 * (0:7), 1490108400 + 3600 * (0:3)),
      class = c("POSIXct", "POSIXt"),
      tzone = ""
    ),
    Temp..C. = c(
      14.87, 14.77, 15.08, 15.08,
      14.96, 14.87, 15.05, 15.05,
      18.87, 19.32, 19.97, 20.44
    ),
    Salinity.psu. = c(
      14.58, 14.52, 14.44, 14.46,
      14.56, 14.67, 14.78, 14.88,
      18.78, 18.81, 19.41, 19.16
    ),
    Conduc.mS.cm. = c(
      19.33, 19.21, 19.26, 19.28,
      19.34, 19.44, 19.66, 19.78,
      26.67, 26.96, 28.14, 28.09
    ),
    # Four readings for each of the three months
    month = rep(c(1, 2, 3), each = 4),
    month2 = rep(c("Jan", "Feb", "Mar"), each = 4),
    year = rep(2017, 12),
    hour = c(20:23, 0:3, 10:13)
  ),
  class = "data.frame",
  row.names = c(NA, -12L)
)
我需要绘制月平均温度并在 x 轴上缩写月份,我需要添加 95% 置信区间但不确定如何添加。 CI 的任何视觉效果都很好。
然后我需要绘制
我已将 Date...Time 拆分成单独的列,但无法让 ggplot 的 x 轴显示 month.abb 中的月份缩写。
我有以下数据集(为了在 Stack Overflow 上提问已做精简):
# Example data (shortened for the question): hourly water-quality readings.
# Timestamps are kept as character strings here and parsed in a later step.
CleanTempSal <- data.frame(
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned, which would silently change this argument.
  stringsAsFactors = FALSE,
  Date...Time = c(
    "1/31/2017 20:00",
    "1/31/2017 21:00",
    "1/31/2017 22:00",
    "1/31/2017 23:00",
    "2/1/2017 0:00",
    "2/1/2017 1:00",
    "2/1/2017 2:00",
    "2/1/2017 3:00",
    "3/21/2017 10:00",
    "3/21/2017 11:00",
    "3/21/2017 12:00",
    "3/21/2017 13:00"
  ),
  Temp..C. = c(
    14.87, 14.77, 15.08, 15.08,
    14.96, 14.87, 15.05, 15.05,
    18.87, 19.32, 19.97, 20.44
  ),
  Salinity.psu. = c(
    14.58, 14.52, 14.44, 14.46,
    14.56, 14.67, 14.78, 14.88,
    18.78, 18.81, 19.41, 19.16
  ),
  Conduc.mS.cm. = c(
    19.33, 19.21, 19.26, 19.28,
    19.34, 19.44, 19.66, 19.78,
    26.67, 26.96, 28.14, 28.09
  )
)
Date...Time Temp..C. Salinity.psu. Conduc.mS.cm.
1/31/2017 20:00 14.87 14.58 19.33
1/31/2017 21:00 14.77 14.52 19.21
1/31/2017 22:00 15.08 14.44 19.26
1/31/2017 23:00 15.08 14.46 19.28
2/1/2017 0:00 14.96 14.56 19.34
2/1/2017 1:00 14.87 14.67 19.44
2/1/2017 2:00 15.05 14.78 19.66
2/1/2017 3:00 15.05 14.88 19.78
3/21/2017 10:00 18.87 18.78 26.67
3/21/2017 11:00 19.32 18.81 26.96
3/21/2017 12:00 19.97 19.41 28.14
3/21/2017 13:00 20.44 19.16 28.09
和代码。
library(tidyverse)
library(ggplot2)
library(lubridate)
# Parse the timestamp strings into POSIXct.
# The raw values carry a four-digit year (e.g. "1/31/2017 20:00"), so the
# format needs %Y. With %y (two-digit year) only "20" is consumed as the
# year, the rest of the string no longer matches and the result is NA.
CleanTempSal$Date...Time <- as.POSIXct(
  CleanTempSal$Date...Time,
  format = "%m/%d/%Y %H:%M"
)

# Add calendar components as separate columns.
# mutate() evaluates its arguments in order, so month2 can use the month
# column created just before it.
CleanTempSal <- CleanTempSal %>%
  mutate(
    month = month(Date...Time),
    month2 = month.abb[month],
    year = year(Date...Time),
    hour = hour(Date...Time)
  )
# Mean water temperature per month: one row per value of `month`
a <- summarise(
  group_by(CleanTempSal, month),
  month_mean = mean(Temp..C.)
)
# Plot the monthly means as points joined by a line; both layers are
# coloured by the mean temperature on a blue-to-red gradient.
ggplot(a, aes(month, month_mean)) +
  geom_point(aes(color = month_mean)) +
  geom_line(aes(color = month_mean)) +
  scale_color_gradient("Temp", low = "blue", high = "red4") +
  # Label spelling fixed ("Tempearture", "Monthy")
  labs(
    x = "Month of 2017",
    y = "Water Temperature (C)",
    title = "Monthly Mean Water Temperature",
    subtitle = "NCBS Dock - Cedar Key, FL"
  )
给我这个
所提供的数据不会生成相同的图,因为我为了简洁把它缩短了。它只包含前三个月,均值也会有所不同,但要实现的目标是一样的。
这是解决此问题的一种方法:
为了得到月份缩写,我会考虑把月份保留为 POSIXct 类型。使用 floor_date 可以得到每个时间点所在的月份,并以所需的格式存储。绘图时,可以使用 scale_x_datetime 并指定 x 轴上要使用的标签;在这种情况下,%b 会给出月份缩写。
要计算 95% 置信区间,可以考虑不同的方法。一种方法是手动计算 95% CI。请注意,这里做了一个假设(基于 Student t 分布)。在本例中,我使用带一定透明度(alpha = 0.2)的 geom_ribbon 来显示各点之间的区间。除此之外,也可以使用 stat_summary,它会计算均值和 95% CI 并在 ggplot 中显示出来。
# Per-month summary of water temperature: mean, standard deviation and
# sample size, followed by the standard error and a t-based 95% interval.
# Each timestamp is floored to the start of its month to form the group key.
a <- CleanTempSal %>%
  group_by(month = floor_date(Date...Time, unit = "month")) %>%
  summarise(
    month_mean = mean(Temp..C.),
    sd = sd(Temp..C.),
    n = n()
  ) %>%
  mutate(
    se = sd / sqrt(n),
    # two-sided 95% interval: t quantile with n - 1 degrees of freedom
    lower.ci = month_mean - se * qt(1 - (.05 / 2), n - 1),
    upper.ci = month_mean + se * qt(1 - (.05 / 2), n - 1)
  )
# Plot the monthly means with the 95% confidence interval drawn as a
# semi-transparent ribbon between lower.ci and upper.ci.
ggplot(a, aes(x = month, y = month_mean)) +
  geom_point(aes(color = month_mean)) +
  geom_line(aes(color = month_mean)) +
  geom_ribbon(aes(ymin = lower.ci, ymax = upper.ci), alpha = 0.2) +
  scale_color_gradient("Temp", low = "blue", high = "red4") +
  # One break per month, labelled with the abbreviated month name (%b)
  scale_x_datetime(date_breaks = "1 month", date_labels = "%b") +
  # Label spelling fixed ("Tempearture", "Monthy")
  labs(
    x = "Month of 2017",
    y = "Water Temperature (C)",
    title = "Monthly Mean Water Temperature",
    subtitle = "NCBS Dock - Cedar Key, FL"
  )
图
编辑 (4/16/20):
如果你有多年的数据,在计算SD和SE时你应该同时按月和年分组:
group_by(month = floor_date(Date...Time, unit = "month"), year)
此外,我修改了 ggplot 代码,改为显示误差线而不是带状区域。为了让误差线的宽度正常显示,做了一些小改动,包括使用 as.Date(month) 和 scale_x_date。
# Per-month, per-year summary of water temperature: mean, standard
# deviation and sample size, followed by the standard error and a t-based
# 95% interval.
a <- CleanTempSal %>%
  group_by(month = floor_date(Date...Time, unit = "month"), year) %>%
  summarise(
    month_mean = mean(Temp..C.),
    sd = sd(Temp..C.),
    n = n()
  ) %>%
  mutate(
    se = sd / sqrt(n),
    # two-sided 95% interval: t quantile with n - 1 degrees of freedom
    lower.ci = month_mean - qt(1 - (.05 / 2), n - 1) * se,
    upper.ci = month_mean + qt(1 - (.05 / 2), n - 1) * se
  ) %>%
  # summarise() only removes the last grouping level (year), so the result
  # would otherwise stay grouped by month; return a plain, ungrouped table.
  ungroup()
# Plot the monthly means with error bars for the 95% confidence interval.
# The x aesthetic is converted to Date so that scale_x_date() applies and
# the error-bar width is expressed in days.
ggplot(a, aes(x = as.Date(month), y = month_mean)) +
  geom_point(aes(color = month_mean)) +
  geom_line(aes(color = month_mean)) +
  # Alternative display of the same interval:
  # geom_ribbon(aes(ymin = lower.ci, ymax = upper.ci), alpha = 0.2) +
  # Use the computed 95% CI bounds; mean +/- se would only show one
  # standard error, which is narrower than the requested interval.
  geom_errorbar(aes(ymin = lower.ci, ymax = upper.ci), width = 1) +
  scale_color_gradient("Temp", low = "blue", high = "red4") +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
  # Label spelling fixed ("Tempearture", "Monthy")
  labs(
    x = "Month",
    y = "Water Temperature (C)",
    title = "Monthly Mean Water Temperature",
    subtitle = "NCBS Dock - Cedar Key, FL"
  )
图
数据
# Example data with the timestamp already parsed and the derived calendar
# columns (month, month2, year, hour) included.
CleanTempSal <- structure(
  list(
    # POSIXct values in seconds: two runs of consecutive hourly readings
    Date...Time = structure(
      c(1485914400 + 3600 * (0:7), 1490108400 + 3600 * (0:3)),
      class = c("POSIXct", "POSIXt"),
      tzone = ""
    ),
    Temp..C. = c(
      14.87, 14.77, 15.08, 15.08,
      14.96, 14.87, 15.05, 15.05,
      18.87, 19.32, 19.97, 20.44
    ),
    Salinity.psu. = c(
      14.58, 14.52, 14.44, 14.46,
      14.56, 14.67, 14.78, 14.88,
      18.78, 18.81, 19.41, 19.16
    ),
    Conduc.mS.cm. = c(
      19.33, 19.21, 19.26, 19.28,
      19.34, 19.44, 19.66, 19.78,
      26.67, 26.96, 28.14, 28.09
    ),
    # Four readings for each of the three months
    month = rep(c(1, 2, 3), each = 4),
    month2 = rep(c("Jan", "Feb", "Mar"), each = 4),
    year = rep(2017, 12),
    hour = c(20:23, 0:3, 10:13)
  ),
  class = "data.frame",
  row.names = c(NA, -12L)
)