如何融化包含预先计算的绘图置信区间的数据框?
How do I melt a dataframe that includes pre-computed confidence intervals for plotting?
我有一个数据框(dataframe),其中包含 +/- 置信区间。这个数据集是我自己从只提供 +/- 置信区间的二手公开数据集拼装而成的,所以数据的格式我无法改变。我知道用 ggplot2 绘制多个系列的最快方法是先用 reshape2 重塑数据框,这一点我可以很容易地用下面的代码做到:
# Reshape to long format, keeping Year as the only identifier column.
melt(data = df, id.vars = "Year")
只是我不知道如何把 CI 列对应到正确的系列上。我最终想做出下面这样的图。
这张图是我用下面的代码制作的:
# Hand-built plot: one line layer plus one +/- CI ribbon layer per series,
# each with its own hard-coded colour.
ggplot(data = df, mapping = aes(x = Year)) +
  geom_line(mapping = aes(y = Total.inflow), colour = "red") +
  geom_ribbon(
    mapping = aes(
      ymin = Total.inflow - Total.inflow.CI,
      ymax = Total.inflow + Total.inflow.CI
    ),
    colour = "red", fill = "red", alpha = 0.1
  ) +
  geom_line(mapping = aes(y = EU.inflow), colour = "blue") +
  geom_ribbon(
    mapping = aes(
      ymin = EU.inflow - EU.inflow.CI,
      ymax = EU.inflow + EU.inflow.CI
    ),
    colour = "blue", fill = "blue", alpha = 0.1
  ) +
  geom_line(mapping = aes(y = ROW.inflow), colour = "green") +
  geom_ribbon(
    mapping = aes(
      ymin = ROW.inflow - ROW.inflow.CI,
      ymax = ROW.inflow + ROW.inflow.CI
    ),
    colour = "green", fill = "green", alpha = 0.1
  )
想法?
可行的解决方案
感谢@lukeA 为我指明了正确的方法。出于某种原因,他的解决方案产生了一个空数据框,但我设法弄清楚他要做什么,并自己找到了一个合理的解决方案。
首先,把 GDP 列从流量数据集中分离出来。我一开始就怀疑有必要这样做,但当时以为可以在绘图时再把它过滤掉。事实证明,把两者分开更容易。此外,我还顺带把 GDP 的数值缩小了(除以 100 亿)以便之后使用,因为原始数值实在太大。
# Read the raw statistics from the working directory.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
df <- read.csv("stats.csv", header = TRUE)

# Keep GDP in its own data frame, scaled down by 1e10 so the values are
# easier to handle later.
gdp <- data.frame(Year = df$Year, GDP = df$GDP / 1e10)

# Drop GDP from the flows data so only Year, the series and their CI
# columns remain.
df <- within(df, rm(GDP))
目标是把 CI 值与其对应的系列并排放在一起。下面是 @lukeA 的回答中 inner_join 内部的代码;把它拆开之后,解决思路对我来说就清晰多了。
# Long table of point estimates: one row per (Year, series).
var_value <- df %>%
  select(-ends_with("CI")) %>%
  gather(var, value, -Year)

# Long table of CI half-widths. The trailing "CI" marker is stripped from the
# column names so that `var` holds the same series name as in var_value.
# The separator may be "." (as in the dotted column names used here) or
# whitespace. Backslashes are doubled: "\s" is not a valid R string escape.
var_conf <- df %>%
  select(Year, ends_with("CI")) %>%
  setNames(sub("(\\s|[.])CI$", "", names(.))) %>%
  gather(var, conf, -Year)

# Pair each estimate with its interval by key (Year, var) rather than by row
# position, so a reordered or unmatched column cannot silently misalign.
final.df <- inner_join(var_value, var_conf, by = c("Year", "var"))
最后,@lukeA 的 ggplot 代码确实生成了他显示的图表。
# Plot every series from the long table: a translucent value +/- conf ribbon
# with the series line added as a later layer, coloured and filled by `var`.
ggplot(
  data = final.df,
  mapping = aes(
    x = Year, y = value,
    ymin = value - conf, ymax = value + conf,
    colour = var, fill = var
  )
) +
  geom_ribbon(alpha = 0.2) +
  geom_line()
数据
# Inline copy of the example data (looks like dput() output): a data.frame
# with 24 yearly rows (1991-2014). Each inflow/outflow series has a companion
# "<series>.CI" column; GDP has no CI column.
df <- structure(list(Year = 1991:2014, Total.inflow = c(329L, 268L,
266L, 315L, 312L, 318L, 327L, 391L, 454L, 479L, 481L, 516L, 511L,
589L, 567L, 596L, 574L, 590L, 567L, 591L, 566L, 498L, 526L, 632L
), Total.inflow.CI = c(23L, 20L, 19L, 23L, 22L, 25L, 27L, 27L,
31L, 31L, 30L, 32L, 33L, 40L, 37L, 39L, 40L, 39L, 30L, 31L, 28L,
27L, 29L, 36L), Total.outflow = c(-285L, -281L, -266L, -238L,
-236L, -264L, -279L, -251L, -291L, -321L, -309L, -363L, -363L,
-344L, -361L, -398L, -341L, -427L, -368L, -339L, -351L, -321L,
-317L, -319L), Total.outflow.CI = c(23L, 21L, 20L, 20L, 19L,
28L, 24L, 22L, 24L, 27L, 25L, 29L, 32L, 28L, 31L, 34L, 27L, 41L,
22L, 20L, 22L, 20L, 19L, 22L), UK.inflow = c(93L, 81L, 75L, 91L,
67L, 75L, 79L, 90L, 92L, 83L, 89L, 74L, 85L, 73L, 82L, 66L, 60L,
71L, 82L, 84L, 69L, 73L, 70L, 68L), UK.inflow.CI = c(15L, 15L,
12L, 16L, 13L, 15L, 14L, 15L, 16L, 16L, 16L, 14L, 16L, 12L, 16L,
14L, 12L, 14L, 13L, 14L, 11L, 11L, 12L, 11L), UK.outflow = c(-142L,
-146L, -141L, -112L, -130L, -141L, -140L, -121L, -133L, -151L,
-150L, -172L, -184L, -189L, -175L, -200L, -158L, -159L, -130L,
-125L, -133L, -131L, -125L, -128L), UK.outflow.CI = c(17L, 16L,
16L, 14L, 15L, 22L, 19L, 18L, 16L, 18L, 18L, 22L, 22L, 23L, 22L,
26L, 19L, 22L, 11L, 11L, 12L, 14L, 11L, 13L), EU.inflow = c(60L,
49L, 48L, 53L, 60L, 74L, 70L, 75L, 64L, 55L, 54L, 57L, 58L, 128L,
149L, 173L, 189L, 186L, 162L, 171L, 168L, 148L, 193L, 256L),
EU.inflow.CI = c(12L, 10L, 8L, 10L, 11L, 14L, 18L, 14L, 16L,
13L, 15L, 16L, 17L, 22L, 23L, 26L, 28L, 27L, 19L, 21L, 18L,
17L, 20L, 25L), EU.outflow = c(-51L, -39L, -40L, -46L, -38L,
-50L, -51L, -52L, -57L, -55L, -50L, -54L, -47L, -45L, -56L,
-63L, -66L, -126L, -104L, -92L, -92L, -75L, -78L, -86L),
EU.outflow.CI = c(10L, 6L, 7L, 8L, 7L, 13L, 10L, 9L, 13L,
12L, 13L, 13L, 16L, 10L, 14L, 15L, 15L, 31L, 16L, 13L, 14L,
12L, 12L, 15L), ROW.inflow = c(175L, 138L, 143L, 171L, 185L,
169L, 178L, 226L, 298L, 340L, 338L, 385L, 368L, 388L, 336L,
358L, 325L, 333L, 323L, 336L, 329L, 277L, 264L, 308L), ROW.inflow.CI = c(13L,
10L, 11L, 13L, 15L, 14L, 14L, 17L, 21L, 23L, 20L, 24L, 22L,
31L, 25L, 25L, 25L, 25L, 19L, 18L, 19L, 18L, 18L, 24L), ROW.outflow = c(-91L,
-96L, -85L, -80L, -69L, -73L, -88L, -78L, -101L, -114L, -109L,
-136L, -133L, -109L, -129L, -135L, -117L, -142L, -134L, -122L,
-126L, -115L, -114L, -105L), ROW.outflow.CI = c(12L, 12L,
10L, 11L, 8L, 10L, 11L, 9L, 14L, 15L, 13L, 15L, 16L, 13L,
17L, 16L, 12L, 16L, 9L, 9L, 11L, 9L, 9L, 11L), GDP = c(1142797178130.51,
1179659529659.53, 1061388722255.55, 1140489745944.29, 1237561937825.47,
1306575663026.52, 1446444007858.55, 1537103345478.64, 1565408509949.85,
1554801028899.98, 1535942133294.95, 1680256294964.03, 1943025306122.45,
2297889051629.44, 2418941818181.82, 2588077276908.92, 2969733893557.42,
2793376838235.29, 2314577036921.64, 2403504326328.8, 2594904662714.31,
2630472981169.65, 2712296271989.99, 2990201431078.23)), .Names = c("Year",
"Total.inflow", "Total.inflow.CI", "Total.outflow", "Total.outflow.CI",
"UK.inflow", "UK.inflow.CI", "UK.outflow", "UK.outflow.CI", "EU.inflow",
"EU.inflow.CI", "EU.outflow", "EU.outflow.CI", "ROW.inflow",
"ROW.inflow.CI", "ROW.outflow", "ROW.outflow.CI", "GDP"), row.names = c(NA,
-24L), class = "data.frame")
例如
# Fetch the example CSV into a temporary file, then read it with readr.
tf <- tempfile()
download.file(
  url = "http://www.sharecsv.com/dl/88f76c7be8ade3a626f474f4857e16f8/stats.csv",
  destfile = tf,
  method = "libcurl"
)
library(tidyverse)
df <- read_csv(file = tf)
# Pair every series' point estimates with its CI half-widths, then plot each
# series as a line over a translucent value +/- conf ribbon.
inner_join(
  # Point estimates in long form: (Year, var, value).
  df %>%
    select(-ends_with("CI")) %>%
    gather(var, value, -Year),
  # CI half-widths in long form: (Year, var, conf). The trailing "CI" marker
  # is stripped so `var` matches the estimate names. The separator may be
  # whitespace or "." (dotted names). Backslashes are doubled because "\s"
  # is not a valid R string escape and "\1" would be an octal character.
  df %>%
    select(Year, ends_with("CI")) %>%
    setNames(sub("(\\s|[.])CI$", "", names(.))) %>%
    gather(var, conf, -Year),
  # Columns without a CI counterpart (e.g. GDP) find no match and drop out.
  by = c("Year", "var")
) %>%
  ggplot(aes(
    x = Year,
    y = value,
    ymin = value - conf,
    ymax = value + conf,
    color = var,
    fill = var
  )) +
  geom_ribbon(alpha = .2) +
  geom_line()
会得到如下图形:
(我使用的是最新开发版的ggplot2)
我有一个数据框(dataframe),其中包含 +/- 置信区间。这个数据集是我自己从只提供 +/- 置信区间的二手公开数据集拼装而成的,所以数据的格式我无法改变。我知道用 ggplot2 绘制多个系列的最快方法是先用 reshape2 重塑数据框,这一点我可以很容易地用下面的代码做到:
# Reshape to long format, keeping Year as the only identifier column.
melt(data = df, id.vars = "Year")
只是我不知道如何把 CI 列对应到正确的系列上。我最终想做出下面这样的图。
这张图是我用下面的代码制作的:
# Hand-built plot: one line layer plus one +/- CI ribbon layer per series,
# each with its own hard-coded colour.
# A stray prose fragment had been fused onto the ggplot() call, turning it
# into a call to an undefined function; the call now starts cleanly.
ggplot(df, aes(x = Year)) +
  geom_line(aes(y = Total.inflow), color = "red") +
  geom_ribbon(
    aes(
      ymin = Total.inflow - Total.inflow.CI,
      ymax = Total.inflow + Total.inflow.CI
    ),
    colour = "red", fill = "red", alpha = 0.1
  ) +
  geom_line(aes(y = EU.inflow), color = "blue") +
  geom_ribbon(
    aes(
      ymin = EU.inflow - EU.inflow.CI,
      ymax = EU.inflow + EU.inflow.CI
    ),
    colour = "blue", fill = "blue", alpha = 0.1
  ) +
  geom_line(aes(y = ROW.inflow), color = "green") +
  geom_ribbon(
    aes(
      ymin = ROW.inflow - ROW.inflow.CI,
      ymax = ROW.inflow + ROW.inflow.CI
    ),
    colour = "green", fill = "green", alpha = 0.1
  )
想法?
可行的解决方案
感谢@lukeA 为我指明了正确的方法。出于某种原因,他的解决方案产生了一个空数据框,但我设法弄清楚他要做什么,并自己找到了一个合理的解决方案。
首先,把 GDP 列从流量数据集中分离出来。我一开始就怀疑有必要这样做,但当时以为可以在绘图时再把它过滤掉。事实证明,把两者分开更容易。此外,我还顺带把 GDP 的数值缩小了(除以 100 亿)以便之后使用,因为原始数值实在太大。
# Read the raw statistics from the working directory.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
df <- read.csv("stats.csv", header = TRUE)

# Keep GDP in its own data frame, scaled down by 1e10 so the values are
# easier to handle later.
gdp <- data.frame(Year = df$Year, GDP = df$GDP / 1e10)

# Drop GDP from the flows data so only Year, the series and their CI
# columns remain.
df <- within(df, rm(GDP))
目标是把 CI 值与其对应的系列并排放在一起。下面是 @lukeA 的回答中 inner_join 内部的代码;把它拆开之后,解决思路对我来说就清晰多了。
# Long table of point estimates: one row per (Year, series).
var_value <- df %>%
  select(-ends_with("CI")) %>%
  gather(var, value, -Year)

# Long table of CI half-widths. The trailing "CI" marker is stripped from the
# column names so that `var` holds the same series name as in var_value.
# The separator may be "." (as in the dotted column names used here) or
# whitespace. Backslashes are doubled: "\s" is not a valid R string escape.
var_conf <- df %>%
  select(Year, ends_with("CI")) %>%
  setNames(sub("(\\s|[.])CI$", "", names(.))) %>%
  gather(var, conf, -Year)

# Pair each estimate with its interval by key (Year, var) rather than by row
# position, so a reordered or unmatched column cannot silently misalign.
final.df <- inner_join(var_value, var_conf, by = c("Year", "var"))
最后,@lukeA 的 ggplot 代码确实生成了他显示的图表。
# Plot every series from the long table: a translucent value +/- conf ribbon
# with the series line added as a later layer, coloured and filled by `var`.
ggplot(
  data = final.df,
  mapping = aes(
    x = Year, y = value,
    ymin = value - conf, ymax = value + conf,
    colour = var, fill = var
  )
) +
  geom_ribbon(alpha = 0.2) +
  geom_line()
数据
# Inline copy of the example data (looks like dput() output): a data.frame
# with 24 yearly rows (1991-2014). Each inflow/outflow series has a companion
# "<series>.CI" column; GDP has no CI column.
df <- structure(list(Year = 1991:2014, Total.inflow = c(329L, 268L,
266L, 315L, 312L, 318L, 327L, 391L, 454L, 479L, 481L, 516L, 511L,
589L, 567L, 596L, 574L, 590L, 567L, 591L, 566L, 498L, 526L, 632L
), Total.inflow.CI = c(23L, 20L, 19L, 23L, 22L, 25L, 27L, 27L,
31L, 31L, 30L, 32L, 33L, 40L, 37L, 39L, 40L, 39L, 30L, 31L, 28L,
27L, 29L, 36L), Total.outflow = c(-285L, -281L, -266L, -238L,
-236L, -264L, -279L, -251L, -291L, -321L, -309L, -363L, -363L,
-344L, -361L, -398L, -341L, -427L, -368L, -339L, -351L, -321L,
-317L, -319L), Total.outflow.CI = c(23L, 21L, 20L, 20L, 19L,
28L, 24L, 22L, 24L, 27L, 25L, 29L, 32L, 28L, 31L, 34L, 27L, 41L,
22L, 20L, 22L, 20L, 19L, 22L), UK.inflow = c(93L, 81L, 75L, 91L,
67L, 75L, 79L, 90L, 92L, 83L, 89L, 74L, 85L, 73L, 82L, 66L, 60L,
71L, 82L, 84L, 69L, 73L, 70L, 68L), UK.inflow.CI = c(15L, 15L,
12L, 16L, 13L, 15L, 14L, 15L, 16L, 16L, 16L, 14L, 16L, 12L, 16L,
14L, 12L, 14L, 13L, 14L, 11L, 11L, 12L, 11L), UK.outflow = c(-142L,
-146L, -141L, -112L, -130L, -141L, -140L, -121L, -133L, -151L,
-150L, -172L, -184L, -189L, -175L, -200L, -158L, -159L, -130L,
-125L, -133L, -131L, -125L, -128L), UK.outflow.CI = c(17L, 16L,
16L, 14L, 15L, 22L, 19L, 18L, 16L, 18L, 18L, 22L, 22L, 23L, 22L,
26L, 19L, 22L, 11L, 11L, 12L, 14L, 11L, 13L), EU.inflow = c(60L,
49L, 48L, 53L, 60L, 74L, 70L, 75L, 64L, 55L, 54L, 57L, 58L, 128L,
149L, 173L, 189L, 186L, 162L, 171L, 168L, 148L, 193L, 256L),
EU.inflow.CI = c(12L, 10L, 8L, 10L, 11L, 14L, 18L, 14L, 16L,
13L, 15L, 16L, 17L, 22L, 23L, 26L, 28L, 27L, 19L, 21L, 18L,
17L, 20L, 25L), EU.outflow = c(-51L, -39L, -40L, -46L, -38L,
-50L, -51L, -52L, -57L, -55L, -50L, -54L, -47L, -45L, -56L,
-63L, -66L, -126L, -104L, -92L, -92L, -75L, -78L, -86L),
EU.outflow.CI = c(10L, 6L, 7L, 8L, 7L, 13L, 10L, 9L, 13L,
12L, 13L, 13L, 16L, 10L, 14L, 15L, 15L, 31L, 16L, 13L, 14L,
12L, 12L, 15L), ROW.inflow = c(175L, 138L, 143L, 171L, 185L,
169L, 178L, 226L, 298L, 340L, 338L, 385L, 368L, 388L, 336L,
358L, 325L, 333L, 323L, 336L, 329L, 277L, 264L, 308L), ROW.inflow.CI = c(13L,
10L, 11L, 13L, 15L, 14L, 14L, 17L, 21L, 23L, 20L, 24L, 22L,
31L, 25L, 25L, 25L, 25L, 19L, 18L, 19L, 18L, 18L, 24L), ROW.outflow = c(-91L,
-96L, -85L, -80L, -69L, -73L, -88L, -78L, -101L, -114L, -109L,
-136L, -133L, -109L, -129L, -135L, -117L, -142L, -134L, -122L,
-126L, -115L, -114L, -105L), ROW.outflow.CI = c(12L, 12L,
10L, 11L, 8L, 10L, 11L, 9L, 14L, 15L, 13L, 15L, 16L, 13L,
17L, 16L, 12L, 16L, 9L, 9L, 11L, 9L, 9L, 11L), GDP = c(1142797178130.51,
1179659529659.53, 1061388722255.55, 1140489745944.29, 1237561937825.47,
1306575663026.52, 1446444007858.55, 1537103345478.64, 1565408509949.85,
1554801028899.98, 1535942133294.95, 1680256294964.03, 1943025306122.45,
2297889051629.44, 2418941818181.82, 2588077276908.92, 2969733893557.42,
2793376838235.29, 2314577036921.64, 2403504326328.8, 2594904662714.31,
2630472981169.65, 2712296271989.99, 2990201431078.23)), .Names = c("Year",
"Total.inflow", "Total.inflow.CI", "Total.outflow", "Total.outflow.CI",
"UK.inflow", "UK.inflow.CI", "UK.outflow", "UK.outflow.CI", "EU.inflow",
"EU.inflow.CI", "EU.outflow", "EU.outflow.CI", "ROW.inflow",
"ROW.inflow.CI", "ROW.outflow", "ROW.outflow.CI", "GDP"), row.names = c(NA,
-24L), class = "data.frame")
例如
# Fetch the example CSV into a temporary file, then read it with readr.
tf <- tempfile()
download.file(
  url = "http://www.sharecsv.com/dl/88f76c7be8ade3a626f474f4857e16f8/stats.csv",
  destfile = tf,
  method = "libcurl"
)
library(tidyverse)
df <- read_csv(file = tf)
# Pair every series' point estimates with its CI half-widths, then plot each
# series as a line over a translucent value +/- conf ribbon.
inner_join(
  # Point estimates in long form: (Year, var, value).
  df %>%
    select(-ends_with("CI")) %>%
    gather(var, value, -Year),
  # CI half-widths in long form: (Year, var, conf). The trailing "CI" marker
  # is stripped so `var` matches the estimate names. The separator may be
  # whitespace or "." (dotted names). Backslashes are doubled because "\s"
  # is not a valid R string escape and "\1" would be an octal character.
  df %>%
    select(Year, ends_with("CI")) %>%
    setNames(sub("(\\s|[.])CI$", "", names(.))) %>%
    gather(var, conf, -Year),
  # Columns without a CI counterpart (e.g. GDP) find no match and drop out.
  by = c("Year", "var")
) %>%
  ggplot(aes(
    x = Year,
    y = value,
    ymin = value - conf,
    ymax = value + conf,
    color = var,
    fill = var
  )) +
  geom_ribbon(alpha = .2) +
  geom_line()
会得到如下图形:
(我使用的是最新开发版的ggplot2)