如何融化包含预先计算的绘图置信区间的数据框?

How do I melt a dataframe that includes pre-computed confidence intervals for plotting?

我有一个数据框,其中包含 +/- 置信区间(CI)。这个数据集是我自己用二手的公开数据拼起来的,而这些数据只提供了 +/- 置信区间,所以这一点我无法改变。我知道用 ggplot2 绘制多个系列的最快方法是先用 reshape2 把数据框融化(melt)成长格式,这一步我可以很容易地做到:

# Melt df to long format, keeping Year as the only identifier column.
melt(df, id.vars = "Year")

但我不知道如何把 CI 列对应到正确的系列上。我最终想做出下面这样的图。

这张图是我用下面的代码制作的:
# One line plus a value +/- CI ribbon per series, each in a fixed colour.
# Layers are added in the order line, ribbon for every series.
ggplot(df, aes(x = Year)) +
  # Total inflow (red)
  geom_line(aes(y = Total.inflow), color = "red") +
  geom_ribbon(
    aes(
      ymin = Total.inflow - Total.inflow.CI,
      ymax = Total.inflow + Total.inflow.CI
    ),
    colour = "red", fill = "red", alpha = 0.1
  ) +
  # EU inflow (blue)
  geom_line(aes(y = EU.inflow), color = "blue") +
  geom_ribbon(
    aes(
      ymin = EU.inflow - EU.inflow.CI,
      ymax = EU.inflow + EU.inflow.CI
    ),
    colour = "blue", fill = "blue", alpha = 0.1
  ) +
  # ROW inflow (green)
  geom_line(aes(y = ROW.inflow), color = "green") +
  geom_ribbon(
    aes(
      ymin = ROW.inflow - ROW.inflow.CI,
      ymax = ROW.inflow + ROW.inflow.CI
    ),
    colour = "green", fill = "green", alpha = 0.1
  )

想法?


可行的解决方案

感谢@lukeA 为我指明了正确的方法。出于某种原因,他的解决方案产生了一个空数据框,但我设法弄清楚他要做什么,并自己找到了一个合理的解决方案。

首先,让我们将 GDP 列与流量数据集分开。我从一开始就怀疑这是必要的,但我原以为可以在绘图时将其过滤掉。事实证明,将两者分开更容易。此外,我顺便把 GDP 的数值按比例缩小(除以 100 亿)以备后用,因为原始数值实在太大了……

# Load the raw statistics. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
df <- read.csv("stats.csv", header = TRUE)

# Keep GDP in its own data frame, scaled down by 1e10 for later use.
gdp <- data.frame(Year = df$Year, GDP = df$GDP / 1e10)

# Drop GDP so that only Year, the flow series and their CI columns remain.
df$GDP <- NULL

目标是将 CI 值与其对应的系列并列。这是@lukeA 回答中 inner_join 中的代码。将其拆开后,解决方案的路径对我来说变得更加清晰。

# Long table of the point estimates: one row per Year / series.
var_value <- df %>%
  select(-ends_with("CI")) %>%
  gather(var, value, -Year)

# Long table of the CI half-widths. The trailing "CI" suffix (and its
# separator) is stripped so that `var` carries the same series names as in
# var_value. The separator is a dot in the dput data (e.g. Total.inflow.CI);
# whitespace is accepted as well because the pattern this was adapted from
# expected it. A POSIX class is used instead of "\s": in an R string literal
# "\s" is a parse error and "\1" is the control character U+0001.
var_conf <- df %>%
  select(Year, ends_with("CI")) %>%
  setNames(sub("[[:space:].]CI$", "", names(.))) %>%
  gather(var, conf, -Year)

# Pair every value with its CI by key (Year, var) instead of by row position.
final.df <- inner_join(var_value, var_conf, by = c("Year", "var"))

最后,@lukeA 的 ggplot 代码确实生成了他显示的图表。

# Ribbon spanning value +/- conf with a line on top for every series;
# both colour and fill are mapped to `var`.
ggplot(
  final.df,
  aes(
    x = Year,
    y = value,
    ymin = value - conf,
    ymax = value + conf,
    color = var,
    fill = var
  )
) +
  geom_ribbon(alpha = 0.2) +
  geom_line()

数据

# Sample data in dput() form: a 24-row data.frame, one row per Year
# (1991-2014). Each flow series (Total/UK/EU/ROW, inflow and outflow) has a
# value column and a matching ".CI" column; per the question text the CI
# columns hold the +/- confidence-interval amounts. GDP is the last column.
df <- structure(list(Year = 1991:2014, Total.inflow = c(329L, 268L, 
266L, 315L, 312L, 318L, 327L, 391L, 454L, 479L, 481L, 516L, 511L, 
589L, 567L, 596L, 574L, 590L, 567L, 591L, 566L, 498L, 526L, 632L
), Total.inflow.CI = c(23L, 20L, 19L, 23L, 22L, 25L, 27L, 27L, 
31L, 31L, 30L, 32L, 33L, 40L, 37L, 39L, 40L, 39L, 30L, 31L, 28L, 
27L, 29L, 36L), Total.outflow = c(-285L, -281L, -266L, -238L, 
-236L, -264L, -279L, -251L, -291L, -321L, -309L, -363L, -363L, 
-344L, -361L, -398L, -341L, -427L, -368L, -339L, -351L, -321L, 
-317L, -319L), Total.outflow.CI = c(23L, 21L, 20L, 20L, 19L, 
28L, 24L, 22L, 24L, 27L, 25L, 29L, 32L, 28L, 31L, 34L, 27L, 41L, 
22L, 20L, 22L, 20L, 19L, 22L), UK.inflow = c(93L, 81L, 75L, 91L, 
67L, 75L, 79L, 90L, 92L, 83L, 89L, 74L, 85L, 73L, 82L, 66L, 60L, 
71L, 82L, 84L, 69L, 73L, 70L, 68L), UK.inflow.CI = c(15L, 15L, 
12L, 16L, 13L, 15L, 14L, 15L, 16L, 16L, 16L, 14L, 16L, 12L, 16L, 
14L, 12L, 14L, 13L, 14L, 11L, 11L, 12L, 11L), UK.outflow = c(-142L, 
-146L, -141L, -112L, -130L, -141L, -140L, -121L, -133L, -151L, 
-150L, -172L, -184L, -189L, -175L, -200L, -158L, -159L, -130L, 
-125L, -133L, -131L, -125L, -128L), UK.outflow.CI = c(17L, 16L, 
16L, 14L, 15L, 22L, 19L, 18L, 16L, 18L, 18L, 22L, 22L, 23L, 22L, 
26L, 19L, 22L, 11L, 11L, 12L, 14L, 11L, 13L), EU.inflow = c(60L, 
49L, 48L, 53L, 60L, 74L, 70L, 75L, 64L, 55L, 54L, 57L, 58L, 128L, 
149L, 173L, 189L, 186L, 162L, 171L, 168L, 148L, 193L, 256L), 
    EU.inflow.CI = c(12L, 10L, 8L, 10L, 11L, 14L, 18L, 14L, 16L, 
    13L, 15L, 16L, 17L, 22L, 23L, 26L, 28L, 27L, 19L, 21L, 18L, 
    17L, 20L, 25L), EU.outflow = c(-51L, -39L, -40L, -46L, -38L, 
    -50L, -51L, -52L, -57L, -55L, -50L, -54L, -47L, -45L, -56L, 
    -63L, -66L, -126L, -104L, -92L, -92L, -75L, -78L, -86L), 
    EU.outflow.CI = c(10L, 6L, 7L, 8L, 7L, 13L, 10L, 9L, 13L, 
    12L, 13L, 13L, 16L, 10L, 14L, 15L, 15L, 31L, 16L, 13L, 14L, 
    12L, 12L, 15L), ROW.inflow = c(175L, 138L, 143L, 171L, 185L, 
    169L, 178L, 226L, 298L, 340L, 338L, 385L, 368L, 388L, 336L, 
    358L, 325L, 333L, 323L, 336L, 329L, 277L, 264L, 308L), ROW.inflow.CI = c(13L, 
    10L, 11L, 13L, 15L, 14L, 14L, 17L, 21L, 23L, 20L, 24L, 22L, 
    31L, 25L, 25L, 25L, 25L, 19L, 18L, 19L, 18L, 18L, 24L), ROW.outflow = c(-91L, 
    -96L, -85L, -80L, -69L, -73L, -88L, -78L, -101L, -114L, -109L, 
    -136L, -133L, -109L, -129L, -135L, -117L, -142L, -134L, -122L, 
    -126L, -115L, -114L, -105L), ROW.outflow.CI = c(12L, 12L, 
    10L, 11L, 8L, 10L, 11L, 9L, 14L, 15L, 13L, 15L, 16L, 13L, 
    17L, 16L, 12L, 16L, 9L, 9L, 11L, 9L, 9L, 11L), GDP = c(1142797178130.51, 
    1179659529659.53, 1061388722255.55, 1140489745944.29, 1237561937825.47, 
    1306575663026.52, 1446444007858.55, 1537103345478.64, 1565408509949.85, 
    1554801028899.98, 1535942133294.95, 1680256294964.03, 1943025306122.45, 
    2297889051629.44, 2418941818181.82, 2588077276908.92, 2969733893557.42, 
    2793376838235.29, 2314577036921.64, 2403504326328.8, 2594904662714.31, 
    2630472981169.65, 2712296271989.99, 2990201431078.23)), .Names = c("Year", 
"Total.inflow", "Total.inflow.CI", "Total.outflow", "Total.outflow.CI", 
"UK.inflow", "UK.inflow.CI", "UK.outflow", "UK.outflow.CI", "EU.inflow", 
"EU.inflow.CI", "EU.outflow", "EU.outflow.CI", "ROW.inflow", 
"ROW.inflow.CI", "ROW.outflow", "ROW.outflow.CI", "GDP"), row.names = c(NA, 
-24L), class = "data.frame")

例如

# Attach the tidyverse; read_csv() and the pipeline below rely on it.
library(tidyverse)

# Fetch the CSV into a temporary file, then read it from there.
tf <- tempfile()
download.file(
  url = "http://www.sharecsv.com/dl/88f76c7be8ade3a626f474f4857e16f8/stats.csv",
  destfile = tf,
  method = "libcurl"
)
df <- read_csv(tf)
# Join the long table of values to the long table of CI half-widths on
# (Year, var), then draw a value +/- conf ribbon with a line per series.
inner_join(
  # Point estimates: every non-CI column gathered into var / value.
  df %>%
    select(-ends_with("CI")) %>%
    gather(var, value, -Year),
  # CI half-widths: strip the trailing "CI" suffix so `var` matches the
  # series names above. A POSIX class replaces "\s" because, inside an R
  # string literal, "\s" is a parse error and "\1" is the control character
  # U+0001 rather than a backreference. Both a whitespace and a dot separator
  # are accepted, so the code works whichever way the file was read.
  df %>%
    select(Year, ends_with("CI")) %>%
    setNames(sub("[[:space:].]CI$", "", names(.))) %>%
    gather(var, conf, -Year),
  by = c("Year", "var")
) %>%
  ggplot(aes(
    x = Year,
    y = value,
    ymin = value - conf,
    ymax = value + conf,
    color = var,
    fill = var
  )) +
  geom_ribbon(alpha = .2) +
  geom_line()

就会得到下面这张图:

(我使用的是最新开发版的ggplot2)