用 high/low 和日期填充两行之间的区域

Fill area between two lines, with high/low and dates

前言:我对我自己的问题提供了一个相当令人满意的答案。我知道这是可以接受的做法。当然,我的希望是邀请建议和改进。

我的目的是绘制两个时间序列(存储在日期存储为 class 'Date' 的数据框中)并根据是否一个不同的颜色用两种不同的颜色填充数据点之间的区域高于另一个。例如,绘制一个债券指数和一个股票指数,当股票指数高于债券指数时,用红色填充区域,否则用蓝色填充区域。

出于此目的,我使用了 ggplot2,因为我相当熟悉这个包(作者:Hadley Wickham),但请随时提出其他方法。我根据 ggplot2 包的 geom_ribbon() 函数写了一个自定义函数。早些时候,我遇到了一些问题,因为我缺乏处理 geom_ribbon() 函数和 class 'Date' 的对象的经验。下面的函数代表了我解决这些问题的努力,几乎可以肯定它是迂回的、不必要的复杂的、笨拙的等等。所以我的问题是:请提出改进​​建议and/or替代方法 .最终,如果能在这里提供一个通用功能就太好了。

数据:

set.seed(123456789)
df <- data.frame(
    Date  = seq.Date(as.Date("1950-01-01"), by = "1 month", length.out = 12*10),
    Stocks = 100 + c(0, cumsum(runif(12*10-1, -30, 30))),
    Bonds = 100 + c(0, cumsum(runif(12*10-1, -5, 5))))
library('reshape2')
df <- melt(df, id.vars = 'Date')

自定义函数:

## Function to plot geom_ribbon for class Date
geom_ribbon_date <- function(data, group, N = 1000) {
    # convert column of class Date to numeric
    x_Date <- as.numeric(data[, which(sapply(data, class) == "Date")])
    # append numeric date to dataframe
    data$Date.numeric <- x_Date
    # ensure fill grid is as fine as data grid
    N <- max(N, length(x_Date))
    # generate a grid for fill
    seq_x_Date <- seq(min(x_Date), max(x_Date), length.out = N)
    # ensure the grouping variable is a factor
    group <- factor(group)
    # create a dataframe of min and max
    area <- Map(function(z) {
        d <- data[group == z,];
        approxfun(d$Date.numeric, d$value)(seq_x_Date);
    }, levels(group))
    # create a categorical variable for the max
    maxcat <- apply(do.call('cbind', area), 1, which.max)
    # output a dataframe with x, ymin, ymax, is. max 'dummy', and group
    df <- data.frame(x = seq_x_Date, 
        ymin = do.call('pmin', area), 
        ymax = do.call('pmax', area), 
        is.max = levels(group)[maxcat],
        group = cumsum(c(1, diff(maxcat) != 0))
    )
    # convert back numeric dates to column of class Date
    df$x <- as.Date(df$x, origin = "1970-01-01")
    # create and return the geom_ribbon
    gr <- geom_ribbon(data = df, aes(x, ymin = ymin, ymax = ymax, fill = is.max, group = group), inherit.aes = FALSE)
    return(gr)
}

用法:

ggplot(data = df, aes(x = Date, y = value, group = variable, colour = variable)) + 
    geom_ribbon_date(data = df, group = df$variable) +
    theme_bw() +
    xlab(NULL) +
    ylab(NULL) +
    ggtitle("Bonds Versus Stocks (Fake Data!)") +
    scale_fill_manual('is.max', breaks = c('Stocks', 'Bonds'), 
                        values = c('darkblue','darkred')) +
    theme(legend.position = 'right', legend.direction = 'vertical') +
    theme(legend.title = element_blank()) +
    theme(legend.key = element_blank())

结果:

虽然在 Whosebug 上有相关的问题和答案,但我还没有找到足够详细的来满足我的目的。以下是一些有用的交流:

  1. create-geom-ribbon-for-min-max-range:问了一个类似的问题,但提供的细节比我想要的要少。
  2. possible-bug-in-geom-ribbon:密切相关,但缺少关于如何计算 max/min 的中间步骤。
  3. fill-region-between-two-loess-smoothed-lines-in-r-with-ggplot:关系密切,但侧重于黄土线。太好了。
  4. ggplot-colouring-areas-between-density-lines-according-to-relative-position :密切相关,但侧重于密度。这个post给我很大的启发。

实际上我前一段时间也有同样的问题,这里是 。它定义了一个查找两条线之间交点的函数和另一个函数,该函数在输入中输入一个数据框,然后使用 matplotpolygon

为两列之间的 space 着色

编辑

这是代码,稍作修改以允许绘制最后一个多边形

set.seed(123456789)
dat <- data.frame(
Date  = seq.Date(as.Date("1950-01-01"), by = "1 month", length.out = 12*10),
Stocks = 100 + c(0, cumsum(runif(12*10-1, -30, 30))),
Bonds = 100 + c(0, cumsum(runif(12*10-1, -5, 5))))

intersects <- function(x1, x2) {
    seg1 <- which(!!diff(x1 > x2))     # location of first point in crossing segments
    above <- x2[seg1] > x1[seg1]       # which curve is above prior to crossing
    slope1 <- x1[seg1+1] - x1[seg1]
    slope2 <- x2[seg1+1] - x2[seg1]
    x <- seg1 + ((x2[seg1] - x1[seg1]) / (slope1 - slope2))
    y <- x1[seg1] + slope1*(x - seg1)
    data.frame(x=x, y=y, pindex=seg1, pabove=(1:2)[above+1L]) 
 # pabove is greater curve prior to crossing
}

fillColor <- function(data, addLines=TRUE) {
## Find points of intersections
ints <- intersects(data[,2], data[,3]) # because the first column is for Dates
intervals <- findInterval(1:nrow(data), c(0, ints$x))

## Make plot
matplot(data, type="n", col=2:3, lty=1, lwd=4,xaxt='n',xlab='Date')
axis(1,at=seq(1,dim(data)[1],length.out=12),
labels=data[,1][seq(1,dim(data)[1],length.out=12)])
legend("topright", c(colnames(data)[2], colnames(data)[3]), col=3:2, lty=1, lwd=2)

## Draw the polygons
for (i in seq_along(table(intervals))) {
    xstart <- ifelse(i == 1, 0, ints$x[i-1])
    ystart <- ifelse(i == 1, data[1,2], ints$y[i-1])
    xend <- ints$x[i]
    yend <- ints$y[i]
    x <- seq(nrow(data))[intervals == i]
    polygon(c(xstart, x, xend, rev(x)), c(ystart, data[x,2], yend, rev(data[x,3])),
col=ints$pabove[i]%%2+2)
}

# add end of plot

xstart <- ints[dim(ints)[1],1]
ystart <- ints[dim(ints)[1],2]
xend <- nrow(data)
yend <- data[dim(data)[1],2]
x <- seq(nrow(data))[intervals == max(intervals)]
polygon(c(xstart, x, xend, rev(x)), c(ystart, data[x,2], yend, rev(data[x,3])),
col=ints[dim(ints)[1]-1,4]%%2+2)

## Add lines for curves
if (addLines)
    invisible(lapply(1:2, function(x) lines(seq(nrow(data)), data[,x], col=x%%2+2, lwd=2)))
}

## Plot the data
fillColor(dat,FALSE)

最后的结果是这样的(与题目使用的数据相同)

也许我不理解你的全部问题,但似乎一个相当直接的方法是将第三行定义为每个时间点的两个时间序列中的最小值。 geom_ribbon 然后被调用两次(针对 Asset 的每个唯一值调用一次)以绘制由每个系列和最小线形成的色带。代码可能如下所示:

set.seed(123456789)
df <- data.frame(
  Date  = seq.Date(as.Date("1950-01-01"), by = "1 month", length.out = 12*10),
  Stocks = 100 + c(0, cumsum(runif(12*10-1, -30, 30))),
  Bonds = 100 + c(0, cumsum(runif(12*10-1, -5, 5))))

library(reshape2)
library(ggplot2)
df <- cbind(df,min_line=pmin(df[,2],df[,3]) ) 
df <- melt(df, id.vars=c("Date","min_line"), variable.name="Assets", value.name="Prices")

sp <- ggplot(data=df, aes(x=Date, fill=Assets))
sp <- sp + geom_ribbon(aes(ymax=Prices, ymin=min_line))
sp <- sp + scale_fill_manual(values=c(Stocks="darkred", Bonds="darkblue"))
sp <- sp + ggtitle("Bonds Versus Stocks (Fake Data!)")
plot(sp)

这会生成以下图表:

@walts 的答案应该仍然是赢家,但在实施他的解决方案时,我给了它一个整洁的更新。

library(tidyverse)
set.seed(2345)

# fake data​
raw_data <-
  tibble(
    date = as.Date("2020-01-01") + (1:40),
    a = 95 + cumsum(runif(40, min = -20, max = 20)),
    b = 55  + cumsum(runif(40, min = -1, max = 1))
  )
​
# the steps
# the 'y' + 'min_line'  + 'group' is the right granularity (by date) to 
# create 2 separate ribbons
df <-
  raw_data %>% 
  # find min of the two columns
  mutate(min_line = pmin(a, b)) %>% 
  pivot_longer(c(a, b), names_to = "group", values_to = "y") %>% 
  print()
​
# the result
ggplot(data = df, aes(x = date, fill = group)) +
  geom_ribbon(aes(ymax = y, ymin = min_line)) +
  theme_classic()