R:堆叠面积图不堆叠
R: Stacked area chart does not stack
我有一些数据想绘制成堆叠面积图。x 轴和 y 轴上都是连续型数据,其中 y 值需要先在每个分组内累加。下面是我使用模拟数据编写的代码:
library(data.table)
library(ggplot2)

# Dummy data: a group id (`var`) plus continuous x and y values.
set.seed(1)
dt <- data.table(
  var = sample(1:6, 1000, replace = TRUE),
  xdata = runif(1000),
  ydata = runif(1000)
)
setorder(dt, var, xdata)

# Running total of y within each group, scaled by the grand total of y.
total.y <- sum(dt$ydata)
dt[, cumydata := cumsum(ydata) / total.y, by = var]

# Every row has its own random x position, so the groups do not share
# x values; this is the situation the question is asking about.
area.mapping <- aes(x = xdata, y = cumydata, fill = as.factor(var))
ggplot(dt, area.mapping) +
  geom_area(position = "stack")
这是输出图:
我的问题是各组的面积没有正确地堆叠起来。我猜这可能是因为 x 数据是连续型的?
要绘制堆叠面积图,各组必须具有完全相同的 x 值,并且每组的观测数量也必须相同。因此,把示例数据改成下面这样,就能得到预期的输出:
# Rebuild the example so that all six groups share the same 167 x positions.
set.seed(1)
n.groups <- 6
n.per.group <- 167

# Draw order matters for reproducibility: y values first, then x values.
dt <- data.table(ydata = runif(n.groups * n.per.group))
dt[, var := rep(seq_len(n.groups), each = n.per.group)]
dt[, xdata := rep(runif(n.per.group), times = n.groups)]
setorder(dt, var, xdata)

# Running total of y within each group, scaled by the grand total of y.
total.y <- sum(dt$ydata)
dt[, cumydata := cumsum(ydata) / total.y, by = var]

area.mapping <- aes(x = xdata, y = cumydata, fill = as.factor(var))
ggplot(dt, area.mapping) +
  geom_area(position = "stack")
根据 Jimbou 提供的信息,我最终是这样解决的:只需做一些预处理,即用同一组分箱对每个分组的 x 值做直方图计数,使各组拥有相同的 x 位置。另外,我还把 x 轴改成了对数刻度。
library(data.table)
library(ggplot2)

# Pre-bin the x values of every group on one shared, log-spaced grid so that
# all groups have a y value at exactly the same x positions.
set.seed(1)
dtt <- data.table(
  var = sample(1:6, 1000, replace = TRUE),
  xdata = runif(1000),
  ydata = runif(1000)
)
setorder(dtt, var, xdata)

log.min.xdata <- log(min(dtt$xdata))
log.max.xdata <- log(max(dtt$xdata))
nbreaks <- 101

# Bin edges are computed once and reused for every histogram below.
log.breaks <- seq(log.min.xdata, log.max.xdata, length.out = nbreaks)

# Histogram of a single group, used only for its bin midpoints.
temp <- hist(log(dtt$xdata[dtt$var == 1]),
             breaks = log.breaks,
             plot = FALSE)

# One row per (group, bin) combination, ordered by group and then by bin.
groups <- sort(unique(dtt$var))
n.bins <- nbreaks - 1
dt <- data.table(
  var = rep(groups, each = n.bins),
  bin = rep(seq_len(n.bins), times = length(groups)),
  mid = rep(temp$mids, times = length(groups))
)

# Per-group bin counts; `.BY$var` is the group currently being processed.
dt[, count := hist(log(dtt$xdata[dtt$var == .BY$var]),
                   breaks = log.breaks,
                   plot = FALSE)$counts,
   by = var]

# Running total of the counts across bins within each group.
dt[, cumcount := cumsum(count), by = var]

# `mid` is on the natural-log scale, so it is mapped back with exp() before
# the log10 axis is applied.
# NOTE(review): newer ggplot2 releases deprecate `size` in element_rect()
# (in favour of `linewidth`) and a numeric `legend.position` (in favour of
# `legend.position.inside`); kept as-is here -- confirm the installed
# ggplot2 version before switching.
pp <- ggplot(dt, aes(x = exp(mid), y = cumcount, fill = as.factor(var))) +
  geom_area(position = "stack") +
  scale_x_log10() +
  theme_bw() +
  labs(title = "y",
       fill = " var",
       x = "xdata",
       y = "cumcount") +
  theme(legend.position = c(0.1, 0.70),
        legend.background = element_rect(fill = "lightgrey",
                                         size = 0.5, linetype = "solid"),
        title = element_text(face = "bold"),
        axis.title = element_text(face = "bold"),
        legend.title = element_text(face = "bold"),
        legend.text = element_text(face = "bold"))
我有一些数据想绘制成堆叠面积图。x 轴和 y 轴上都是连续型数据,其中 y 值需要先在每个分组内累加。下面是我使用模拟数据编写的代码:
library(data.table)
library(ggplot2)

# Dummy data: a group id (`var`) plus continuous x and y values.
set.seed(1)
dt <- data.table(
  var = sample(1:6, 1000, replace = TRUE),
  xdata = runif(1000),
  ydata = runif(1000)
)
setorder(dt, var, xdata)

# Running total of y within each group, scaled by the grand total of y.
total.y <- sum(dt$ydata)
dt[, cumydata := cumsum(ydata) / total.y, by = var]

# Every row has its own random x position, so the groups do not share
# x values; this is the situation the question is asking about.
area.mapping <- aes(x = xdata, y = cumydata, fill = as.factor(var))
ggplot(dt, area.mapping) +
  geom_area(position = "stack")
这是输出图:
我的问题是各组的面积没有正确地堆叠起来。我猜这可能是因为 x 数据是连续型的?
要绘制堆叠面积图,各组必须具有完全相同的 x 值,并且每组的观测数量也必须相同。因此,把示例数据改成下面这样,就能得到预期的输出:
# Rebuild the example so that all six groups share the same 167 x positions.
set.seed(1)
n.groups <- 6
n.per.group <- 167

# Draw order matters for reproducibility: y values first, then x values.
dt <- data.table(ydata = runif(n.groups * n.per.group))
dt[, var := rep(seq_len(n.groups), each = n.per.group)]
dt[, xdata := rep(runif(n.per.group), times = n.groups)]
setorder(dt, var, xdata)

# Running total of y within each group, scaled by the grand total of y.
total.y <- sum(dt$ydata)
dt[, cumydata := cumsum(ydata) / total.y, by = var]

area.mapping <- aes(x = xdata, y = cumydata, fill = as.factor(var))
ggplot(dt, area.mapping) +
  geom_area(position = "stack")
根据 Jimbou 提供的信息,我最终是这样解决的:只需做一些预处理,即用同一组分箱对每个分组的 x 值做直方图计数,使各组拥有相同的 x 位置。另外,我还把 x 轴改成了对数刻度。
library(data.table)
library(ggplot2)

# Pre-bin the x values of every group on one shared, log-spaced grid so that
# all groups have a y value at exactly the same x positions.
set.seed(1)
dtt <- data.table(
  var = sample(1:6, 1000, replace = TRUE),
  xdata = runif(1000),
  ydata = runif(1000)
)
setorder(dtt, var, xdata)

log.min.xdata <- log(min(dtt$xdata))
log.max.xdata <- log(max(dtt$xdata))
nbreaks <- 101

# Bin edges are computed once and reused for every histogram below.
log.breaks <- seq(log.min.xdata, log.max.xdata, length.out = nbreaks)

# Histogram of a single group, used only for its bin midpoints.
temp <- hist(log(dtt$xdata[dtt$var == 1]),
             breaks = log.breaks,
             plot = FALSE)

# One row per (group, bin) combination, ordered by group and then by bin.
groups <- sort(unique(dtt$var))
n.bins <- nbreaks - 1
dt <- data.table(
  var = rep(groups, each = n.bins),
  bin = rep(seq_len(n.bins), times = length(groups)),
  mid = rep(temp$mids, times = length(groups))
)

# Per-group bin counts; `.BY$var` is the group currently being processed.
dt[, count := hist(log(dtt$xdata[dtt$var == .BY$var]),
                   breaks = log.breaks,
                   plot = FALSE)$counts,
   by = var]

# Running total of the counts across bins within each group.
dt[, cumcount := cumsum(count), by = var]

# `mid` is on the natural-log scale, so it is mapped back with exp() before
# the log10 axis is applied.
# NOTE(review): newer ggplot2 releases deprecate `size` in element_rect()
# (in favour of `linewidth`) and a numeric `legend.position` (in favour of
# `legend.position.inside`); kept as-is here -- confirm the installed
# ggplot2 version before switching.
pp <- ggplot(dt, aes(x = exp(mid), y = cumcount, fill = as.factor(var))) +
  geom_area(position = "stack") +
  scale_x_log10() +
  theme_bw() +
  labs(title = "y",
       fill = " var",
       x = "xdata",
       y = "cumcount") +
  theme(legend.position = c(0.1, 0.70),
        legend.background = element_rect(fill = "lightgrey",
                                         size = 0.5, linetype = "solid"),
        title = element_text(face = "bold"),
        axis.title = element_text(face = "bold"),
        legend.title = element_text(face = "bold"),
        legend.text = element_text(face = "bold"))