坡度图 - ggplot2
Slope Chart - ggplot2
我正在测试 ggplot2
的一些模板,我对斜率图表很感兴趣,可以从这里获得:
完美运行。
我只是想做一个类似的但是只定义了一个组,就像下面的代码:
library(dplyr)
library(ggplot2)
theme_set(theme_classic())
source_df <- read.csv("https://raw.githubusercontent.com/jkeirstead/r-slopegraph/master/cancer_survival_rates.csv")
source_df <- filter(source_df, group == "Thyroid")
然后我从示例中复制剩余的代码:
# Define functions. Source: https://github.com/jkeirstead/r-slopegraph
tufte_sort <- function(df, x="year", y="value", group="group", method="tufte", min.space=0.05) {
## First rename the columns for consistency
ids <- match(c(x, y, group), names(df))
df <- df[,ids]
names(df) <- c("x", "y", "group")
## Expand grid to ensure every combination has a defined value
tmp <- expand.grid(x=unique(df$x), group=unique(df$group))
tmp <- merge(df, tmp, all.y=TRUE)
df <- mutate(tmp, y=ifelse(is.na(y), 0, y))
## Cast into a matrix shape and arrange by first column
require(reshape2)
tmp <- dcast(df, group ~ x, value.var="y")
ord <- order(tmp[,2])
tmp <- tmp[ord,]
min.space <- min.space*diff(range(tmp[,-1]))
yshift <- numeric(nrow(tmp))
## Start at "bottom" row
## Repeat for rest of the rows until you hit the top
for (i in 2:nrow(tmp)) {
## Shift subsequent row up by equal space so gap between
## two entries is >= minimum
mat <- as.matrix(tmp[(i-1):i, -1])
d.min <- min(diff(mat))
yshift[i] <- ifelse(d.min < min.space, min.space - d.min, 0)
}
tmp <- cbind(tmp, yshift=cumsum(yshift))
scale <- 1
tmp <- melt(tmp, id=c("group", "yshift"), variable.name="x", value.name="y")
## Store these gaps in a separate variable so that they can be scaled ypos = a*yshift + y
tmp <- transform(tmp, ypos=y + scale*yshift)
return(tmp)
}
plot_slopegraph <- function(df) {
ylabs <- subset(df, x==head(x,1))$group
yvals <- subset(df, x==head(x,1))$ypos
fontSize <- 3
gg <- ggplot(df,aes(x=x,y=ypos)) +
geom_line(aes(group=group),colour="grey80") +
geom_point(colour="white",size=8) +
geom_text(aes(label=y), size=fontSize, family="American Typewriter") +
scale_y_continuous(name="", breaks=yvals, labels=ylabs)
return(gg)
}
## Prepare data
df <- tufte_sort(source_df,
x="year",
y="value",
group="group",
method="tufte",
min.space=0.05)
df <- transform(df,
x=factor(x, levels=c(5,10,15,20),
labels=c("5 years","10 years","15 years","20 years")),
y=round(y))
## Plot
plot_slopegraph(df) + labs(title="Estimates of % survival rates") +
theme(axis.title=element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(hjust=0.5,
family = "American Typewriter",
face="bold"),
axis.text = element_text(family = "American Typewriter",
face="bold"))
我发现的问题是,如果我在 source_df$group
中只取一个对象,连接线就会消失 () 在这种情况下 Thyroid
,如下所示:
如果我只在同一列中添加另一个项目,一切都很好,并且会出现连接线。
有没有办法在这种情况下也有线条?我已经尝试了很多方法,但没有成功删除包含 NA
值的行,因此我不知道如何解决这个问题,如果...可以解决。
预先感谢您的每一个最终回复!
我可以建议一个更简单的方法,使用 {ggh4x} 包,它有一个基本的 R type = "b" 像 geom.您可以删除点,然后绘制文本。
您将在三行代码中得到结果:)
library(tidyverse)
library(ggh4x)
source_df <- read.csv("https://raw.githubusercontent.com/jkeirstead/r-slopegraph/master/cancer_survival_rates.csv")
source_df <- filter(source_df, group == "Thyroid")
ggplot(source_df, aes(year, value)) +
## set shape to NA
geom_pointpath(aes(group = group, mult = 1), shape = NA) +
geom_text(aes(label = value))
由 reprex package (v2.0.1)
于 2021-12-30 创建
我修复了简单地添加行:
df <- df[complete.cases(df), ]
在绘图说明之前。问题是生成了许多具有 NA
值的行,并且此行删除了具有空值的行。
我正在测试 ggplot2
的一些模板,我对斜率图表很感兴趣,可以从这里获得:
完美运行。
我只是想做一个类似的但是只定义了一个组,就像下面的代码:
library(dplyr)
library(ggplot2)
theme_set(theme_classic())
source_df <- read.csv("https://raw.githubusercontent.com/jkeirstead/r-slopegraph/master/cancer_survival_rates.csv")
source_df <- filter(source_df, group == "Thyroid")
然后我从示例中复制剩余的代码:
# Define functions. Source: https://github.com/jkeirstead/r-slopegraph
tufte_sort <- function(df, x="year", y="value", group="group", method="tufte", min.space=0.05) {
## First rename the columns for consistency
ids <- match(c(x, y, group), names(df))
df <- df[,ids]
names(df) <- c("x", "y", "group")
## Expand grid to ensure every combination has a defined value
tmp <- expand.grid(x=unique(df$x), group=unique(df$group))
tmp <- merge(df, tmp, all.y=TRUE)
df <- mutate(tmp, y=ifelse(is.na(y), 0, y))
## Cast into a matrix shape and arrange by first column
require(reshape2)
tmp <- dcast(df, group ~ x, value.var="y")
ord <- order(tmp[,2])
tmp <- tmp[ord,]
min.space <- min.space*diff(range(tmp[,-1]))
yshift <- numeric(nrow(tmp))
## Start at "bottom" row
## Repeat for rest of the rows until you hit the top
for (i in 2:nrow(tmp)) {
## Shift subsequent row up by equal space so gap between
## two entries is >= minimum
mat <- as.matrix(tmp[(i-1):i, -1])
d.min <- min(diff(mat))
yshift[i] <- ifelse(d.min < min.space, min.space - d.min, 0)
}
tmp <- cbind(tmp, yshift=cumsum(yshift))
scale <- 1
tmp <- melt(tmp, id=c("group", "yshift"), variable.name="x", value.name="y")
## Store these gaps in a separate variable so that they can be scaled ypos = a*yshift + y
tmp <- transform(tmp, ypos=y + scale*yshift)
return(tmp)
}
plot_slopegraph <- function(df) {
ylabs <- subset(df, x==head(x,1))$group
yvals <- subset(df, x==head(x,1))$ypos
fontSize <- 3
gg <- ggplot(df,aes(x=x,y=ypos)) +
geom_line(aes(group=group),colour="grey80") +
geom_point(colour="white",size=8) +
geom_text(aes(label=y), size=fontSize, family="American Typewriter") +
scale_y_continuous(name="", breaks=yvals, labels=ylabs)
return(gg)
}
## Prepare data
df <- tufte_sort(source_df,
x="year",
y="value",
group="group",
method="tufte",
min.space=0.05)
df <- transform(df,
x=factor(x, levels=c(5,10,15,20),
labels=c("5 years","10 years","15 years","20 years")),
y=round(y))
## Plot
plot_slopegraph(df) + labs(title="Estimates of % survival rates") +
theme(axis.title=element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(hjust=0.5,
family = "American Typewriter",
face="bold"),
axis.text = element_text(family = "American Typewriter",
face="bold"))
我发现的问题是,如果我在 source_df$group
中只取一个对象,连接线就会消失 () 在这种情况下 Thyroid
,如下所示:
如果我只在同一列中添加另一个项目,一切都很好,并且会出现连接线。
有没有办法在这种情况下也有线条?我已经尝试了很多方法,但没有成功删除包含 NA
值的行,因此我不知道如何解决这个问题,如果...可以解决。
预先感谢您的每一个最终回复!
我可以建议一个更简单的方法,使用 {ggh4x} 包,它有一个基本的 R type = "b" 像 geom.您可以删除点,然后绘制文本。
您将在三行代码中得到结果:)
library(tidyverse)
library(ggh4x)
source_df <- read.csv("https://raw.githubusercontent.com/jkeirstead/r-slopegraph/master/cancer_survival_rates.csv")
source_df <- filter(source_df, group == "Thyroid")
ggplot(source_df, aes(year, value)) +
## set shape to NA
geom_pointpath(aes(group = group, mult = 1), shape = NA) +
geom_text(aes(label = value))
由 reprex package (v2.0.1)
于 2021-12-30 创建我修复了简单地添加行:
df <- df[complete.cases(df), ]
在绘图说明之前。问题是生成了许多具有 NA
值的行,并且此行删除了具有空值的行。