条形图中合计总数的百分比
Aggregate percentage of total in bar plot
我有一个这样的数据集(示例)
# Sample data from the question: one row per loan application.
# Bound to `df` because the dplyr/ggplot2 example below reads `df`; without a
# binding, `df` resolves to stats::df() (the F density function).
df <- data.frame(
  loan_status = c("Y", "N", "Y", "N", "Y", "N"),
  property_area = c(
    "Semiurban", "Urban", "Rural",
    "Semiurban", "Urban", "Rural"
  )
)
其中 `loan_status == 'Y'` 表示贷款已被接受。
我想绘制每个 `property_area` 的接受百分比,如下所示:
我尝试过使用 ggplot2 库,但没有成功。
最简单的方法是什么?
你可以从类似下面的代码开始。`scales` 包可以轻松地将标签转换为百分比格式。
# Share of accepted loans (loan_status == "Y") per property area, drawn as
# horizontal bars with a percentage label just past the end of each bar.
library(scales)
library(ggplot2)
library(dplyr)

df %>%
  group_by(property_area) %>%
  # the mean of a logical vector is the proportion of TRUE values
  summarise(acceptance_pct = mean(loan_status == "Y")) %>%
  ggplot(aes(x = acceptance_pct, y = property_area)) +
  geom_col(fill = "darkgreen") +
  # negative hjust pushes the label to the right of the bar end
  geom_text(aes(label = percent(acceptance_pct)), hjust = -0.1) +
  # show the axis in whole percent
  scale_x_continuous(labels = percent_format(accuracy = 1))
你能从这个开始吗?
# Horizontal bar chart showing, for each value of `foo`, its share of all rows.
# NOTE(review): `mydataf` and `foo` are presumably placeholders for your own
# data frame and column - substitute accordingly.
p <- ggplot(mydataf, aes(y = foo)) +
  # after_stat() (ggplot2 >= 3.3.0) replaces the `..count..` notation, which
  # is deprecated since ggplot2 3.4.0.
  geom_bar(aes(x = after_stat(count / sum(count)))) +
  # The `formatter =` argument no longer exists on the scale functions; axis
  # labels are formatted through `labels =` instead.
  scale_x_continuous(labels = scales::percent)
这是一个 base R 解决方案,包括根据百分比更改色调。
我扩展了您的数据,以便显示不同的百分比,而不仅仅是 50%。另外,我将“Y”更改为 `TRUE`,将“N”更改为 `FALSE`。
# Base R version: horizontal bar chart of the loan acceptance rate per
# property area, with the bar colour depending on the rate.

# adjust data: recode the flag as a real logical column ("Y" -> TRUE,
# anything else -> FALSE). The guard makes the step safe to re-run once the
# column is already logical.
if (!is.logical(df1$loan_status)) {
  df1$loan_status <- df1$loan_status == "Y"
}

# get groups: the mean of a logical vector is the proportion of TRUE values
pl <- aggregate(loan_status ~ property_area, data = df1, FUN = mean)

# adjust plotting area, keeping the previous settings so they can be restored
old_par <- par(mar = c(14, 6, 14, 4))

# plot data, add percentages and grid
bp <- barplot(
  pl$loan_status,
  names.arg = pl$property_area,
  # border = NA is the documented way to omit the bar outlines
  horiz = TRUE, border = NA, las = 1,
  # the green channel shrinks as the rate grows, so higher rates are darker
  col = rgb(0.1, 1 - pl$loan_status, 0.1, 1),
  # presumably extended past 1 to leave room for the labels right of the bars
  xlim = c(0, 1.14)
)
# barplot() returns the bar midpoints, used here as the label y positions
text(
  pl$loan_status, bp,
  labels = paste(round(pl$loan_status * 100, digits = 2), "%"),
  pos = 4
)
abline(v = seq(0, 1, by = 0.2), col = "grey", lty = "dotted")

# restore the graphical parameters that were in effect before this plot
par(old_par)
数据
# Extended sample data: ten loan applications across three property areas.
df1 <- data.frame(
  loan_status = c(
    "Y", "N", "Y", "N", "Y",
    "N", "Y", "Y", "Y", "N"
  ),
  property_area = c(
    "Semiurban", "Urban", "Rural", "Semiurban", "Urban",
    "Rural", "Semiurban", "Semiurban", "Semiurban", "Rural"
  ),
  # explicit character row names "1" .. "10"
  row.names = as.character(1:10),
  # keep both columns as character vectors on every R version
  stringsAsFactors = FALSE
)
我有一个这样的数据集(示例)
# Sample data from the question: one row per loan application.
# Bound to `df` because the dplyr/ggplot2 example below reads `df`; without a
# binding, `df` resolves to stats::df() (the F density function).
df <- data.frame(
  loan_status = c("Y", "N", "Y", "N", "Y", "N"),
  property_area = c(
    "Semiurban", "Urban", "Rural",
    "Semiurban", "Urban", "Rural"
  )
)
其中 `loan_status == 'Y'` 表示贷款已被接受。
我想绘制每个 `property_area` 的接受百分比,如下所示:
我尝试过使用 ggplot2 库,但没有成功。
最简单的方法是什么?
你可以从类似下面的代码开始。`scales` 包可以轻松地将标签转换为百分比格式。
# Share of accepted loans (loan_status == "Y") per property area, drawn as
# horizontal bars with a percentage label just past the end of each bar.
library(scales)
library(ggplot2)
library(dplyr)

df %>%
  group_by(property_area) %>%
  # the mean of a logical vector is the proportion of TRUE values
  summarise(acceptance_pct = mean(loan_status == "Y")) %>%
  ggplot(aes(x = acceptance_pct, y = property_area)) +
  geom_col(fill = "darkgreen") +
  # negative hjust pushes the label to the right of the bar end
  geom_text(aes(label = percent(acceptance_pct)), hjust = -0.1) +
  # show the axis in whole percent
  scale_x_continuous(labels = percent_format(accuracy = 1))
你能从这个开始吗?
# Horizontal bar chart showing, for each value of `foo`, its share of all rows.
# NOTE(review): `mydataf` and `foo` are presumably placeholders for your own
# data frame and column - substitute accordingly.
p <- ggplot(mydataf, aes(y = foo)) +
  # after_stat() (ggplot2 >= 3.3.0) replaces the `..count..` notation, which
  # is deprecated since ggplot2 3.4.0.
  geom_bar(aes(x = after_stat(count / sum(count)))) +
  # The `formatter =` argument no longer exists on the scale functions; axis
  # labels are formatted through `labels =` instead.
  scale_x_continuous(labels = scales::percent)
这是一个 base R 解决方案,包括根据百分比更改色调。
我扩展了您的数据,以便显示不同的百分比,而不仅仅是 50%。另外,我将“Y”更改为 `TRUE`,将“N”更改为 `FALSE`。
# Base R version: horizontal bar chart of the loan acceptance rate per
# property area, with the bar colour depending on the rate.

# adjust data: recode the flag as a real logical column ("Y" -> TRUE,
# anything else -> FALSE). The guard makes the step safe to re-run once the
# column is already logical.
if (!is.logical(df1$loan_status)) {
  df1$loan_status <- df1$loan_status == "Y"
}

# get groups: the mean of a logical vector is the proportion of TRUE values
pl <- aggregate(loan_status ~ property_area, data = df1, FUN = mean)

# adjust plotting area, keeping the previous settings so they can be restored
old_par <- par(mar = c(14, 6, 14, 4))

# plot data, add percentages and grid
bp <- barplot(
  pl$loan_status,
  names.arg = pl$property_area,
  # border = NA is the documented way to omit the bar outlines
  horiz = TRUE, border = NA, las = 1,
  # the green channel shrinks as the rate grows, so higher rates are darker
  col = rgb(0.1, 1 - pl$loan_status, 0.1, 1),
  # presumably extended past 1 to leave room for the labels right of the bars
  xlim = c(0, 1.14)
)
# barplot() returns the bar midpoints, used here as the label y positions
text(
  pl$loan_status, bp,
  labels = paste(round(pl$loan_status * 100, digits = 2), "%"),
  pos = 4
)
abline(v = seq(0, 1, by = 0.2), col = "grey", lty = "dotted")

# restore the graphical parameters that were in effect before this plot
par(old_par)
数据
# Extended sample data: ten loan applications across three property areas.
df1 <- data.frame(
  loan_status = c(
    "Y", "N", "Y", "N", "Y",
    "N", "Y", "Y", "Y", "N"
  ),
  property_area = c(
    "Semiurban", "Urban", "Rural", "Semiurban", "Urban",
    "Rural", "Semiurban", "Semiurban", "Semiurban", "Rural"
  ),
  # explicit character row names "1" .. "10"
  row.names = as.character(1:10),
  # keep both columns as character vectors on every R version
  stringsAsFactors = FALSE
)