R ggplot - 按月份或国家绘制利润图
R ggplot - Graph Profit x Month or Country
我是编程新手,刚开始用 R 绘图,现在在数据分析上有点迷茫,需要一些指点!我正在练习做一些分析,手头有一个很长的数据集,包含 19 个国家 × 12 个月,每个国家每个月都有一个利润值。大致如下:
Country Month Profit
Brazil Jan 50
Brazil fev 80
Brazil mar 15
Austria Jan 35
Austria fev 80
Austria mar 47
France Jan 21
France fev 66
France mar 15
我想做一张图来显示全年的利润,并为每个国家/地区各做一张图,这样就能看出排名最靠前和最靠后的 2 个国家/地区,但我不太清楚该怎么做。或者有没有更好的方法来汇总这份数据?
也许这样开始:
library(tidyverse)
# Parse the inline sample (country, month, profit) into a tibble.
u <- as_tibble(data.table::fread('Country Month Profit
Brazil Jan 50
Brazil fev 80
Brazil mar 15
Austria Jan 35
Austria fev 80
Austria mar 47
France Jan 21
France fev 66
France mar 15'))

# Turn Month into a factor with explicit levels so the x axis follows
# calendar order instead of alphabetical order.
u <- mutate(u, Month = factor(Month, levels = c("Jan", "fev", "mar")))

# One line per country: profit over the months.
ggplot(
  u,
  aes(x = Month, y = Profit, colour = Country, group = Country)
) +
  geom_line()
最好有一个像 1:12 这样的数值月份列,这样就不必手动重设因子水平;然后可以用 lubridate::month() 为该列生成月份标签,例如 lubridate::month(1L, label = TRUE, abbr = TRUE):
> lubridate::month(1L, label = TRUE, abbr = TRUE)
[1] jan
Levels: jan < fév < mar < avr < mai < jui < jul < aoû < sep < oct < nov < déc
你可以尝试这样做。fct_*() 函数来自 forcats 包,population 数据集来自 tidyr 包,这两个包都包含在 tidyverse 中。希望它能给你一些思路。
library(tidyverse)
# Fuller reprex (setup only -- substitute your own data frame here).
# Builds one made-up "profit" per country and year from tidyr::population and
# keeps the 19 countries with the largest total.
df <-
  tidyr::population |>
  filter(year >= 2010) |>
  transmute(
    country,
    year,
    # A single random multiplier is drawn and recycled over all rows.
    # abs() keeps it non-negative: fct_lump() rejects negative weights, so a
    # negative draw would abort the example.
    profit = population / 1e6 * abs(rnorm(1))
  ) |>
  filter(
    # Countries outside the top 19 (weighted by profit) are lumped into
    # "Other" and dropped here.
    fct_lump(country, w = profit, n = 19) != "Other"
  )

# How to highlight top and bottom performers.
n_highlight <- 2L # how many countries to flag at each end of the ranking
df |>
  mutate(
    # Order countries by total profit, largest first, so that the factor's
    # integer code is the country's rank (1 = best).
    country = fct_reorder(country, profit, sum, .desc = TRUE),
    rank = as.integer(country),
    # The "worst" cut-off is derived from the number of countries actually
    # present instead of being hard-coded.
    color = case_when(
      rank <= n_highlight ~ "best",
      rank > nlevels(country) - n_highlight ~ "worst",
      TRUE ~ "middle"
    )
  ) |>
  ggplot(aes(year, profit, group = country)) +
  geom_col(aes(fill = color), alpha = 0.5) +
  facet_wrap(~country, scales = "free_y") + # you could drop scales
  # Naming the values ties each colour to its label explicitly rather than
  # relying on the alphabetical order of the labels.
  scale_fill_manual(
    values = c(best = "dodgerblue", middle = "grey80", worst = "red")
  ) +
  theme_minimal() +
  theme(panel.grid = element_blank())
我会这样做:
############ Libraries
library(ggplot2)
############ These lines are just to replicate the structure of your dataframe
# Month labels in calendar order; reused below as the factor levels so the
# list is written only once.
month.labels <- c("jan", "feb", "mar", "apr", "may", "june",
                  "july", "aug", "sept", "oct", "nov", "dec")

# One row per (country, month) pair, built in a single vectorised call instead
# of rbind()-ing one row at a time (which copies the whole data frame on every
# iteration). Countries are the 26 capital letters; each profit is a random
# integer drawn from 0..100. Rows are ordered country by country, with the
# months in calendar order inside each country.
df <- data.frame(
  Country = rep(LETTERS, each = length(month.labels)),
  Month = rep(month.labels, times = length(LETTERS)),
  Profit = sample(0:100, length(LETTERS) * length(month.labels),
                  replace = TRUE),
  stringsAsFactors = FALSE
)

############ If you keep months as characters you need to set the variable as
# a factor and define the specific order (else they'll be ordered
# alphabetically in the plot)
df$Month <- factor(df$Month, levels = month.labels)
# Build the monthly-profit bar chart for a single country.
#   data    : data frame with Country, Month and Profit columns
#   country : the Country value whose rows should be plotted
# Returns the ggplot object; it is drawn only when printed.
country_profit_plot <- function(data, country) {
  ggplot(data[data$Country == country, ]) +
    geom_col(aes(x = Month, y = Profit),
             colour = "steelblue4", fill = "steelblue2") +
    labs(title = paste0("country ", country)) +
    # theme settings are not needed, but they make it look cleaner in my view
    theme(plot.margin = unit(c(0.5, 0, 1, 1), "cm"),
          plot.title = element_text(hjust = 0.5, vjust = 2),
          axis.title.x = element_text(vjust = -2),
          axis.title.y = element_text(vjust = 7))
}

show.this.country <- "A" # you can use this variable to switch from
                         # one country to the other to explore them
country_profit_plot(df, show.this.country)

# or loop through if you want to print them all
for (show.this.country in levels(as.factor(df$Country))) {
  # (inside a loop plots are not auto-printed, so print() is required)
  print(country_profit_plot(df, show.this.country))
}
然后是各国之间的比较:
# You can rearrange a bit to have the totals per country on a separate
# dataframe: one row per country with the sum of its profits.
df2 <- aggregate(x = df$Profit,
                 by = list(df$Country),
                 FUN = sum)
colnames(df2) <- c("Country", "Total")

# Row indices (into df2) of the "n.extremes" highest and lowest totals.
# head() is used instead of [1:n.extremes] so that n.extremes = 0 selects
# nothing (1:0 is c(1, 0), which would still pick a row) and a value larger
# than the number of countries does not produce NA indices.
n.extremes <- 3
highest <- head(order(df2$Total, decreasing = TRUE), n.extremes)
lowest <- head(order(df2$Total, decreasing = FALSE), n.extremes)

# this is one way to show the best and worst performers side by side:
# one dodged bar per selected country within each month
ggplot(df[df$Country %in% df2$Country[c(highest, lowest)], ]) +
  geom_col(aes(x = Month, y = Profit, fill = Country), position = "dodge") +
  labs(title = "best and worst performers") +
  theme(plot.margin = unit(c(0.5, 0, 1, 1), "cm"),
        plot.title = element_text(hjust = 0.5, vjust = 2),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 7)) +
  scale_fill_brewer(palette = "Spectral")
# (but ggplot provides many more, so have fun exploring!)
我是编程新手,刚开始用 R 绘图,现在在数据分析上有点迷茫,需要一些指点!我正在练习做一些分析,手头有一个很长的数据集,包含 19 个国家 × 12 个月,每个国家每个月都有一个利润值。大致如下:
Country Month Profit
Brazil Jan 50
Brazil fev 80
Brazil mar 15
Austria Jan 35
Austria fev 80
Austria mar 47
France Jan 21
France fev 66
France mar 15
我想做一张图来显示全年的利润,并为每个国家/地区各做一张图,这样就能看出排名最靠前和最靠后的 2 个国家/地区,但我不太清楚该怎么做。或者有没有更好的方法来汇总这份数据?
也许这样开始:
library(tidyverse)
# Parse the inline sample (country, month, profit) into a tibble.
u <- as_tibble(data.table::fread('Country Month Profit
Brazil Jan 50
Brazil fev 80
Brazil mar 15
Austria Jan 35
Austria fev 80
Austria mar 47
France Jan 21
France fev 66
France mar 15'))

# Turn Month into a factor with explicit levels so the x axis follows
# calendar order instead of alphabetical order.
u <- mutate(u, Month = factor(Month, levels = c("Jan", "fev", "mar")))

# One line per country: profit over the months.
ggplot(
  u,
  aes(x = Month, y = Profit, colour = Country, group = Country)
) +
  geom_line()
最好有一个像 1:12 这样的数值月份列,这样就不必手动重设因子水平;然后可以用 lubridate::month() 为该列生成月份标签,例如 lubridate::month(1L, label = TRUE, abbr = TRUE):
> lubridate::month(1L, label = TRUE, abbr = TRUE)
[1] jan
Levels: jan < fév < mar < avr < mai < jui < jul < aoû < sep < oct < nov < déc
你可以尝试这样做。fct_*() 函数来自 forcats 包,population 数据集来自 tidyr 包,这两个包都包含在 tidyverse 中。希望它能给你一些思路。
library(tidyverse)
# Fuller reprex (setup only -- substitute your own data frame here).
# Builds one made-up "profit" per country and year from tidyr::population and
# keeps the 19 countries with the largest total.
df <-
  tidyr::population |>
  filter(year >= 2010) |>
  transmute(
    country,
    year,
    # A single random multiplier is drawn and recycled over all rows.
    # abs() keeps it non-negative: fct_lump() rejects negative weights, so a
    # negative draw would abort the example.
    profit = population / 1e6 * abs(rnorm(1))
  ) |>
  filter(
    # Countries outside the top 19 (weighted by profit) are lumped into
    # "Other" and dropped here.
    fct_lump(country, w = profit, n = 19) != "Other"
  )

# How to highlight top and bottom performers.
n_highlight <- 2L # how many countries to flag at each end of the ranking
df |>
  mutate(
    # Order countries by total profit, largest first, so that the factor's
    # integer code is the country's rank (1 = best).
    country = fct_reorder(country, profit, sum, .desc = TRUE),
    rank = as.integer(country),
    # The "worst" cut-off is derived from the number of countries actually
    # present instead of being hard-coded.
    color = case_when(
      rank <= n_highlight ~ "best",
      rank > nlevels(country) - n_highlight ~ "worst",
      TRUE ~ "middle"
    )
  ) |>
  ggplot(aes(year, profit, group = country)) +
  geom_col(aes(fill = color), alpha = 0.5) +
  facet_wrap(~country, scales = "free_y") + # you could drop scales
  # Naming the values ties each colour to its label explicitly rather than
  # relying on the alphabetical order of the labels.
  scale_fill_manual(
    values = c(best = "dodgerblue", middle = "grey80", worst = "red")
  ) +
  theme_minimal() +
  theme(panel.grid = element_blank())
我会这样做:
############ Libraries
library(ggplot2)
############ These lines are just to replicate the structure of your dataframe
# Month labels in calendar order; reused below as the factor levels so the
# list is written only once.
month.labels <- c("jan", "feb", "mar", "apr", "may", "june",
                  "july", "aug", "sept", "oct", "nov", "dec")

# One row per (country, month) pair, built in a single vectorised call instead
# of rbind()-ing one row at a time (which copies the whole data frame on every
# iteration). Countries are the 26 capital letters; each profit is a random
# integer drawn from 0..100. Rows are ordered country by country, with the
# months in calendar order inside each country.
df <- data.frame(
  Country = rep(LETTERS, each = length(month.labels)),
  Month = rep(month.labels, times = length(LETTERS)),
  Profit = sample(0:100, length(LETTERS) * length(month.labels),
                  replace = TRUE),
  stringsAsFactors = FALSE
)

############ If you keep months as characters you need to set the variable as
# a factor and define the specific order (else they'll be ordered
# alphabetically in the plot)
df$Month <- factor(df$Month, levels = month.labels)
# Build the monthly-profit bar chart for a single country.
#   data    : data frame with Country, Month and Profit columns
#   country : the Country value whose rows should be plotted
# Returns the ggplot object; it is drawn only when printed.
country_profit_plot <- function(data, country) {
  ggplot(data[data$Country == country, ]) +
    geom_col(aes(x = Month, y = Profit),
             colour = "steelblue4", fill = "steelblue2") +
    labs(title = paste0("country ", country)) +
    # theme settings are not needed, but they make it look cleaner in my view
    theme(plot.margin = unit(c(0.5, 0, 1, 1), "cm"),
          plot.title = element_text(hjust = 0.5, vjust = 2),
          axis.title.x = element_text(vjust = -2),
          axis.title.y = element_text(vjust = 7))
}

show.this.country <- "A" # you can use this variable to switch from
                         # one country to the other to explore them
country_profit_plot(df, show.this.country)

# or loop through if you want to print them all
for (show.this.country in levels(as.factor(df$Country))) {
  # (inside a loop plots are not auto-printed, so print() is required)
  print(country_profit_plot(df, show.this.country))
}
然后是各国之间的比较:
# You can rearrange a bit to have the totals per country on a separate
# dataframe: one row per country with the sum of its profits.
df2 <- aggregate(x = df$Profit,
                 by = list(df$Country),
                 FUN = sum)
colnames(df2) <- c("Country", "Total")

# Row indices (into df2) of the "n.extremes" highest and lowest totals.
# head() is used instead of [1:n.extremes] so that n.extremes = 0 selects
# nothing (1:0 is c(1, 0), which would still pick a row) and a value larger
# than the number of countries does not produce NA indices.
n.extremes <- 3
highest <- head(order(df2$Total, decreasing = TRUE), n.extremes)
lowest <- head(order(df2$Total, decreasing = FALSE), n.extremes)

# this is one way to show the best and worst performers side by side:
# one dodged bar per selected country within each month
ggplot(df[df$Country %in% df2$Country[c(highest, lowest)], ]) +
  geom_col(aes(x = Month, y = Profit, fill = Country), position = "dodge") +
  labs(title = "best and worst performers") +
  theme(plot.margin = unit(c(0.5, 0, 1, 1), "cm"),
        plot.title = element_text(hjust = 0.5, vjust = 2),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 7)) +
  scale_fill_brewer(palette = "Spectral")
# (but ggplot provides many more, so have fun exploring!)