R ggplot - 按月份或国家绘制利润图

R ggplot - Graph Profit x Month or Country

我是编程新手,刚开始用 R 画一些图,现在在数据分析中有点迷失,需要一些指点!我正在练习做一些分析,手头有一个很长的数据集,包含 19 个国家 x 12 个月,每个月都有一个利润值。大致像这样:

Country   Month   Profit
Brazil    Jan     50
Brazil    fev     80
Brazil    mar     15
Austria   Jan     35
Austria   fev     80
Austria   mar     47
France    Jan     21
France    fev     66
France    mar     15

我正在考虑制作一张图表来显示全年的利润,并为每个国家/地区制作另一张图表,这样我就可以看到排名靠前和排名靠后的 2 个国家/地区,但我有点不知如何去做?或者有更好的方法来总结这个列表吗?

也许这样开始:

library(tidyverse)

# Inline copy of the sample table from the question (whitespace separated).
sample_text <- 'Country   Month   Profit
Brazil    Jan     50
Brazil    fev     80
Brazil    mar     15
Austria   Jan     35
Austria   fev     80
Austria   mar     47
France    Jan     21
France    fev     66
France    mar     15'

# Parse the text with fread() and convert the result to a tibble.
u <- as_tibble(data.table::fread(sample_text))

# Fix the month order explicitly; otherwise the x axis is sorted alphabetically.
month_order <- c("Jan", "fev", "mar")
u[["Month"]] <- factor(u[["Month"]], levels = month_order)

# One line per country, months along the x axis.
ggplot(u, aes(x = Month, y = Profit, colour = Country, group = Country)) +
  geom_line()

最好有一个像 1:12 这样的真实月份(数值)列,这样就不必手动重设因子水平,然后您可以使用 lubridate::month() 为该列生成月份标签。

比如lubridate::month(1L, label = TRUE, abbr = TRUE)

> lubridate::month(1L, label = TRUE, abbr = TRUE)
[1] jan
Levels: jan < fév < mar < avr < mai < jui < jul < aoû < sep < oct < nov < déc

你可以尝试这样的事情。 fct_*() 函数来自 forcats 包,population 来自 tidyr。这两个都在tidyverse中。我希望它能给你一些想法

library(tidyverse)

# Fuller reprex (not part of the actual answer): fabricate a `profit` column
# from tidyr::population so there is something to plot.

# Seed the RNG so the example produces the same data on every run.
set.seed(42)

df <-
  tidyr::population |>
  filter(year >= 2010) |>
  transmute(
    country,
    year,
    # A single random scale factor is applied to every row. abs() keeps it
    # non-negative: fct_lump() rejects negative weights, so a negative draw
    # from rnorm() would otherwise abort the script on roughly half the runs.
    profit = population / 1e6 * abs(rnorm(1))
  ) |>
  filter(
    # Lump everything except the 19 countries with the largest weighted
    # totals into "Other", then drop the "Other" rows.
    fct_lump(country, w = profit, n = 19) != "Other"
  )

# How to highlight the top and bottom performers: one bar-chart facet per
# country, with the two best and two worst countries (by total profit)
# filled in a different colour from the rest.
df |>
  mutate(
    # Order the countries by total profit, largest first, so that the integer
    # code of the factor is the country's rank (1 = best).
    country = fct_reorder(country, profit, sum, .desc = TRUE),
    rank = as.integer(country),
    # The bottom two ranks are derived from the number of countries actually
    # present instead of being hard-coded as 18:19.
    color = case_when(
      rank <= 2 ~ "best",
      rank > max(rank) - 2 ~ "worst",
      TRUE ~ "middle"
    )
  ) |>
  ggplot(aes(year, profit, group = country)) +
  geom_col(aes(fill = color), alpha = 0.5) +
  facet_wrap(~country, scales = "free_y") + # you could drop scales
  # Named values tie each colour to its label explicitly rather than relying
  # on the alphabetical order of the labels.
  scale_fill_manual(
    values = c(best = "dodgerblue", middle = "grey80", worst = "red")
  ) +
  theme_minimal() +
  theme(panel.grid = element_blank())

我会这样做:

############ Libraries
    
    library(ggplot2)

############ These lines are just to replicate the structure of your dataframe

# Month labels in calendar order.
month.labels <- c("jan", "feb", "mar", "apr", "may", "june",
                  "july", "aug", "sept", "oct", "nov", "dec")

# One row per (country, month) pair with a random integer profit in 0..100.
# The frame is built in a single vectorized call instead of rbind()-ing
# one-row data frames inside a nested loop, which copies the whole frame on
# every iteration. Rows are ordered country-major (all months of "A", then
# "B", ...), exactly as the loop produced them; LETTERS stands in for the
# country names.
df <- data.frame(
  Country = rep(LETTERS, each = length(month.labels)),
  Month = rep(month.labels, times = length(LETTERS)),
  Profit = sample(0:100, length(LETTERS) * length(month.labels),
                  replace = TRUE),
  stringsAsFactors = FALSE
)

############ Month labels are plain strings, so convert them to a factor with an
           # explicit calendar order (otherwise the plot sorts them alphabetically)

month.order <- c("jan", "feb", "mar", "apr", "may", "june",
                 "july", "aug", "sept", "oct", "nov", "dec")
df[["Month"]] <- factor(df[["Month"]], levels = month.order)

# Change this value to look at a different country.
show.this.country <- "A"

# Rows belonging to the selected country only.
one.country.data <- df[df[["Country"]] == show.this.country, ]

# Bar chart of monthly profit for the selected country. The theme() tweaks are
# optional; they only adjust margins and title placement.
ggplot(data = one.country.data) +
  geom_col(
    mapping = aes(x = Month, y = Profit),
    colour = "steelblue4",
    fill = "steelblue2"
  ) +
  ggtitle(paste0("country ", show.this.country)) +
  theme(
    plot.margin = unit(c(0.5, 0, 1, 1), "cm"),
    plot.title = element_text(hjust = 0.5, vjust = 2),
    axis.title.x = element_text(vjust = -2),
    axis.title.y = element_text(vjust = 7)
  )
  

# Or loop over every country to print all of the plots. The loop uses its own
# iteration variable so that the `show.this.country` selection set earlier in
# the script is not overwritten as a side effect of running the loop.

for (this.country in sort(unique(df$Country))) {

  # Inside a loop ggplot objects are not auto-printed, so print() is required.
  print(
    ggplot(df[df$Country == this.country, ]) +
      geom_col(aes(x = Month, y = Profit),
               colour = "steelblue4", fill = "steelblue2") +
      labs(title = paste0("country ", this.country)) +
      theme(plot.margin = unit(c(0.5, 0, 1, 1), "cm"),
            plot.title = element_text(hjust = 0.5, vjust = 2),
            axis.title.x = element_text(vjust = -2),
            axis.title.y = element_text(vjust = 7))
  )

}

然后是各国之间的比较:

# Total profit per country, kept in a separate data frame with the columns
# "Country" and "Total".

df2 <- aggregate(x = df$Profit,
                 by = list(df$Country),
                 FUN = sum)
colnames(df2) <- c("Country", "Total")

# Row indices (into df2) of the `n.extremes` countries with the highest and
# the lowest totals. head() is used instead of `[1:n.extremes]` because
# `1:0` evaluates to c(1, 0), so n.extremes = 0 would still select one row,
# and asking for more rows than df2 has would pad the result with NA.

n.extremes <- 3
highest <- head(order(df2$Total, decreasing = TRUE), n.extremes)
lowest  <- head(order(df2$Total, decreasing = FALSE), n.extremes)

# One way to show the best and worst performers together: keep only the rows
# of the selected countries and draw dodged bars, one fill colour per country.

extreme.countries <- df2$Country[c(highest, lowest)]

ggplot(data = df[df$Country %in% extreme.countries, ]) +
  geom_col(
    mapping = aes(x = Month, y = Profit, fill = Country),
    position = "dodge"
  ) +
  ggtitle("best and worst performers") +
  theme(
    plot.margin = unit(c(0.5, 0, 1, 1), "cm"),
    plot.title = element_text(hjust = 0.5, vjust = 2),
    axis.title.x = element_text(vjust = -2),
    axis.title.y = element_text(vjust = 7)
  ) +
  scale_fill_brewer(palette = "Spectral")

# (ggplot provides many more options than this, so have fun exploring!)