使用一个变量的名称将行转换为列

Convert rows into columns taking the names from one variable

我有一个这样的数据集df:

# Example data: one row per ID/year/month/country with its buy and sell values.
# The values are listed column by column. Because the country names are
# strings, matrix() coerces every value to a string, so none of the resulting
# columns is numeric (character or factor, depending on the R version's
# stringsAsFactors default).
df <- as.data.frame(
  matrix(
    c(
      1, 1, 1, 1, 2, 2,
      2020, 2020, 2020, 2020, 2020, 2020,
      1, 2, 1, 2, 3, 6,
      "Spain", "Spain", "France", "France", "Germany", "Japan",
      0, 40, 5, 200, 98, 300,
      1, 3, 2, 19, 4, 11
    ),
    nrow = 6,
    dimnames = list(NULL, c("ID", "year", "month", "country", "buy", "sell"))
  )
)

我想对 df 进行变形:以 ID、year、month 作为参考(标识)变量,并根据 country 把行转换成新的列,得到如下的 df_modified:

# Desired result: one row per ID/year/month, with one buy_<country> and one
# sell_<country> column for every country that occurs in df. NA marks the
# ID/year/month/country combinations that do not occur in df.
# The data frame is built column by column so that each name sits next to its
# values (the matrix form had the sell_Spain and buy_France values swapped and
# filed Japan's values under Germany), and real NA is used instead of the
# string "NA" so the columns stay numeric.
df_modified <- data.frame(
  ID           = c(1, 1, 2, 2),
  year         = c(2020, 2020, 2020, 2020),
  month        = c(1, 2, 3, 6),
  buy_Spain    = c(0, 40, NA, NA),
  sell_Spain   = c(1, 3, NA, NA),
  buy_France   = c(5, 200, NA, NA),
  sell_France  = c(2, 19, NA, NA),
  buy_Germany  = c(NA, NA, 98, NA),
  sell_Germany = c(NA, NA, 4, NA),
  buy_Japan    = c(NA, NA, NA, 300),
  sell_Japan   = c(NA, NA, NA, 11)
)

我试过:

library(reshape2)
# Attempted reshape (does not work as written): the formula refers to `id`,
# but the column created above is named `ID`, and no value.var is supplied
# even though both `buy` and `sell` need to be spread across the countries.
df_modified <- dcast(df, id+year+month ~ country)

然而它并没有成功。

注意:数字变量在我的原始数据集中是数字(国家是字符等)。

有线索吗?

此致

我们可以在 dcast 中指定 value.var(这里使用 data.table 版本的 dcast,它允许同时指定多个 value.var):

library(data.table)
# Convert df to a data.table first, then spread both value columns so that
# each country gets its own buy_<country> and sell_<country> column.
setDT(df)
dcast(
  df,
  ID + year + month ~ country,
  value.var = c("buy", "sell")
)

或使用pivot_wider

library(tidyr)
library(dplyr)
 # Spread buy/sell into one column per country. Combinations missing from df
 # are filled with the string "0" for both the buy and the sell columns.
 pivot_wider(
   df,
   names_from = country,
   values_from = c(buy, sell),
   values_fill = list(buy = "0", sell = "0")
 )
# A tibble: 4 x 11
#  ID    year  month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany sell_Japan
#  <chr> <chr> <chr> <chr>     <chr>      <chr>       <chr>     <chr>      <chr>       <chr>        <chr>     
#1 1     2020  1     0         5          0           0         1          2           0            0         
#2 1     2020  2     40        200        0           0         3          19          0            0         
#3 2     2020  3     0         0          98          0         0          0           4            0         
#4 2     2020  6     0         0          0           300       0          0           0            11     

如果需要把各列转换为数值类型,只需在变形之前先调用 type.convert:

# Let type.convert() re-infer the column types first (as.is = TRUE), then
# spread buy/sell per country, filling missing combinations with a numeric 0.
pivot_wider(
  type.convert(df, as.is = TRUE),
  names_from = country,
  values_from = c(buy, sell),
  values_fill = list(buy = 0, sell = 0)
)
# A tibble: 4 x 11
#     ID  year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany sell_Japan
#  <int> <int> <int>     <int>      <int>       <int>     <int>      <int>       <int>        <int>      <int>
#1     1  2020     1         0          5           0         0          1           2            0          0
#2     1  2020     2        40        200           0         0          3          19            0          0
#3     2  2020     3         0          0          98         0          0           0            4          0
#4     2  2020     6         0          0           0       300          0           0            0         11

试试这个方法:

library(tidyverse)
# Reshape: one buy_<country> and one sell_<country> column per country.
# No fill value is given, so combinations missing from df come out as NA.
pivot_wider(
  df,
  names_from = country,
  values_from = c(buy, sell)
)

输出:

# A tibble: 4 x 11
  ID    year  month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany
  <fct> <fct> <fct> <fct>     <fct>      <fct>       <fct>     <fct>      <fct>       <fct>       
1 1     2020  1     0         5          NA          NA        1          2           NA          
2 1     2020  2     40        200        NA          NA        3          19          NA          
3 2     2020  3     NA        NA         98          NA        NA         NA          4           
4 2     2020  6     NA        NA         NA          300       NA         NA          NA          
# ... with 1 more variable: sell_Japan <fct>

如果你想把因子(factor)转换为字符型(character),可以使用:

# Reshape 2
# Widen, then convert every reshaped value column (the ones whose names
# contain "_") to character. mutate(across(...)) replaces the deprecated
# funs() helper and the superseded mutate_at().
df %>%
  pivot_wider(names_from = country, values_from = c(buy, sell)) %>%
  mutate(across(contains("_"), as.character))

输出:

# A tibble: 4 x 11
  ID    year  month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany
  <fct> <fct> <fct> <chr>     <chr>      <chr>       <chr>     <chr>      <chr>       <chr>       
1 1     2020  1     0         5          NA          NA        1          2           NA          
2 1     2020  2     40        200        NA          NA        3          19          NA          
3 2     2020  3     NA        NA         98          NA        NA         NA          4           
4 2     2020  6     NA        NA         NA          300       NA         NA          NA          
# ... with 1 more variable: sell_Japan <chr>

或作为数字类型:

# Reshape 3
# Widen, then convert every reshaped value column (names containing "_") to
# numeric. Going through as.character() first converts the printed values
# rather than a factor's internal integer codes. mutate(across(...)) replaces
# the deprecated funs() helper and the superseded mutate_at().
df %>%
  pivot_wider(names_from = country, values_from = c(buy, sell)) %>%
  mutate(across(contains("_"), function(x) as.numeric(as.character(x))))

输出:

# A tibble: 4 x 11
  ID    year  month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany
  <fct> <fct> <fct>     <dbl>      <dbl>       <dbl>     <dbl>      <dbl>       <dbl>        <dbl>
1 1     2020  1             0          5          NA        NA          1           2           NA
2 1     2020  2            40        200          NA        NA          3          19           NA
3 2     2020  3            NA         NA          98        NA         NA          NA            4
4 2     2020  6            NA         NA          NA       300         NA          NA           NA
# ... with 1 more variable: sell_Japan <dbl>

要将所有 NA 替换为零,您可以使用:

# Reshape 4
# Widen, convert every reshaped value column (names containing "_") to numeric
# via as.character(), then overwrite every remaining NA in the result with 0.
# mutate(across(...)) replaces the deprecated funs() helper and the superseded
# mutate_at().
df %>%
  pivot_wider(names_from = country, values_from = c(buy, sell)) %>%
  mutate(across(contains("_"), function(x) as.numeric(as.character(x)))) %>%
  replace(is.na(.), 0)

输出:

# A tibble: 4 x 11
  ID    year  month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany
  <fct> <fct> <fct>     <dbl>      <dbl>       <dbl>     <dbl>      <dbl>       <dbl>        <dbl>
1 1     2020  1             0          5           0         0          1           2            0
2 1     2020  2            40        200           0         0          3          19            0
3 2     2020  3             0          0          98         0          0           0            4
4 2     2020  6             0          0           0       300          0           0            0
# ... with 1 more variable: sell_Japan <dbl>