使用一个变量的名称将行转换为列
Convert rows into columns taking the names from one variable
我有一个这样的数据集df:
# Example data in long format: one row per ID/year/month/country.
# All values go through a single c() call together with the country names,
# so the numbers are coerced to character; matrix() then fills the
# 6 x 6 grid column by column.
df <- setNames(
  as.data.frame(
    matrix(
      c(
        1, 1, 1, 1, 2, 2,
        2020, 2020, 2020, 2020, 2020, 2020,
        1, 2, 1, 2, 3, 6,
        "Spain", "Spain", "France", "France", "Germany", "Japan",
        0, 40, 5, 200, 98, 300,
        1, 3, 2, 19, 4, 11
      ),
      ncol = 6
    )
  ),
  c("ID", "year", "month", "country", "buy", "sell")
)
我想重塑 df:以 ID、year 和 month 作为标识变量,并根据 country 将各行转换为新的列,期望得到如下所示的 df_modified:
# Desired wide layout: one row per ID/year/month and one buy_/sell_ column
# per country. Values are listed column by column because matrix() fills
# column-major.
# Missing cells use a real NA (not the string "NA") so every column stays
# numeric, and the value rows are listed in the same order as the column
# names below.
# NOTE(review): the last row (300 / 11) belongs to Japan in `df` but sits in
# the Germany columns here, as in the original sketch -- confirm intent.
df_modified <- as.data.frame(matrix(c(
  1, 1, 2, 2,              # ID
  2020, 2020, 2020, 2020,  # year
  1, 2, 3, 6,              # month
  0, 40, NA, NA,           # buy_Spain
  1, 3, NA, NA,            # sell_Spain
  5, 200, NA, NA,          # buy_France
  2, 19, NA, NA,           # sell_France
  NA, NA, 98, 300,         # buy_Germany
  NA, NA, 4, 11            # sell_Germany
), ncol = 9))
names(df_modified) <- c(
  "ID", "year", "month",
  "buy_Spain", "sell_Spain",
  "buy_France", "sell_France",
  "buy_Germany", "sell_Germany"
)
我试过:
library(reshape2)
# Attempt as reported in the question (kept as-is, it does not work):
# no value.var is supplied, and the formula refers to lowercase `id` while
# the column in `df` is named `ID`.
df_modified <- dcast(data = df, formula = id + year + month ~ country)
然而它并没有成功。
注意:数字变量在我的原始数据集中是数字(国家是字符等)。
有线索吗?
此致
我们可以在 dcast 中指定 value.var:
library(data.table)
# Convert `df` to a data.table by reference, then cast to wide format with
# both `buy` and `sell` as value columns (one pair of columns per country).
setDT(df)
dcast(df, ID + year + month ~ country, value.var = c("buy", "sell"))
或使用pivot_wider
library(tidyr)
library(dplyr)
# Spread `buy` and `sell` into one column per country; ID/year/month
# combinations with no data for a country are filled with the string "0".
pivot_wider(
  df,
  names_from = country,
  values_from = c(buy, sell),
  values_fill = list(buy = "0", sell = "0")
)
# A tibble: 4 x 11
# ID year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany sell_Japan
# <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 1 2020 1 0 5 0 0 1 2 0 0
#2 1 2020 2 40 200 0 0 3 19 0 0
#3 2 2020 3 0 0 98 0 0 0 4 0
#4 2 2020 6 0 0 0 300 0 0 0 11
如果需要得到数值类型的列,只需先使用 type.convert:
# Let type.convert() pick a suitable type for each column first
# (as.is = TRUE keeps strings as character), then pivot and fill the
# missing combinations with a numeric 0.
pivot_wider(
  type.convert(df, as.is = TRUE),
  names_from = country,
  values_from = c(buy, sell),
  values_fill = list(buy = 0, sell = 0)
)
# A tibble: 4 x 11
# ID year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany sell_Japan
# <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
#1 1 2020 1 0 5 0 0 1 2 0 0
#2 1 2020 2 40 200 0 0 3 19 0 0
#3 2 2020 3 0 0 98 0 0 0 4 0
#4 2 2020 6 0 0 0 300 0 0 0 11
试试这个方法:
library(tidyverse)
# Reshape: one buy_/sell_ column per country; combinations that do not
# occur in `df` are left as NA.
pivot_wider(df, names_from = country, values_from = c(buy, sell))
输出:
# A tibble: 4 x 11
ID year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany
<fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
1 1 2020 1 0 5 NA NA 1 2 NA
2 1 2020 2 40 200 NA NA 3 19 NA
3 2 2020 3 NA NA 98 NA NA NA 4
4 2 2020 6 NA NA NA 300 NA NA NA
# ... with 1 more variable: sell_Japan <fct>
如果你想把因子(factor)列转换为字符(character)类型,可以使用:
# Reshape 2: widen, then convert the new value columns (those whose names
# contain "_") to character.
# mutate(across()) replaces mutate_at() + funs(): funs() has been deprecated
# since dplyr 0.8.0. across() requires dplyr >= 1.0.0.
df %>%
  pivot_wider(names_from = country, values_from = c(buy, sell)) %>%
  mutate(across(contains("_"), as.character))
输出:
# A tibble: 4 x 11
ID year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany
<fct> <fct> <fct> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 1 2020 1 0 5 NA NA 1 2 NA
2 1 2020 2 40 200 NA NA 3 19 NA
3 2 2020 3 NA NA 98 NA NA NA 4
4 2 2020 6 NA NA NA 300 NA NA NA
# ... with 1 more variable: sell_Japan <chr>
或作为数字类型:
# Reshape 3: widen, then convert the new value columns (those whose names
# contain "_") to numeric. Going through as.character() first makes the
# conversion use the labels rather than the internal integer codes when the
# columns are factors.
# mutate(across()) replaces mutate_at() + funs(): funs() has been deprecated
# since dplyr 0.8.0. across() requires dplyr >= 1.0.0.
df %>%
  pivot_wider(names_from = country, values_from = c(buy, sell)) %>%
  mutate(across(contains("_"), ~ as.numeric(as.character(.x))))
输出:
# A tibble: 4 x 11
ID year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany
<fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2020 1 0 5 NA NA 1 2 NA
2 1 2020 2 40 200 NA NA 3 19 NA
3 2 2020 3 NA NA 98 NA NA NA 4
4 2 2020 6 NA NA NA 300 NA NA NA
# ... with 1 more variable: sell_Japan <dbl>
要将所有 NA 替换为零,您可以使用:
# Reshape 4: widen, convert the new value columns (those whose names contain
# "_") to numeric, then replace every remaining NA with 0.
# mutate(across()) replaces mutate_at() + funs(): funs() has been deprecated
# since dplyr 0.8.0. across() requires dplyr >= 1.0.0.
df %>%
  pivot_wider(names_from = country, values_from = c(buy, sell)) %>%
  mutate(across(contains("_"), ~ as.numeric(as.character(.x)))) %>%
  replace(is.na(.), 0)
输出:
# A tibble: 4 x 11
ID year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany
<fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2020 1 0 5 0 0 1 2 0
2 1 2020 2 40 200 0 0 3 19 0
3 2 2020 3 0 0 98 0 0 0 4
4 2 2020 6 0 0 0 300 0 0 0
# ... with 1 more variable: sell_Japan <dbl>
我有一个这样的数据集df:
# Example data in long format: one row per ID/year/month/country.
# All values go through a single c() call together with the country names,
# so the numbers are coerced to character; matrix() then fills the
# 6 x 6 grid column by column.
df <- setNames(
  as.data.frame(
    matrix(
      c(
        1, 1, 1, 1, 2, 2,
        2020, 2020, 2020, 2020, 2020, 2020,
        1, 2, 1, 2, 3, 6,
        "Spain", "Spain", "France", "France", "Germany", "Japan",
        0, 40, 5, 200, 98, 300,
        1, 3, 2, 19, 4, 11
      ),
      ncol = 6
    )
  ),
  c("ID", "year", "month", "country", "buy", "sell")
)
我想重塑 df:以 ID、year 和 month 作为标识变量,并根据 country 将各行转换为新的列,期望得到如下所示的 df_modified:
# Desired wide layout: one row per ID/year/month and one buy_/sell_ column
# per country. Values are listed column by column because matrix() fills
# column-major.
# Missing cells use a real NA (not the string "NA") so every column stays
# numeric, and the value rows are listed in the same order as the column
# names below.
# NOTE(review): the last row (300 / 11) belongs to Japan in `df` but sits in
# the Germany columns here, as in the original sketch -- confirm intent.
df_modified <- as.data.frame(matrix(c(
  1, 1, 2, 2,              # ID
  2020, 2020, 2020, 2020,  # year
  1, 2, 3, 6,              # month
  0, 40, NA, NA,           # buy_Spain
  1, 3, NA, NA,            # sell_Spain
  5, 200, NA, NA,          # buy_France
  2, 19, NA, NA,           # sell_France
  NA, NA, 98, 300,         # buy_Germany
  NA, NA, 4, 11            # sell_Germany
), ncol = 9))
names(df_modified) <- c(
  "ID", "year", "month",
  "buy_Spain", "sell_Spain",
  "buy_France", "sell_France",
  "buy_Germany", "sell_Germany"
)
我试过:
library(reshape2)
# Attempt as reported in the question (kept as-is, it does not work):
# no value.var is supplied, and the formula refers to lowercase `id` while
# the column in `df` is named `ID`.
df_modified <- dcast(data = df, formula = id + year + month ~ country)
然而它并没有成功。
注意:数字变量在我的原始数据集中是数字(国家是字符等)。
有线索吗?
此致
我们可以在 dcast 中指定 value.var:
library(data.table)
# Convert `df` to a data.table by reference, then cast to wide format with
# both `buy` and `sell` as value columns (one pair of columns per country).
setDT(df)
dcast(df, ID + year + month ~ country, value.var = c("buy", "sell"))
或使用pivot_wider
library(tidyr)
library(dplyr)
# Spread `buy` and `sell` into one column per country; ID/year/month
# combinations with no data for a country are filled with the string "0".
pivot_wider(
  df,
  names_from = country,
  values_from = c(buy, sell),
  values_fill = list(buy = "0", sell = "0")
)
# A tibble: 4 x 11
# ID year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany sell_Japan
# <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 1 2020 1 0 5 0 0 1 2 0 0
#2 1 2020 2 40 200 0 0 3 19 0 0
#3 2 2020 3 0 0 98 0 0 0 4 0
#4 2 2020 6 0 0 0 300 0 0 0 11
如果需要得到数值类型的列,只需先使用 type.convert:
# Let type.convert() pick a suitable type for each column first
# (as.is = TRUE keeps strings as character), then pivot and fill the
# missing combinations with a numeric 0.
pivot_wider(
  type.convert(df, as.is = TRUE),
  names_from = country,
  values_from = c(buy, sell),
  values_fill = list(buy = 0, sell = 0)
)
# A tibble: 4 x 11
# ID year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany sell_Japan
# <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
#1 1 2020 1 0 5 0 0 1 2 0 0
#2 1 2020 2 40 200 0 0 3 19 0 0
#3 2 2020 3 0 0 98 0 0 0 4 0
#4 2 2020 6 0 0 0 300 0 0 0 11
试试这个方法:
library(tidyverse)
# Reshape: one buy_/sell_ column per country; combinations that do not
# occur in `df` are left as NA.
pivot_wider(df, names_from = country, values_from = c(buy, sell))
输出:
# A tibble: 4 x 11
ID year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany
<fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
1 1 2020 1 0 5 NA NA 1 2 NA
2 1 2020 2 40 200 NA NA 3 19 NA
3 2 2020 3 NA NA 98 NA NA NA 4
4 2 2020 6 NA NA NA 300 NA NA NA
# ... with 1 more variable: sell_Japan <fct>
如果你想把因子(factor)列转换为字符(character)类型,可以使用:
# Reshape 2: widen, then convert the new value columns (those whose names
# contain "_") to character.
# mutate(across()) replaces mutate_at() + funs(): funs() has been deprecated
# since dplyr 0.8.0. across() requires dplyr >= 1.0.0.
df %>%
  pivot_wider(names_from = country, values_from = c(buy, sell)) %>%
  mutate(across(contains("_"), as.character))
输出:
# A tibble: 4 x 11
ID year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany
<fct> <fct> <fct> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 1 2020 1 0 5 NA NA 1 2 NA
2 1 2020 2 40 200 NA NA 3 19 NA
3 2 2020 3 NA NA 98 NA NA NA 4
4 2 2020 6 NA NA NA 300 NA NA NA
# ... with 1 more variable: sell_Japan <chr>
或作为数字类型:
# Reshape 3: widen, then convert the new value columns (those whose names
# contain "_") to numeric. Going through as.character() first makes the
# conversion use the labels rather than the internal integer codes when the
# columns are factors.
# mutate(across()) replaces mutate_at() + funs(): funs() has been deprecated
# since dplyr 0.8.0. across() requires dplyr >= 1.0.0.
df %>%
  pivot_wider(names_from = country, values_from = c(buy, sell)) %>%
  mutate(across(contains("_"), ~ as.numeric(as.character(.x))))
输出:
# A tibble: 4 x 11
ID year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany
<fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2020 1 0 5 NA NA 1 2 NA
2 1 2020 2 40 200 NA NA 3 19 NA
3 2 2020 3 NA NA 98 NA NA NA 4
4 2 2020 6 NA NA NA 300 NA NA NA
# ... with 1 more variable: sell_Japan <dbl>
要将所有 NA 替换为零,您可以使用:
# Reshape 4: widen, convert the new value columns (those whose names contain
# "_") to numeric, then replace every remaining NA with 0.
# mutate(across()) replaces mutate_at() + funs(): funs() has been deprecated
# since dplyr 0.8.0. across() requires dplyr >= 1.0.0.
df %>%
  pivot_wider(names_from = country, values_from = c(buy, sell)) %>%
  mutate(across(contains("_"), ~ as.numeric(as.character(.x)))) %>%
  replace(is.na(.), 0)
输出:
# A tibble: 4 x 11
ID year month buy_Spain buy_France buy_Germany buy_Japan sell_Spain sell_France sell_Germany
<fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2020 1 0 5 0 0 1 2 0
2 1 2020 2 40 200 0 0 3 19 0
3 2 2020 3 0 0 98 0 0 0 4
4 2 2020 6 0 0 0 300 0 0 0
# ... with 1 more variable: sell_Japan <dbl>