使用 dcast 将数据从长格式转换为宽格式
Moving data long to wide with dcast
过去几天我一直在 Stack Overflow 上尝试找到解决我遇到的这个问题的方法。
我正在分析从国家学生信息交换所收到的数据,特别是毕业数据。所以我有一些虚拟数据
df <- data.frame(id=c('1', '1', '1'), grad_date=c('20160501', '20170524', '20180524'), order=c('1', '2', '3'), inst_name=c('community college 1', 'univ 1', 'univ 2'), inst_state=c('CA', 'CA', 'CA'), level=c('Associate of Applied Sciences', 'Bachelors of Applied Sciences', 'Masters of Applied Sciences'), deg_maj_1=c('NETWORK SECURITY', 'INFO ASSUR CYBR-SECURITY', 'CISCO CCNA PREPARATION'), deg_cip_1=c('111003', '520299', '111003'), deg_maj_2=c('NA', 'NA', 'NA'), deg_cip_2=c('NA', 'NA', 'NA'), deg_maj_3=c('NA', 'NA', 'NA'), deg_cip_3=c('NA', 'NA', 'NA'), deg_maj_4=c('NA', 'NA', 'NA'), deg_cip_4=c('NA', 'NA', 'NA'))
我正在尝试把这份数据从长格式转换为宽格式:
df_wide<- dcast(df, id ~ order, value.var = c("inst_name", "inst_state", "level", "deg_maj_1", "deg_cip_1", "deg_maj_2", "deg_cip_2", "deg_maj_3", "deg_cip_3", "deg_maj_4", "deg_cip_4"))
我收到了这个错误:
Error in .subset2(x, i, exact = exact) :
  recursive indexing failed at level 2(递归索引在级别 2 失败)
我参考了另外两个相关问答(原帖中的链接此处已丢失),得到的仍是同样的错误
如果这有帮助:
str(df)
'data.frame': 3 obs. of 14 variables:
$ id : Factor w/ 1 level "1": 1 1 1
$ grad_date : Factor w/ 3 levels "20160501","20170524",..: 1 2 3
$ order : Factor w/ 3 levels "1","2","3": 1 2 3
$ inst_name : Factor w/ 3 levels "community college 1",..: 1 2 3
$ inst_state: Factor w/ 1 level "CA": 1 1 1
$ level : Factor w/ 3 levels "Associate of Applied Sciences",..: 1 2 3
$ deg_maj_1 : Factor w/ 3 levels "CISCO CCNA PREPARATION",..: 3 2 1
$ deg_cip_1 : Factor w/ 2 levels "111003","520299": 1 2 1
$ deg_maj_2 : Factor w/ 1 level "NA": 1 1 1
$ deg_cip_2 : Factor w/ 1 level "NA": 1 1 1
$ deg_maj_3 : Factor w/ 1 level "NA": 1 1 1
$ deg_cip_3 : Factor w/ 1 level "NA": 1 1 1
$ deg_maj_4 : Factor w/ 1 level "NA": 1 1 1
$ deg_cip_4 : Factor w/ 1 level "NA": 1 1 1
有人可以帮忙吗?我束手无策
编辑添加:
期望的输出(是的,我知道它是 loooooooooooong 但它是必需的)
df_wide <- data.frame(id=c('1'), grad_date=c('20160501'), inst_name_1=c('community college 1'), inst_state_1=c('CA'), level_1=c('Associate of Applied Sciences'), deg_maj_1_1=c('NETWORK SECURITY'), deg_cip_1_1=c('111003'), deg_maj_2_1=c('NA'), deg_cip_2_1=c('NA'), deg_maj_3_1=c('NA'), deg_cip_3_1=c('NA'), deg_maj_4_1=c('NA'), deg_cip_4_1=c('NA'), inst_name_2=c('univ 1'), inst_state_2=c('CA'), level_2=c('Bachelors of Applied Sciences'), deg_maj_1_2=c('INFO ASSUR CYBR-SECURITY'), deg_cip_1_2=c('520299'), deg_maj_2_2=c('NA'), deg_cip_2_2=c('NA'), deg_maj_3_2=c('NA'), deg_cip_3_2=c('NA'), deg_maj_4_2=c('NA'), deg_cip_4_2=c('NA'), inst_name_3=c('univ 2'), inst_state_3=c('CA'), level_3=c('Masters of Applied Sciences'), deg_maj_1_2=c('CISCO CCNA PREPARATION'), deg_cip_1_3=c('111003'), deg_maj_2_3=c('NA'), deg_cip_2_3=c('NA'), deg_maj_3_3=c('NA'), deg_cip_3_3=c('NA'), deg_maj_4_3=c('NA'), deg_cip_4_3=c('NA'))
如果你不是非用 dcast() 不可,基础 R 的 reshape() 也可以得到你想要的结果。
reshape(df, idvar="id", timevar = "order", direction="wide")
输出如下:
id grad_date.1 inst_name.1 inst_state.1 level.1
1 1 20160501 community college 1 CA Associate of Applied Sciences
deg_maj_1.1 deg_cip_1.1 deg_maj_2.1 deg_cip_2.1 deg_maj_3.1 deg_cip_3.1
1 NETWORK SECURITY 111003 NA NA NA NA
deg_maj_4.1 deg_cip_4.1 grad_date.2 inst_name.2 inst_state.2
1 NA NA 20170524 univ 1 CA
level.2 deg_maj_1.2 deg_cip_1.2 deg_maj_2.2
1 Bachelors of Applied Sciences INFO ASSUR CYBR-SECURITY 520299 NA
deg_cip_2.2 deg_maj_3.2 deg_cip_3.2 deg_maj_4.2 deg_cip_4.2 grad_date.3 inst_name.3
1 NA NA NA NA NA 20180524 univ 2
inst_state.3 level.3 deg_maj_1.3 deg_cip_1.3
1 CA Masters of Applied Sciences CISCO CCNA PREPARATION 111003
deg_maj_2.3 deg_cip_2.3 deg_maj_3.3 deg_cip_3.3 deg_maj_4.3 deg_cip_4.3
1 NA NA NA NA NA NA
为了完整起见:data.table 版本的 dcast() 能够同时重塑多个值列:
library(data.table)
dcast(setDT(df), id ~ order, value.var = tail(names(df), -3L))
id inst_name_1 inst_name_2 inst_name_3 inst_state_1 inst_state_2 inst_state_3 level_1
1: 1 community college 1 univ 1 univ 2 CA CA CA Associate of Applied Sciences
level_2 level_3 deg_maj_1_1 deg_maj_1_2
1: Bachelors of Applied Sciences Masters of Applied Sciences NETWORK SECURITY INFO ASSUR CYBR-SECURITY
deg_maj_1_3 deg_cip_1_1 deg_cip_1_2 deg_cip_1_3 deg_maj_2_1 deg_maj_2_2 deg_maj_2_3 deg_cip_2_1 deg_cip_2_2
1: CISCO CCNA PREPARATION 111003 520299 111003 NA NA NA NA NA
deg_cip_2_3 deg_maj_3_1 deg_maj_3_2 deg_maj_3_3 deg_cip_3_1 deg_cip_3_2 deg_cip_3_3 deg_maj_4_1 deg_maj_4_2 deg_maj_4_3
1: NA NA NA NA NA NA NA NA NA NA
deg_cip_4_1 deg_cip_4_2 deg_cip_4_3
1: NA NA NA
过去几天我一直在 Stack Overflow 上尝试找到解决我遇到的这个问题的方法。
我正在分析从国家学生信息交换所收到的数据,特别是毕业数据。所以我有一些虚拟数据
df <- data.frame(id=c('1', '1', '1'), grad_date=c('20160501', '20170524', '20180524'), order=c('1', '2', '3'), inst_name=c('community college 1', 'univ 1', 'univ 2'), inst_state=c('CA', 'CA', 'CA'), level=c('Associate of Applied Sciences', 'Bachelors of Applied Sciences', 'Masters of Applied Sciences'), deg_maj_1=c('NETWORK SECURITY', 'INFO ASSUR CYBR-SECURITY', 'CISCO CCNA PREPARATION'), deg_cip_1=c('111003', '520299', '111003'), deg_maj_2=c('NA', 'NA', 'NA'), deg_cip_2=c('NA', 'NA', 'NA'), deg_maj_3=c('NA', 'NA', 'NA'), deg_cip_3=c('NA', 'NA', 'NA'), deg_maj_4=c('NA', 'NA', 'NA'), deg_cip_4=c('NA', 'NA', 'NA'))
我正在尝试把这份数据从长格式转换为宽格式:
df_wide<- dcast(df, id ~ order, value.var = c("inst_name", "inst_state", "level", "deg_maj_1", "deg_cip_1", "deg_maj_2", "deg_cip_2", "deg_maj_3", "deg_cip_3", "deg_maj_4", "deg_cip_4"))
我收到了这个错误:
Error in .subset2(x, i, exact = exact) : recursive indexing failed at level 2(递归索引在级别 2 失败)
我参考了其他相关问答(原帖中的链接此处已丢失),但得到的仍是同样的错误
如果这有帮助:
str(df)
'data.frame': 3 obs. of 14 variables:
$ id : Factor w/ 1 level "1": 1 1 1
$ grad_date : Factor w/ 3 levels "20160501","20170524",..: 1 2 3
$ order : Factor w/ 3 levels "1","2","3": 1 2 3
$ inst_name : Factor w/ 3 levels "community college 1",..: 1 2 3
$ inst_state: Factor w/ 1 level "CA": 1 1 1
$ level : Factor w/ 3 levels "Associate of Applied Sciences",..: 1 2 3
$ deg_maj_1 : Factor w/ 3 levels "CISCO CCNA PREPARATION",..: 3 2 1
$ deg_cip_1 : Factor w/ 2 levels "111003","520299": 1 2 1
$ deg_maj_2 : Factor w/ 1 level "NA": 1 1 1
$ deg_cip_2 : Factor w/ 1 level "NA": 1 1 1
$ deg_maj_3 : Factor w/ 1 level "NA": 1 1 1
$ deg_cip_3 : Factor w/ 1 level "NA": 1 1 1
$ deg_maj_4 : Factor w/ 1 level "NA": 1 1 1
$ deg_cip_4 : Factor w/ 1 level "NA": 1 1 1
有人可以帮忙吗?我束手无策
编辑添加: 期望的输出(是的,我知道它是 loooooooooooong 但它是必需的)
df_wide <- data.frame(id=c('1'), grad_date=c('20160501'), inst_name_1=c('community college 1'), inst_state_1=c('CA'), level_1=c('Associate of Applied Sciences'), deg_maj_1_1=c('NETWORK SECURITY'), deg_cip_1_1=c('111003'), deg_maj_2_1=c('NA'), deg_cip_2_1=c('NA'), deg_maj_3_1=c('NA'), deg_cip_3_1=c('NA'), deg_maj_4_1=c('NA'), deg_cip_4_1=c('NA'), inst_name_2=c('univ 1'), inst_state_2=c('CA'), level_2=c('Bachelors of Applied Sciences'), deg_maj_1_2=c('INFO ASSUR CYBR-SECURITY'), deg_cip_1_2=c('520299'), deg_maj_2_2=c('NA'), deg_cip_2_2=c('NA'), deg_maj_3_2=c('NA'), deg_cip_3_2=c('NA'), deg_maj_4_2=c('NA'), deg_cip_4_2=c('NA'), inst_name_3=c('univ 2'), inst_state_3=c('CA'), level_3=c('Masters of Applied Sciences'), deg_maj_1_2=c('CISCO CCNA PREPARATION'), deg_cip_1_3=c('111003'), deg_maj_2_3=c('NA'), deg_cip_2_3=c('NA'), deg_maj_3_3=c('NA'), deg_cip_3_3=c('NA'), deg_maj_4_3=c('NA'), deg_cip_4_3=c('NA'))
如果你不是非用 dcast() 不可,基础 R 的 reshape() 也可以得到你想要的结果。
reshape(df, idvar="id", timevar = "order", direction="wide")
输出如下:
id grad_date.1 inst_name.1 inst_state.1 level.1
1 1 20160501 community college 1 CA Associate of Applied Sciences
deg_maj_1.1 deg_cip_1.1 deg_maj_2.1 deg_cip_2.1 deg_maj_3.1 deg_cip_3.1
1 NETWORK SECURITY 111003 NA NA NA NA
deg_maj_4.1 deg_cip_4.1 grad_date.2 inst_name.2 inst_state.2
1 NA NA 20170524 univ 1 CA
level.2 deg_maj_1.2 deg_cip_1.2 deg_maj_2.2
1 Bachelors of Applied Sciences INFO ASSUR CYBR-SECURITY 520299 NA
deg_cip_2.2 deg_maj_3.2 deg_cip_3.2 deg_maj_4.2 deg_cip_4.2 grad_date.3 inst_name.3
1 NA NA NA NA NA 20180524 univ 2
inst_state.3 level.3 deg_maj_1.3 deg_cip_1.3
1 CA Masters of Applied Sciences CISCO CCNA PREPARATION 111003
deg_maj_2.3 deg_cip_2.3 deg_maj_3.3 deg_cip_3.3 deg_maj_4.3 deg_cip_4.3
1 NA NA NA NA NA NA
为了完整起见:data.table 版本的 dcast() 能够同时重塑多个值列:
library(data.table)
dcast(setDT(df), id ~ order, value.var = tail(names(df), -3L))
id inst_name_1 inst_name_2 inst_name_3 inst_state_1 inst_state_2 inst_state_3 level_1
1: 1 community college 1 univ 1 univ 2 CA CA CA Associate of Applied Sciences
level_2 level_3 deg_maj_1_1 deg_maj_1_2
1: Bachelors of Applied Sciences Masters of Applied Sciences NETWORK SECURITY INFO ASSUR CYBR-SECURITY
deg_maj_1_3 deg_cip_1_1 deg_cip_1_2 deg_cip_1_3 deg_maj_2_1 deg_maj_2_2 deg_maj_2_3 deg_cip_2_1 deg_cip_2_2
1: CISCO CCNA PREPARATION 111003 520299 111003 NA NA NA NA NA
deg_cip_2_3 deg_maj_3_1 deg_maj_3_2 deg_maj_3_3 deg_cip_3_1 deg_cip_3_2 deg_cip_3_3 deg_maj_4_1 deg_maj_4_2 deg_maj_4_3
1: NA NA NA NA NA NA NA NA NA NA
deg_cip_4_1 deg_cip_4_2 deg_cip_4_3
1: NA NA NA