按 id 重塑并保持其他变量固定
Reshape by id and keep other variables fixed
我想做的是按年份把数据重塑为多列(宽格式),同时保持其余变量不变
# Example data: one row per ID and Year.
# TypeofHome is included so that Df matches the printed table below and the
# desired result, both of which show that column.
# NOTE: c() coerces mixed values to a single type, so every value here is
# stored as character; the Year and Money columns are not numeric.
c1 <- c("ID", "Location", "Year", "Gender", "MoneySpent",
        "MoneyWithCreditCard", "TypeofHome")
c2 <- c(1, "EEUU", 2007, "M", 1500, 400, "House")
c3 <- c(1, "EEUU", 2008, "M", 3900, 0, "House")
c4 <- c(1, "EEUU", 2009, "M", 0, 100, "House")
c5 <- c(2, "Germany", 2007, "F", 0, 1000, "Department")
c6 <- c(2, "Germany", 2008, "F", 4000, 500, "Department")
c7 <- c(2, "Germany", 2009, "F", 700, 0, "Department")
c8 <- c(2, "Germany", 2010, "F", 0, 50, "Department")
# rbind() stacks the vectors as rows; the row names become c2 ... c8.
Df <- data.frame(rbind(c2, c3, c4, c5, c6, c7, c8))
colnames(Df) <- c1
# ID Location Year Gender MoneySpent MoneyWithCreditCard TypeofHome
#c2 1 EEUU 2007 M 1500 400 House
#c3 1 EEUU 2008 M 3900 0 House
#c4 1 EEUU 2009 M 0 100 House
#c5 2 Germany 2007 F 0 1000 Department
#c6 2 Germany 2008 F 4000 500 Department
#c7 2 Germany 2009 F 700 0 Department
#c8 2 Germany 2010 F 0 50 Department
我需要的结果是这个:
# ID Location Gender TypeofHome MS.2007 MS.2008 MS.2009 MS.2010 MWC.2007 MWC.2008 MWC.2009 MWC.2010
# 1 EEUU M House 1500 3900 0 NA 400 0 100 NA
# 2 Germany F Department 0 4000 700 0 1000 500 0 50
哪种解决方案更好?顺便说一句,谢谢!
这里是 pivot_longer
和 pivot_wider
的选项。我们首先在 'Money' 列上使用 pivot_longer
重塑为 'long' 格式,通过根据列名附加 'MS' 或 'MWC' 来更改 'Year' ,并执行 pivot_wider
到 'wide' 格式
library(dplyr)
library(tidyr)
library(stringr)
# Stack the Money* columns into name/value pairs, prepend a measure prefix
# ("MS." for MoneySpent, "MWC." otherwise) to Year, drop the now-redundant
# name column, and spread the prefixed years back out into columns.
Df %>%
  pivot_longer(cols = starts_with("Money")) %>%
  mutate(
    Year = str_c(
      case_when(name == "MoneySpent" ~ "MS.", TRUE ~ "MWC."),
      Year
    )
  ) %>%
  select(!name) %>%
  pivot_wider(names_from = Year, values_from = value)
#ID Location Gender TypeofHome MS.2007 MWC.2007 MS.2008 MWC.2008 MS.2009 MWC.2009 MS.2010 MWC.2010
#1 1 EEUU M House 1500 400 3900 0 0 100 NA NA
#2 2 Germany F Department 0 1000 4000 500 700 0 0 50
或使用rename_at
# Strip the lowercase letters from each Money* column name
# (MoneySpent -> MS, MoneyWithCreditCard -> MWCC), then spread every column
# whose name starts with "M" across the values of Year.
Df %>%
  rename_at(
    vars(matches("Money")),
    function(col_name) str_remove_all(col_name, "[a-z]+")
  ) %>%
  pivot_wider(names_from = Year, values_from = starts_with("M"))
# ID Location Gender MS_2007 MS_2008 MS_2009 MS_2010 MWCC_2007 MWCC_2008 MWCC_2009 MWCC_2010
#1 1 EEUU M 1500 3900 0 <NA> 400 0 100 <NA>
#2 2 Germany F 0 4000 700 0 1000 500 0 50
这将先重命名原始数据中的列,因此只需要调用一次 pivot_wider:
library(dplyr)
library(tidyr)
# Shorten the two money columns first so that a single pivot_wider() call
# produces the MS_<Year> and MWC_<Year> columns.
Df %>%
  # The column in Df is MoneyWithCreditCard; there is no CreditCard column,
  # and rename() fails when asked to rename a column that does not exist.
  rename(MS = MoneySpent, MWC = MoneyWithCreditCard) %>%
  pivot_wider(names_from = "Year", values_from = c("MS", "MWC"))
# # A tibble: 2 x 11
# ID Location Gender MS_2007 MS_2008 MS_2009 MS_2010 MWC_2007 MWC_2008 MWC_2009 MWC_2010
# <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
# 1 1 EEUU M 1500 3900 0 NA 400 0 100 NA
# 2 2 Germany F 0 4000 700 0 1000 500 0 50
您也可以尝试 base R 中的 reshape 函数。
# Base R alternative using reshape().
# The money columns are renamed by name rather than by position, and the id
# variables are derived from whatever other columns Df has, so the call does
# not depend on Df having a fixed number of columns.
local({
  wide_input <- Df
  names(wide_input)[names(wide_input) == "MoneySpent"] <- "MS"
  names(wide_input)[names(wide_input) == "MoneyWithCreditCard"] <- "MWS"
  reshape(
    wide_input,
    # Every column that is neither the time variable nor a measure is an id.
    idvar = setdiff(names(wide_input), c("Year", "MS", "MWS")),
    timevar = "Year",
    direction = "wide"
  )
})
# ID Location Gender TypeOfHome MS.2007 MWS.2007 MS.2008 MWS.2008 MS.2009 MWS.2009 MS.2010 MWS.2010
# 1 1 EEUU M House 1500 400 3900 0 0 100 <NA> <NA>
# 4 2 Germany F Department 0 1000 4000 500 700 0 0 50
我想做的是按年份把数据重塑为多列(宽格式),同时保持其余变量不变
# Example data: one row per ID and Year.
# TypeofHome is included so that Df matches the printed table below and the
# desired result, both of which show that column.
# NOTE: c() coerces mixed values to a single type, so every value here is
# stored as character; the Year and Money columns are not numeric.
c1 <- c("ID", "Location", "Year", "Gender", "MoneySpent",
        "MoneyWithCreditCard", "TypeofHome")
c2 <- c(1, "EEUU", 2007, "M", 1500, 400, "House")
c3 <- c(1, "EEUU", 2008, "M", 3900, 0, "House")
c4 <- c(1, "EEUU", 2009, "M", 0, 100, "House")
c5 <- c(2, "Germany", 2007, "F", 0, 1000, "Department")
c6 <- c(2, "Germany", 2008, "F", 4000, 500, "Department")
c7 <- c(2, "Germany", 2009, "F", 700, 0, "Department")
c8 <- c(2, "Germany", 2010, "F", 0, 50, "Department")
# rbind() stacks the vectors as rows; the row names become c2 ... c8.
Df <- data.frame(rbind(c2, c3, c4, c5, c6, c7, c8))
colnames(Df) <- c1
# ID Location Year Gender MoneySpent MoneyWithCreditCard TypeofHome
#c2 1 EEUU 2007 M 1500 400 House
#c3 1 EEUU 2008 M 3900 0 House
#c4 1 EEUU 2009 M 0 100 House
#c5 2 Germany 2007 F 0 1000 Department
#c6 2 Germany 2008 F 4000 500 Department
#c7 2 Germany 2009 F 700 0 Department
#c8 2 Germany 2010 F 0 50 Department
我需要的结果是这个:
# ID Location Gender TypeofHome MS.2007 MS.2008 MS.2009 MS.2010 MWC.2007 MWC.2008 MWC.2009 MWC.2010
# 1 EEUU M House 1500 3900 0 NA 400 0 100 NA
# 2 Germany F Department 0 4000 700 0 1000 500 0 50
哪种解决方案更好?顺便说一句,谢谢!
这里是 pivot_longer
和 pivot_wider
的选项。我们首先在 'Money' 列上使用 pivot_longer
重塑为 'long' 格式,通过根据列名附加 'MS' 或 'MWC' 来更改 'Year' ,并执行 pivot_wider
到 'wide' 格式
library(dplyr)
library(tidyr)
library(stringr)
# Stack the Money* columns into name/value pairs, prepend a measure prefix
# ("MS." for MoneySpent, "MWC." otherwise) to Year, drop the now-redundant
# name column, and spread the prefixed years back out into columns.
Df %>%
  pivot_longer(cols = starts_with("Money")) %>%
  mutate(
    Year = str_c(
      case_when(name == "MoneySpent" ~ "MS.", TRUE ~ "MWC."),
      Year
    )
  ) %>%
  select(!name) %>%
  pivot_wider(names_from = Year, values_from = value)
#ID Location Gender TypeofHome MS.2007 MWC.2007 MS.2008 MWC.2008 MS.2009 MWC.2009 MS.2010 MWC.2010
#1 1 EEUU M House 1500 400 3900 0 0 100 NA NA
#2 2 Germany F Department 0 1000 4000 500 700 0 0 50
或使用rename_at
# Strip the lowercase letters from each Money* column name
# (MoneySpent -> MS, MoneyWithCreditCard -> MWCC), then spread every column
# whose name starts with "M" across the values of Year.
Df %>%
  rename_at(
    vars(matches("Money")),
    function(col_name) str_remove_all(col_name, "[a-z]+")
  ) %>%
  pivot_wider(names_from = Year, values_from = starts_with("M"))
# ID Location Gender MS_2007 MS_2008 MS_2009 MS_2010 MWCC_2007 MWCC_2008 MWCC_2009 MWCC_2010
#1 1 EEUU M 1500 3900 0 <NA> 400 0 100 <NA>
#2 2 Germany F 0 4000 700 0 1000 500 0 50
这将先重命名原始数据中的列,因此只需要调用一次 pivot_wider:
library(dplyr)
library(tidyr)
# Shorten the two money columns first so that a single pivot_wider() call
# produces the MS_<Year> and MWC_<Year> columns.
Df %>%
  # The column in Df is MoneyWithCreditCard; there is no CreditCard column,
  # and rename() fails when asked to rename a column that does not exist.
  rename(MS = MoneySpent, MWC = MoneyWithCreditCard) %>%
  pivot_wider(names_from = "Year", values_from = c("MS", "MWC"))
# # A tibble: 2 x 11
# ID Location Gender MS_2007 MS_2008 MS_2009 MS_2010 MWC_2007 MWC_2008 MWC_2009 MWC_2010
# <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
# 1 1 EEUU M 1500 3900 0 NA 400 0 100 NA
# 2 2 Germany F 0 4000 700 0 1000 500 0 50
您也可以尝试 base R 中的 reshape 函数。
# Base R alternative using reshape().
# The money columns are renamed by name rather than by position, and the id
# variables are derived from whatever other columns Df has, so the call does
# not depend on Df having a fixed number of columns.
local({
  wide_input <- Df
  names(wide_input)[names(wide_input) == "MoneySpent"] <- "MS"
  names(wide_input)[names(wide_input) == "MoneyWithCreditCard"] <- "MWS"
  reshape(
    wide_input,
    # Every column that is neither the time variable nor a measure is an id.
    idvar = setdiff(names(wide_input), c("Year", "MS", "MWS")),
    timevar = "Year",
    direction = "wide"
  )
})
# ID Location Gender TypeOfHome MS.2007 MWS.2007 MS.2008 MWS.2008 MS.2009 MWS.2009 MS.2010 MWS.2010
# 1 1 EEUU M House 1500 400 3900 0 0 100 <NA> <NA>
# 4 2 Germany F Department 0 1000 4000 500 700 0 0 50