有选择地应用 pivot_wider() 函数
Selectively Applying pivot_wider() Function
背景:我们要求每位参与者识别多种情绪,并针对每种情绪收集数据:他们识别出的第一种情绪占一列,第二种情绪占一列,依此类推;此外,每种情绪的每个后续问题也各占单独的一列。宽格式下的数据如下所示:
# Wide-format example: one row per participant, one column per emotion slot
# (Emo1_, Emo2_) plus one column per follow-up question for each slot.
rows <- seq_len(4)
cols <- c(
  "PID", "Stage",
  "Emo1_", "Emo2_",
  "Emo1_Intense", "Emo2_Intense",
  "Emo1_Desc", "Emo2_Desc",
  "Keyword"
)

# Start from an all-NA skeleton so the column order is fixed by `cols`,
# then fill each column in turn.
df <- data.frame(
  matrix(
    NA,
    nrow = length(rows),
    ncol = length(cols),
    dimnames = list(rows, cols)
  )
)

# Participant-level columns.
df[["PID"]] <- sprintf("A-%03d", rows)
df[["Stage"]] <- c("Beginning", "End", "Middle", "Middle")
df[["Keyword"]] <- c("Bus", "Ceiling", "Chainsaw", "Floor")

# First and second named emotion.
df[["Emo1_"]] <- c("Fear", "Sadness", "Happy", "Anger")
df[["Emo2_"]] <- c("Content", "Depressed", "Lost", "Sad")

# Follow-up answers for each emotion slot.
df[["Emo1_Intense"]] <- seq.int(5L, 8L)
df[["Emo2_Intense"]] <- seq.int(1L, 4L)
df[["Emo1_Desc"]] <- LETTERS[5:8]
df[["Emo2_Desc"]] <- LETTERS[1:4]
# PID Stage Emo1_ Emo2_ Emo1_Intense Emo2_Intense Emo1_Desc Emo2_Desc Keyword
#1 A-001 Beginning Fear Content 5 1 E A Bus
#2 A-002 End Sadness Depressed 6 2 F B Ceiling
#3 A-003 Middle Happy Lost 7 3 G C Chainsaw
#4 A-004 Middle Anger Sad 8 4 H D Floor
问题:我一时脑子短路,不知道如何将此数据框转换为以下格式:每个变量只占一列,分别记录:1.) 该情绪是第几个被说出的,2.) 说出的是哪种情绪,3.) 针对该情绪的每个后续问题的回答:
# Desired long format: one row per participant x emotion slot.
# Participant-level columns (PID, Stage, Keyword) must repeat for each of a
# participant's emotions, so they are built with `each = 2`; sorting or
# cycling the values only lines up with the wide data by coincidence.
rows <- 1:8
cols <- c("PID", "Stage", "Number", "Emo", "Intense", "Desc", "Keyword")
df <- data.frame(matrix(NA,
                        nrow = length(rows),
                        ncol = length(cols),
                        dimnames = list(rows, cols)))
df$PID <- rep(c("A-001", "A-002", "A-003", "A-004"), each = 2)
df$Stage <- rep(c("Beginning", "End", "Middle", "Middle"), each = 2)
# Position (1st / 2nd) in which the emotion was named.
df$Number <- rep(1:2, times = 4)
df$Emo <- c("Fear", "Content", "Sadness", "Depressed",
            "Happy", "Lost", "Anger", "Sad")
# Interleaves Emo1_Intense (5:8) with Emo2_Intense (1:4) per participant.
df$Intense <- c(5, 1, 6, 2, 7, 3, 8, 4)
df$Desc <- c("E", "A", "F", "B", "G", "C", "H", "D")
df$Keyword <- rep(c("Bus", "Ceiling", "Chainsaw", "Floor"), each = 2)
#     PID     Stage Number       Emo Intense Desc  Keyword
# 1 A-001 Beginning      1      Fear       5    E      Bus
# 2 A-001 Beginning      2   Content       1    A      Bus
# 3 A-002       End      1   Sadness       6    F  Ceiling
# 4 A-002       End      2 Depressed       2    B  Ceiling
# 5 A-003    Middle      1     Happy       7    G Chainsaw
# 6 A-003    Middle      2      Lost       3    C Chainsaw
# 7 A-004    Middle      1     Anger       8    H    Floor
# 8 A-004    Middle      2       Sad       4    D    Floor
我可以手动完成这个转换,但实际数据集比这大得多;而且我知道自己已经用 pivot_longer 轻松做过上千次类似的操作,只是这次怎么也弄不对:列的合并要么太保守,要么太宽松,我很难找到平衡点。
列的命名约定是随意定的。如果换一种命名方式更便于处理,请随意修改!
这个解决方案怎么样?
需要先重命名以“_”结尾的列,并对 Number 列稍作处理(只保留编号)。我相信这也可以用一行代码完成。
# pivot_longer() and starts_with() are provided by tidyr.
library(tidyr)

# Give the bare emotion columns ("Emo1_", "Emo2_", ...) an explicit "Emo"
# suffix so every pivoted column has the form Emo<number>_<variable>.
# Matching one or more digits (rather than a single arbitrary character)
# keeps the rename working past Emo9_.
names(df) <- sub("^(Emo\\d+)_$", "\\1_Emo", names(df))

# The first capture group becomes the Number column (as character); the
# second names the output column through the ".value" sentinel, so no
# separate clean-up of Number is needed afterwards.
answer <- pivot_longer(
  df,
  cols = starts_with("Emo"),
  names_to = c("Number", ".value"),
  names_pattern = "^Emo(\\d+)_(.+)$",
  names_repair = "unique"
)
answer
# A tibble: 8 × 7
PID Stage Keyword Number Emo Intense Desc
<chr> <chr> <chr> <chr> <chr> <int> <chr>
1 A-001 Beginning Bus 1 Fear 5 E
2 A-001 Beginning Bus 2 Content 1 A
3 A-002 End Ceiling 1 Sadness 6 F
4 A-002 End Ceiling 2 Depressed 2 B
5 A-003 Middle Chainsaw 1 Happy 7 G
6 A-003 Middle Chainsaw 2 Lost 3 C
7 A-004 Middle Floor 1 Anger 8 H
8 A-004 Middle Floor 2 Sad 4 D
背景:我们要求每位参与者识别多种情绪,并针对每种情绪收集数据:他们识别出的第一种情绪占一列,第二种情绪占一列,依此类推;此外,每种情绪的每个后续问题也各占单独的一列。宽格式下的数据如下所示:
# Wide-format example: one row per participant, one column per emotion slot
# (Emo1_, Emo2_) plus one column per follow-up question for each slot.
rows <- seq_len(4)
cols <- c(
  "PID", "Stage",
  "Emo1_", "Emo2_",
  "Emo1_Intense", "Emo2_Intense",
  "Emo1_Desc", "Emo2_Desc",
  "Keyword"
)

# Start from an all-NA skeleton so the column order is fixed by `cols`,
# then fill each column in turn.
df <- data.frame(
  matrix(
    NA,
    nrow = length(rows),
    ncol = length(cols),
    dimnames = list(rows, cols)
  )
)

# Participant-level columns.
df[["PID"]] <- sprintf("A-%03d", rows)
df[["Stage"]] <- c("Beginning", "End", "Middle", "Middle")
df[["Keyword"]] <- c("Bus", "Ceiling", "Chainsaw", "Floor")

# First and second named emotion.
df[["Emo1_"]] <- c("Fear", "Sadness", "Happy", "Anger")
df[["Emo2_"]] <- c("Content", "Depressed", "Lost", "Sad")

# Follow-up answers for each emotion slot.
df[["Emo1_Intense"]] <- seq.int(5L, 8L)
df[["Emo2_Intense"]] <- seq.int(1L, 4L)
df[["Emo1_Desc"]] <- LETTERS[5:8]
df[["Emo2_Desc"]] <- LETTERS[1:4]
# PID Stage Emo1_ Emo2_ Emo1_Intense Emo2_Intense Emo1_Desc Emo2_Desc Keyword
#1 A-001 Beginning Fear Content 5 1 E A Bus
#2 A-002 End Sadness Depressed 6 2 F B Ceiling
#3 A-003 Middle Happy Lost 7 3 G C Chainsaw
#4 A-004 Middle Anger Sad 8 4 H D Floor
问题:我一时脑子短路,不知道如何将此数据框转换为以下格式:每个变量只占一列,分别记录:1.) 该情绪是第几个被说出的,2.) 说出的是哪种情绪,3.) 针对该情绪的每个后续问题的回答:
# Desired long format: one row per participant x emotion slot.
# Participant-level columns (PID, Stage, Keyword) must repeat for each of a
# participant's emotions, so they are built with `each = 2`; sorting or
# cycling the values only lines up with the wide data by coincidence.
rows <- 1:8
cols <- c("PID", "Stage", "Number", "Emo", "Intense", "Desc", "Keyword")
df <- data.frame(matrix(NA,
                        nrow = length(rows),
                        ncol = length(cols),
                        dimnames = list(rows, cols)))
df$PID <- rep(c("A-001", "A-002", "A-003", "A-004"), each = 2)
df$Stage <- rep(c("Beginning", "End", "Middle", "Middle"), each = 2)
# Position (1st / 2nd) in which the emotion was named.
df$Number <- rep(1:2, times = 4)
df$Emo <- c("Fear", "Content", "Sadness", "Depressed",
            "Happy", "Lost", "Anger", "Sad")
# Interleaves Emo1_Intense (5:8) with Emo2_Intense (1:4) per participant.
df$Intense <- c(5, 1, 6, 2, 7, 3, 8, 4)
df$Desc <- c("E", "A", "F", "B", "G", "C", "H", "D")
df$Keyword <- rep(c("Bus", "Ceiling", "Chainsaw", "Floor"), each = 2)
#     PID     Stage Number       Emo Intense Desc  Keyword
# 1 A-001 Beginning      1      Fear       5    E      Bus
# 2 A-001 Beginning      2   Content       1    A      Bus
# 3 A-002       End      1   Sadness       6    F  Ceiling
# 4 A-002       End      2 Depressed       2    B  Ceiling
# 5 A-003    Middle      1     Happy       7    G Chainsaw
# 6 A-003    Middle      2      Lost       3    C Chainsaw
# 7 A-004    Middle      1     Anger       8    H    Floor
# 8 A-004    Middle      2       Sad       4    D    Floor
我可以手动完成这个转换,但实际数据集比这大得多;而且我知道自己已经用 pivot_longer 轻松做过上千次类似的操作,只是这次怎么也弄不对:列的合并要么太保守,要么太宽松,我很难找到平衡点。
列的命名约定是随意定的。如果换一种命名方式更便于处理,请随意修改!
这个解决方案怎么样?
需要先重命名以“_”结尾的列,并对 Number 列稍作处理(只保留编号)。我相信这也可以用一行代码完成。
# pivot_longer() and starts_with() are provided by tidyr.
library(tidyr)

# Give the bare emotion columns ("Emo1_", "Emo2_", ...) an explicit "Emo"
# suffix so every pivoted column has the form Emo<number>_<variable>.
# Matching one or more digits (rather than a single arbitrary character)
# keeps the rename working past Emo9_.
names(df) <- sub("^(Emo\\d+)_$", "\\1_Emo", names(df))

# The first capture group becomes the Number column (as character); the
# second names the output column through the ".value" sentinel, so no
# separate clean-up of Number is needed afterwards.
answer <- pivot_longer(
  df,
  cols = starts_with("Emo"),
  names_to = c("Number", ".value"),
  names_pattern = "^Emo(\\d+)_(.+)$",
  names_repair = "unique"
)
answer
# A tibble: 8 × 7
PID Stage Keyword Number Emo Intense Desc
<chr> <chr> <chr> <chr> <chr> <int> <chr>
1 A-001 Beginning Bus 1 Fear 5 E
2 A-001 Beginning Bus 2 Content 1 A
3 A-002 End Ceiling 1 Sadness 6 F
4 A-002 End Ceiling 2 Depressed 2 B
5 A-003 Middle Chainsaw 1 Happy 7 G
6 A-003 Middle Chainsaw 2 Lost 3 C
7 A-004 Middle Floor 1 Anger 8 H
8 A-004 Middle Floor 2 Sad 4 D