重命名 R 中的相关变量组
Renaming group of related variables in R
注意:我不确定这个问题该起什么标题比较好。如果有更合适的标题,我很乐意修改。
假设我有这个测试数据框:
library(tidyverse)
# Sample data: three user slots per row, plus three sets of per-slot values
# (first_*, second_*, third_*) that hold each user's initial.
test.dat <- tibble(
  User1 = c("Aaron", "Aaron", "Charlie"),
  User2 = c("Ben", "Ben", "Aaron"),
  User3 = c("Charlie", "Charlie", "Ben"),
  first_1 = c("A", "A", "C"),
  first_2 = c("B", "B", "A"),
  first_3 = c("C", "C", "B"),
  # tibble() builds columns in order, so the identical second_* and third_*
  # sets can simply reuse the first_* columns defined above.
  second_1 = first_1,
  second_2 = first_2,
  second_3 = first_3,
  third_1 = first_1,
  third_2 = first_2,
  third_3 = first_3
)
# A tibble: 3 x 12
User1 User2 User3 first_1 first_2 first_3 second_1 second_2 second_3 third_1 third_2 third_3
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 Aaron Ben Charlie A B C A B C A B C
2 Aaron Ben Charlie A B C A B C A B C
3 Charlie Aaron Ben C A B C A B C A B
我希望 User1 以及带有 _1 后缀的变量始终指代 Aaron,User2 以及带有 _2 后缀的变量始终指代 Ben,依此类推。
我目前的做法是创建临时变量 U1Loc、U2Loc、U3Loc 来定位每个名字在 User1:User3 中的位置,然后使用 case_when 把各列重新排列:
# Find where each person sits among User1:User3 on every row, then use that
# position to pull the matching User / first_* / second_* value into a fixed
# slot (slot 1 = Aaron, slot 2 = Ben, slot 3 = Charlie).
# The third_* columns are not rearranged by this attempt.
test.try <- test.dat %>%
  rowwise() %>%
  mutate(
    # Position (1, 2 or 3) of each person within User1:User3
    U1Loc = case_when(
      User1 == "Aaron" ~ 1,
      User2 == "Aaron" ~ 2,
      User3 == "Aaron" ~ 3
    ),
    U2Loc = case_when(
      User1 == "Ben" ~ 1,
      User2 == "Ben" ~ 2,
      User3 == "Ben" ~ 3
    ),
    U3Loc = case_when(
      User1 == "Charlie" ~ 1,
      User2 == "Charlie" ~ 2,
      User3 == "Charlie" ~ 3
    ),
    # User name for each fixed slot
    newUser_1 = case_when(
      U1Loc == 1 ~ User1,
      U1Loc == 2 ~ User2,
      U1Loc == 3 ~ User3
    ),
    newUser_2 = case_when(
      U2Loc == 1 ~ User1,
      U2Loc == 2 ~ User2,
      U2Loc == 3 ~ User3
    ),
    newUser_3 = case_when(
      U3Loc == 1 ~ User1,
      U3Loc == 2 ~ User2,
      U3Loc == 3 ~ User3
    ),
    # first_* value for each fixed slot
    newFirst_1 = case_when(
      U1Loc == 1 ~ first_1,
      U1Loc == 2 ~ first_2,
      U1Loc == 3 ~ first_3
    ),
    newFirst_2 = case_when(
      U2Loc == 1 ~ first_1,
      U2Loc == 2 ~ first_2,
      U2Loc == 3 ~ first_3
    ),
    newFirst_3 = case_when(
      U3Loc == 1 ~ first_1,
      U3Loc == 2 ~ first_2,
      U3Loc == 3 ~ first_3
    ),
    # second_* value for each fixed slot
    newSecond_1 = case_when(
      U1Loc == 1 ~ second_1,
      U1Loc == 2 ~ second_2,
      U1Loc == 3 ~ second_3
    ),
    newSecond_2 = case_when(
      U2Loc == 1 ~ second_1,
      U2Loc == 2 ~ second_2,
      U2Loc == 3 ~ second_3
    ),
    newSecond_3 = case_when(
      U3Loc == 1 ~ second_1,
      U3Loc == 2 ~ second_2,
      U3Loc == 3 ~ second_3
    )
  ) %>%
  # Keep only the rearranged columns
  select(starts_with("new"))
得到
> test.try
# A tibble: 3 x 9
# Rowwise:
newUser_1 newUser_2 newUser_3 newFirst_1 newFirst_2 newFirst_3 newSecond_1 newSecond_2 newSecond_3
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 Aaron Ben Charlie A B C A B C
2 Aaron Ben Charlie A B C A B C
3 Aaron Ben Charlie A B C A B C
但是,变量越多,这种写法就越繁琐。除了 for 循环之外,有没有其他办法实现这一点,最好是 tidyverse 风格的方法?我猜可以使用 across(),但我没能让它按预期工作。
我们可以先把数据转换为“长”格式,替换其中的值之后再重塑回“宽”格式,这样就能轻松做到这一点:
library(dplyr)
library(tidyr)
# Reshape to long form so each (row, slot) pair is one record, overwrite every
# slot with the value found in the first row of that slot, then widen again.
# NOTE: this copies row 1's values into every row, so it only gives the
# desired result when row 1 is already in the target order.
out <- test.dat %>%
  mutate(rn = row_number()) %>%
  pivot_longer(
    cols = -rn,
    names_to = c(".value", "grp"),
    # Split between the name stem and its trailing digit, consuming an
    # optional "_" (so both "User1" and "first_1" are handled).
    names_sep = "(?<=[a-z])_?(?=[0-9])"
  ) %>%
  group_by(grp) %>%
  # dplyr::first is spelled out because the data also has a column named
  # `first`; the namespace prefix makes it clear the function is meant.
  mutate(across(User:third, dplyr::first)) %>%
  # Drop the grouping so it cannot leak into the reshape or later steps.
  ungroup() %>%
  pivot_wider(names_from = grp, values_from = c(User, first, second, third)) %>%
  select(-rn)
输出:
out
# A tibble: 3 x 12
# User_1 User_2 User_3 first_1 first_2 first_3 second_1 second_2 second_3 third_1 third_2 third_3
# <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 Aaron Ben Charlie A B C A B C A B C
#2 Aaron Ben Charlie A B C A B C A B C
#3 Aaron Ben Charlie A B C A B C A B C
如果我们想给各列的列名加上 new 前缀:
library(stringr)
# Prefix every column name with "new" (e.g. first_1 becomes newfirst_1).
# rename_with() replaces the superseded rename_all(); with no column
# selection given it applies the function to all columns.
out %>%
  rename_with(~ str_c("new", .x))
另一种选择是创建一个键值数据集,这样即使列中的值不按顺序,也可以进行匹配和替换:
# Lookup table: slot number -> the user that slot should hold and that user's
# initial. grp is stored as character so it can be joined against the grp
# column that pivot_longer() derives from the column names.
keydat <- tibble(
  grp = as.character(1:3),
  UserKey = c("Aaron", "Ben", "Charlie"),
  abbr = substr(UserKey, 1, 1)
)

# Reshape to long form, replace each slot's values with the ones from the
# lookup table, then widen back to one row per original row.
test.dat %>%
  mutate(rn = row_number()) %>%
  pivot_longer(
    cols = -rn,
    names_to = c(".value", "grp"),
    # Split between the name stem and its trailing digit, consuming an
    # optional "_" (so both "User1" and "first_1" are handled).
    names_sep = "(?<=[a-z])_?(?=[0-9])"
  ) %>%
  # Name the join key explicitly instead of relying on the natural-join default.
  left_join(keydat, by = "grp") %>%
  mutate(
    User = UserKey,
    across(first:third, ~ abbr)
  ) %>%
  # The helper columns from keydat are no longer needed.
  select(-UserKey, -abbr) %>%
  pivot_wider(names_from = grp, values_from = c(User, first, second, third)) %>%
  select(-rn)
# A tibble: 3 x 12
# User_1 User_2 User_3 first_1 first_2 first_3 second_1 second_2 second_3 third_1 third_2 third_3
# <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 Aaron Ben Charlie A B C A B C A B C
#2 Aaron Ben Charlie A B C A B C A B C
#3 Aaron Ben Charlie A B C A B C A B C
注意:我不确定这个问题该起什么标题比较好。如果有更合适的标题,我很乐意修改。
假设我有这个测试数据框:
library(tidyverse)
# Sample data: three user slots per row, plus three sets of per-slot values
# (first_*, second_*, third_*) that hold each user's initial.
test.dat <- tibble(
  User1 = c("Aaron", "Aaron", "Charlie"),
  User2 = c("Ben", "Ben", "Aaron"),
  User3 = c("Charlie", "Charlie", "Ben"),
  first_1 = c("A", "A", "C"),
  first_2 = c("B", "B", "A"),
  first_3 = c("C", "C", "B"),
  # tibble() builds columns in order, so the identical second_* and third_*
  # sets can simply reuse the first_* columns defined above.
  second_1 = first_1,
  second_2 = first_2,
  second_3 = first_3,
  third_1 = first_1,
  third_2 = first_2,
  third_3 = first_3
)
# A tibble: 3 x 12
User1 User2 User3 first_1 first_2 first_3 second_1 second_2 second_3 third_1 third_2 third_3
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 Aaron Ben Charlie A B C A B C A B C
2 Aaron Ben Charlie A B C A B C A B C
3 Charlie Aaron Ben C A B C A B C A B
我希望 User1 以及带有 _1 后缀的变量始终指代 Aaron,User2 以及带有 _2 后缀的变量始终指代 Ben,依此类推。
我目前的做法是创建临时变量 U1Loc、U2Loc、U3Loc 来定位每个名字在 User1:User3 中的位置,然后使用 case_when 把各列重新排列:
# Find where each person sits among User1:User3 on every row, then use that
# position to pull the matching User / first_* / second_* value into a fixed
# slot (slot 1 = Aaron, slot 2 = Ben, slot 3 = Charlie).
# The third_* columns are not rearranged by this attempt.
test.try <- test.dat %>%
  rowwise() %>%
  mutate(
    # Position (1, 2 or 3) of each person within User1:User3
    U1Loc = case_when(
      User1 == "Aaron" ~ 1,
      User2 == "Aaron" ~ 2,
      User3 == "Aaron" ~ 3
    ),
    U2Loc = case_when(
      User1 == "Ben" ~ 1,
      User2 == "Ben" ~ 2,
      User3 == "Ben" ~ 3
    ),
    U3Loc = case_when(
      User1 == "Charlie" ~ 1,
      User2 == "Charlie" ~ 2,
      User3 == "Charlie" ~ 3
    ),
    # User name for each fixed slot
    newUser_1 = case_when(
      U1Loc == 1 ~ User1,
      U1Loc == 2 ~ User2,
      U1Loc == 3 ~ User3
    ),
    newUser_2 = case_when(
      U2Loc == 1 ~ User1,
      U2Loc == 2 ~ User2,
      U2Loc == 3 ~ User3
    ),
    newUser_3 = case_when(
      U3Loc == 1 ~ User1,
      U3Loc == 2 ~ User2,
      U3Loc == 3 ~ User3
    ),
    # first_* value for each fixed slot
    newFirst_1 = case_when(
      U1Loc == 1 ~ first_1,
      U1Loc == 2 ~ first_2,
      U1Loc == 3 ~ first_3
    ),
    newFirst_2 = case_when(
      U2Loc == 1 ~ first_1,
      U2Loc == 2 ~ first_2,
      U2Loc == 3 ~ first_3
    ),
    newFirst_3 = case_when(
      U3Loc == 1 ~ first_1,
      U3Loc == 2 ~ first_2,
      U3Loc == 3 ~ first_3
    ),
    # second_* value for each fixed slot
    newSecond_1 = case_when(
      U1Loc == 1 ~ second_1,
      U1Loc == 2 ~ second_2,
      U1Loc == 3 ~ second_3
    ),
    newSecond_2 = case_when(
      U2Loc == 1 ~ second_1,
      U2Loc == 2 ~ second_2,
      U2Loc == 3 ~ second_3
    ),
    newSecond_3 = case_when(
      U3Loc == 1 ~ second_1,
      U3Loc == 2 ~ second_2,
      U3Loc == 3 ~ second_3
    )
  ) %>%
  # Keep only the rearranged columns
  select(starts_with("new"))
得到
> test.try
# A tibble: 3 x 9
# Rowwise:
newUser_1 newUser_2 newUser_3 newFirst_1 newFirst_2 newFirst_3 newSecond_1 newSecond_2 newSecond_3
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 Aaron Ben Charlie A B C A B C
2 Aaron Ben Charlie A B C A B C
3 Aaron Ben Charlie A B C A B C
但是,变量越多,这种写法就越繁琐。除了 for 循环之外,有没有其他办法实现这一点,最好是 tidyverse 风格的方法?我猜可以使用 across(),但我没能让它按预期工作。
我们可以先把数据转换为“长”格式,替换其中的值之后再重塑回“宽”格式,这样就能轻松做到这一点:
library(dplyr)
library(tidyr)
# Reshape to long form so each (row, slot) pair is one record, overwrite every
# slot with the value found in the first row of that slot, then widen again.
# NOTE: this copies row 1's values into every row, so it only gives the
# desired result when row 1 is already in the target order.
out <- test.dat %>%
  mutate(rn = row_number()) %>%
  pivot_longer(
    cols = -rn,
    names_to = c(".value", "grp"),
    # Split between the name stem and its trailing digit, consuming an
    # optional "_" (so both "User1" and "first_1" are handled).
    names_sep = "(?<=[a-z])_?(?=[0-9])"
  ) %>%
  group_by(grp) %>%
  # dplyr::first is spelled out because the data also has a column named
  # `first`; the namespace prefix makes it clear the function is meant.
  mutate(across(User:third, dplyr::first)) %>%
  # Drop the grouping so it cannot leak into the reshape or later steps.
  ungroup() %>%
  pivot_wider(names_from = grp, values_from = c(User, first, second, third)) %>%
  select(-rn)
输出:
out
# A tibble: 3 x 12
# User_1 User_2 User_3 first_1 first_2 first_3 second_1 second_2 second_3 third_1 third_2 third_3
# <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 Aaron Ben Charlie A B C A B C A B C
#2 Aaron Ben Charlie A B C A B C A B C
#3 Aaron Ben Charlie A B C A B C A B C
如果我们想给各列的列名加上 new 前缀:
library(stringr)
# Prefix every column name with "new" (e.g. first_1 becomes newfirst_1).
# rename_with() replaces the superseded rename_all(); with no column
# selection given it applies the function to all columns.
out %>%
  rename_with(~ str_c("new", .x))
另一种选择是创建一个键值数据集,这样即使列中的值不按顺序,也可以进行匹配和替换:
# Lookup table: slot number -> the user that slot should hold and that user's
# initial. grp is stored as character so it can be joined against the grp
# column that pivot_longer() derives from the column names.
keydat <- tibble(
  grp = as.character(1:3),
  UserKey = c("Aaron", "Ben", "Charlie"),
  abbr = substr(UserKey, 1, 1)
)

# Reshape to long form, replace each slot's values with the ones from the
# lookup table, then widen back to one row per original row.
test.dat %>%
  mutate(rn = row_number()) %>%
  pivot_longer(
    cols = -rn,
    names_to = c(".value", "grp"),
    # Split between the name stem and its trailing digit, consuming an
    # optional "_" (so both "User1" and "first_1" are handled).
    names_sep = "(?<=[a-z])_?(?=[0-9])"
  ) %>%
  # Name the join key explicitly instead of relying on the natural-join default.
  left_join(keydat, by = "grp") %>%
  mutate(
    User = UserKey,
    across(first:third, ~ abbr)
  ) %>%
  # The helper columns from keydat are no longer needed.
  select(-UserKey, -abbr) %>%
  pivot_wider(names_from = grp, values_from = c(User, first, second, third)) %>%
  select(-rn)
# A tibble: 3 x 12
# User_1 User_2 User_3 first_1 first_2 first_3 second_1 second_2 second_3 third_1 third_2 third_3
# <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 Aaron Ben Charlie A B C A B C A B C
#2 Aaron Ben Charlie A B C A B C A B C
#3 Aaron Ben Charlie A B C A B C A B C