dplyr:通过将变量与不同长度的向量进行比较来 mutate 一个变量
dplyr mutate a variable by comparing a variable and vectors of different sizes
我有以下类型的数据框
# Example data: a one-column tibble. Each cell of `x` is a character vector
# of a different length, so `x` is stored as a list column.
df <- tibble::tibble(
  x = list(
    c("A", "B"),
    c("A", "B", "C"),
    c("A", "B", "C", "D"),
    c("A", "B")
  )
)
以及如下这些向量
# Reference vectors that the elements of `x` are compared against
# ("A".."D" are the first four entries of the built-in LETTERS constant).
vec1 <- LETTERS[1:2]
vec2 <- LETTERS[1:3]
vec3 <- LETTERS[1:4]
我想用 mutate 新建一个变量 y,用来标明每一行对应哪个向量。我尝试了下面的写法,但得到的 y 变量是空的,并且出现警告:"longer object length is not a multiple of shorter object length"
# Attempt from the question -- this does NOT produce the desired labels.
# `x` is a list column, and `==` is applied to the whole column against each
# reference vector (with recycling) rather than testing every row's vector
# for equality; this is what raises the "longer object length is not a
# multiple of shorter object length" warning quoted above.
# NOTE(review): the last branch tests `vec2` again although it is labelled
# "vec3"; `vec3` was presumably intended.
df_new <- df %>%
mutate(y = case_when(x == vec1 ~ "vec1",
x == vec2 ~ "vec2",
x == vec2 ~ "vec3"))
期望的输出是
# Desired result: the original list column `x` plus a character column `y`
# naming the reference vector that each row equals.
df_new <- tibble::tibble(
  x = list(
    c("A", "B"),
    c("A", "B", "C"),
    c("A", "B", "C", "D"),
    c("A", "B")
  ),
  y = c("vec1", "vec2", "vec3", "vec1")
)
一种解决方案:使用 map2_lgl 和 identical 来判断向量是否完全相同。
library(tidyverse)
# Label each row by the reference vector its `x` element is identical to.
# Each reference vector is wrapped in list() so that map2_lgl() pairs the
# single vector with every element of the list column; identical() is called
# as identical(<element of x>, <reference vector>).
df_new <- mutate(
  df,
  y = case_when(
    map2_lgl(x, list(vec1), identical) ~ "vec1",
    map2_lgl(x, list(vec2), identical) ~ "vec2",
    map2_lgl(x, list(vec3), identical) ~ "vec3"
  )
)
df_new
# # A tibble: 4 x 2
# x y
# <list> <chr>
# 1 <chr [2]> vec1
# 2 <chr [3]> vec2
# 3 <chr [4]> vec3
# 4 <chr [2]> vec1
这是一个更具编程性的替代方案 - 您不需要明确指定每个向量
数据
# Example data: list column `x` holding character vectors of varying length.
df <- tibble::tibble(
  x = list(
    c("A", "B"),
    c("A", "B", "C"),
    c("A", "B", "C", "D"),
    c("A", "B")
  )
)

# Reference vectors the rows are matched against
# ("A".."D" are the first four entries of the built-in LETTERS constant).
vec1 <- LETTERS[1:2]
vec2 <- LETTERS[1:3]
vec3 <- LETTERS[1:4]
解决方案 - 利用 ls(...) 按名称模式返回相关向量的名称
# Collect the reference vectors by name instead of listing them by hand.
# The pattern is anchored ("vec" followed only by digits) so that other
# objects whose names merely contain "vec" -- including `vecs` itself when
# this snippet is run a second time -- are not picked up.
vecs <- ls(pattern = "^vec[0-9]+$")

# mget() fetches all of them at once and returns a list named by `vecs`.
L <- mget(vecs)

# match() looks up each element of the list column `x` in `L`; rows with no
# counterpart in `L` get NA.
df %>%
  mutate(y = names(L)[match(x, L)])
# A tibble: 4 x 2
# x y
# <list> <chr>
# 1 <chr [2]> vec1
# 2 <chr [3]> vec2
# 3 <chr [4]> vec3
# 4 <chr [2]> vec1
这也行得通:
# Reference vectors in label order: position i corresponds to "vec<i>".
comp <- list(vec1, vec2, vec3)

# For every element of the list column, find its position in `comp` and
# build the label from it.
# - `x` (the column of the piped data) is used rather than `df$x`, so the
#   snippet keeps working when the input is not the global `df`.
# - match() always yields exactly one index (the first hit) or NA, so
#   map_chr() never receives a zero-length or multi-element result.
df %>%
  mutate(y = map_chr(x, function(v) {
    idx <- match(list(v), comp)
    if (is.na(idx)) NA_character_ else paste0("vec", idx)
  }))
# A tibble: 4 x 2
# x y
# <list> <chr>
# 1 <chr [2]> vec1
# 2 <chr [3]> vec2
# 3 <chr [4]> vec3
# 4 <chr [2]> vec1
另一种做法是结合使用 compare::compareEqual 和 apply,如下所示:
library(dplyr)
library(compare)
# Label each element of the list column `x` by the reference vector it
# compares equal to (NA when it equals none of them).
# vapply() iterates over the list column directly instead of apply(), which
# would first coerce the whole data frame to a matrix; it also guarantees a
# character result of length one per row.
# NOTE(review): isTRUE() is expected to resolve to the version provided by
# library(compare) for comparison objects -- confirm the package is attached.
df$y <- vapply(df$x, function(v) {
  if (isTRUE(compareEqual(v, vec1))) {
    "vec1"
  } else if (isTRUE(compareEqual(v, vec2))) {
    "vec2"
  } else if (isTRUE(compareEqual(v, vec3))) {
    "vec3"
  } else {
    NA_character_
  }
}, character(1))
df
# # A tibble: 4 x 2
# x y
# <list> <chr>
# 1 <chr [2]> vec1
# 2 <chr [3]> vec2
# 3 <chr [4]> vec3
# 4 <chr [2]> vec1
很遗憾,无法直接对这类特殊格式的列执行 left_join,但我们可以取个巧,改为按 dput 生成的字符串进行连接:
library(tidyverse)
# Lookup table: one row per reference vector, with `name` = the vector's
# name and `value` = its dput() text, which serves as the join key.
# capture.output() returns one string per printed line, so the pieces are
# pasted together to guarantee the single string per element that map_chr()
# requires, even for vectors whose dput() output wraps.
lkp <- enframe(map_chr(
  lst(vec1, vec2, vec3),
  ~ paste(capture.output(dput(.x)), collapse = "")
))

# Build the same key for every row, join on it explicitly, then drop the
# helper column so only `x` and `name` remain.
df %>%
  mutate(
    value = map_chr(x, ~ paste(capture.output(dput(.x)), collapse = ""))
  ) %>%
  left_join(lkp, by = "value") %>%
  select(-value)
# # A tibble: 4 x 2
# x name
# <list> <chr>
# 1 <chr [2]> vec1
# 2 <chr [3]> vec2
# 3 <chr [4]> vec3
# 4 <chr [2]> vec1
我有以下类型的数据框
# Example data: a one-column tibble. Each cell of `x` is a character vector
# of a different length, so `x` is stored as a list column.
df <- tibble::tibble(
  x = list(
    c("A", "B"),
    c("A", "B", "C"),
    c("A", "B", "C", "D"),
    c("A", "B")
  )
)
以及如下这些向量
# Reference vectors that the elements of `x` are compared against
# ("A".."D" are the first four entries of the built-in LETTERS constant).
vec1 <- LETTERS[1:2]
vec2 <- LETTERS[1:3]
vec3 <- LETTERS[1:4]
我想用 mutate 新建一个变量 y,用来标明每一行对应哪个向量。我尝试了下面的写法,但得到的 y 变量是空的,并且出现警告:"longer object length is not a multiple of shorter object length"
# Attempt from the question -- this does NOT produce the desired labels.
# `x` is a list column, and `==` is applied to the whole column against each
# reference vector (with recycling) rather than testing every row's vector
# for equality; this is what raises the "longer object length is not a
# multiple of shorter object length" warning quoted above.
# NOTE(review): the last branch tests `vec2` again although it is labelled
# "vec3"; `vec3` was presumably intended.
df_new <- df %>%
mutate(y = case_when(x == vec1 ~ "vec1",
x == vec2 ~ "vec2",
x == vec2 ~ "vec3"))
期望的输出是
# Desired result: the original list column `x` plus a character column `y`
# naming the reference vector that each row equals.
df_new <- tibble::tibble(
  x = list(
    c("A", "B"),
    c("A", "B", "C"),
    c("A", "B", "C", "D"),
    c("A", "B")
  ),
  y = c("vec1", "vec2", "vec3", "vec1")
)
一种解决方案:使用 map2_lgl 和 identical 来判断向量是否完全相同。
library(tidyverse)
# Label each row by the reference vector its `x` element is identical to.
# Each reference vector is wrapped in list() so that map2_lgl() pairs the
# single vector with every element of the list column; identical() is called
# as identical(<element of x>, <reference vector>).
df_new <- mutate(
  df,
  y = case_when(
    map2_lgl(x, list(vec1), identical) ~ "vec1",
    map2_lgl(x, list(vec2), identical) ~ "vec2",
    map2_lgl(x, list(vec3), identical) ~ "vec3"
  )
)
df_new
# # A tibble: 4 x 2
# x y
# <list> <chr>
# 1 <chr [2]> vec1
# 2 <chr [3]> vec2
# 3 <chr [4]> vec3
# 4 <chr [2]> vec1
这是一个更具编程性的替代方案 - 您不需要明确指定每个向量
数据
# Example data: list column `x` holding character vectors of varying length.
df <- tibble::tibble(
  x = list(
    c("A", "B"),
    c("A", "B", "C"),
    c("A", "B", "C", "D"),
    c("A", "B")
  )
)

# Reference vectors the rows are matched against
# ("A".."D" are the first four entries of the built-in LETTERS constant).
vec1 <- LETTERS[1:2]
vec2 <- LETTERS[1:3]
vec3 <- LETTERS[1:4]
解决方案 - 利用 ls(...) 按名称模式返回相关向量的名称
# Collect the reference vectors by name instead of listing them by hand.
# The pattern is anchored ("vec" followed only by digits) so that other
# objects whose names merely contain "vec" -- including `vecs` itself when
# this snippet is run a second time -- are not picked up.
vecs <- ls(pattern = "^vec[0-9]+$")

# mget() fetches all of them at once and returns a list named by `vecs`.
L <- mget(vecs)

# match() looks up each element of the list column `x` in `L`; rows with no
# counterpart in `L` get NA.
df %>%
  mutate(y = names(L)[match(x, L)])
# A tibble: 4 x 2
# x y
# <list> <chr>
# 1 <chr [2]> vec1
# 2 <chr [3]> vec2
# 3 <chr [4]> vec3
# 4 <chr [2]> vec1
这也行得通:
# Reference vectors in label order: position i corresponds to "vec<i>".
comp <- list(vec1, vec2, vec3)

# For every element of the list column, find its position in `comp` and
# build the label from it.
# - `x` (the column of the piped data) is used rather than `df$x`, so the
#   snippet keeps working when the input is not the global `df`.
# - match() always yields exactly one index (the first hit) or NA, so
#   map_chr() never receives a zero-length or multi-element result.
df %>%
  mutate(y = map_chr(x, function(v) {
    idx <- match(list(v), comp)
    if (is.na(idx)) NA_character_ else paste0("vec", idx)
  }))
# A tibble: 4 x 2
# x y
# <list> <chr>
# 1 <chr [2]> vec1
# 2 <chr [3]> vec2
# 3 <chr [4]> vec3
# 4 <chr [2]> vec1
另一种做法是结合使用 compare::compareEqual 和 apply,如下所示:
library(dplyr)
library(compare)
# Label each element of the list column `x` by the reference vector it
# compares equal to (NA when it equals none of them).
# vapply() iterates over the list column directly instead of apply(), which
# would first coerce the whole data frame to a matrix; it also guarantees a
# character result of length one per row.
# NOTE(review): isTRUE() is expected to resolve to the version provided by
# library(compare) for comparison objects -- confirm the package is attached.
df$y <- vapply(df$x, function(v) {
  if (isTRUE(compareEqual(v, vec1))) {
    "vec1"
  } else if (isTRUE(compareEqual(v, vec2))) {
    "vec2"
  } else if (isTRUE(compareEqual(v, vec3))) {
    "vec3"
  } else {
    NA_character_
  }
}, character(1))
df
# # A tibble: 4 x 2
# x y
# <list> <chr>
# 1 <chr [2]> vec1
# 2 <chr [3]> vec2
# 3 <chr [4]> vec3
# 4 <chr [2]> vec1
很遗憾,无法直接对这类特殊格式的列执行 left_join,但我们可以取个巧,改为按 dput 生成的字符串进行连接:
library(tidyverse)
# Lookup table: one row per reference vector, with `name` = the vector's
# name and `value` = its dput() text, which serves as the join key.
# capture.output() returns one string per printed line, so the pieces are
# pasted together to guarantee the single string per element that map_chr()
# requires, even for vectors whose dput() output wraps.
lkp <- enframe(map_chr(
  lst(vec1, vec2, vec3),
  ~ paste(capture.output(dput(.x)), collapse = "")
))

# Build the same key for every row, join on it explicitly, then drop the
# helper column so only `x` and `name` remain.
df %>%
  mutate(
    value = map_chr(x, ~ paste(capture.output(dput(.x)), collapse = ""))
  ) %>%
  left_join(lkp, by = "value") %>%
  select(-value)
# # A tibble: 4 x 2
# x name
# <list> <chr>
# 1 <chr [2]> vec1
# 2 <chr [3]> vec2
# 3 <chr [4]> vec3
# 4 <chr [2]> vec1