替换 dplyr 管道中的重复元素
Replace duplicate elements in dplyr pipe
我想替换组中的重复元素
# Example data: a value column A and the group each row belongs to.
df <- data.frame(
  A = c("a", "a", "a", "b", "b", "c"),
  group = c(1, 1, 2, 2, 2, 3)
)
我想保留组的第一个元素,同时用 NA 替换其他任何元素。类似于:
# Attempt from the question: first(A) is evaluated once per group, so every
# row of a group receives that group's first value of A.
df <- df %>%
  group_by(group) %>%
  mutate(B = first(A))
但这不是我想要的结果。我想要的是 B <- c("a", NA, "a", NA, NA, "c")
将 replace 与 duplicated 一起使用(把组内重复出现的值替换为 NA):
# Within each group, set repeated occurrences of a value of A to NA.
# duplicated() flags every occurrence after the first, so a value that is
# new within its group (e.g. "b" in group 2) is kept.
df %>%
  group_by(group) %>%
  mutate(B = replace(A, duplicated(A), NA))
# A tibble: 6 x 3
# Groups: group [3]
# A group B
# <fctr> <dbl> <fctr>
#1 a 1 a
#2 a 1 NA
#3 a 2 a
#4 b 2 b
#5 b 2 NA
#6 c 3 c
或者如果只保留第一个元素:
# Keep A (converted to character) only on the first row of each group and
# use NA on every other row. as.character() makes ifelse() return the
# labels rather than the integer codes when A is a factor.
df %>%
  group_by(group) %>%
  mutate(B = ifelse(row_number() == 1, as.character(A), NA))
# A tibble: 6 x 3
# Groups: group [3]
# A group B
# <fctr> <dbl> <chr>
#1 a 1 a
#2 a 1 <NA>
#3 a 2 a
#4 b 2 <NA>
#5 b 2 <NA>
#6 c 3 c
或使用 replace:
# Blank out every row after the first within each group; B keeps the type
# of A because replace() only overwrites the selected positions.
df %>%
  group_by(group) %>%
  mutate(B = replace(A, row_number() > 1, NA))
# A tibble: 6 x 3
# Groups: group [3]
# A group B
# <fctr> <dbl> <fctr>
#1 a 1 a
#2 a 1 NA
#3 a 2 a
#4 b 2 NA
#5 b 2 NA
#6 c 3 c
在 data.table 中,你可以这样做:
library(data.table)
# setDT() converts df to a data.table by reference; := then adds B in place.
# as.character() keeps the labels when A is a factor: combining a factor
# with NA via c() would otherwise produce the integer level codes.
# NA_character_ keeps B character in every group, including groups of size 1.
setDT(df)[,
  B := c(as.character(A[1L]), rep(NA_character_, .N - 1L)),
  by = group
]
或者在 dplyr 中使用相同的逻辑:
library(dplyr)
# Per group: the first value of A (as character) followed by NA for each
# remaining row of that group.
df %>%
  group_by(group) %>%
  mutate(B = c(as.character(A[1]), rep(NA, n() - 1)))
# A tibble: 6 x 3
# Groups: group [3]
# A group B
# <fctr> <dbl> <chr>
#1 a 1 a
#2 a 1 <NA>
#3 a 2 a
#4 b 2 <NA>
#5 b 2 <NA>
#6 c 3 c
我想替换组中的重复元素
# Example data: a value column A and the group each row belongs to.
df <- data.frame(
  A = c("a", "a", "a", "b", "b", "c"),
  group = c(1, 1, 2, 2, 2, 3)
)
我想保留组的第一个元素,同时用 NA 替换其他任何元素。类似于:
# Attempt from the question: first(A) is evaluated once per group, so every
# row of a group receives that group's first value of A.
df <- df %>%
  group_by(group) %>%
  mutate(B = first(A))
但这不是我想要的结果。我想要的是 B <- c("a", NA, "a", NA, NA, "c")
将 replace 与 duplicated 一起使用(把组内重复出现的值替换为 NA):
# Within each group, set repeated occurrences of a value of A to NA.
# duplicated() flags every occurrence after the first, so a value that is
# new within its group (e.g. "b" in group 2) is kept.
df %>%
  group_by(group) %>%
  mutate(B = replace(A, duplicated(A), NA))
# A tibble: 6 x 3
# Groups: group [3]
# A group B
# <fctr> <dbl> <fctr>
#1 a 1 a
#2 a 1 NA
#3 a 2 a
#4 b 2 b
#5 b 2 NA
#6 c 3 c
或者如果只保留第一个元素:
# Keep A (converted to character) only on the first row of each group and
# use NA on every other row. as.character() makes ifelse() return the
# labels rather than the integer codes when A is a factor.
df %>%
  group_by(group) %>%
  mutate(B = ifelse(row_number() == 1, as.character(A), NA))
# A tibble: 6 x 3
# Groups: group [3]
# A group B
# <fctr> <dbl> <chr>
#1 a 1 a
#2 a 1 <NA>
#3 a 2 a
#4 b 2 <NA>
#5 b 2 <NA>
#6 c 3 c
或使用 replace:
# Blank out every row after the first within each group; B keeps the type
# of A because replace() only overwrites the selected positions.
df %>%
  group_by(group) %>%
  mutate(B = replace(A, row_number() > 1, NA))
# A tibble: 6 x 3
# Groups: group [3]
# A group B
# <fctr> <dbl> <fctr>
#1 a 1 a
#2 a 1 NA
#3 a 2 a
#4 b 2 NA
#5 b 2 NA
#6 c 3 c
在 data.table 中,你可以这样做:
library(data.table)
# setDT() converts df to a data.table by reference; := then adds B in place.
# as.character() keeps the labels when A is a factor: combining a factor
# with NA via c() would otherwise produce the integer level codes.
# NA_character_ keeps B character in every group, including groups of size 1.
setDT(df)[,
  B := c(as.character(A[1L]), rep(NA_character_, .N - 1L)),
  by = group
]
或者在 dplyr 中使用相同的逻辑:
library(dplyr)
# Per group: the first value of A (as character) followed by NA for each
# remaining row of that group.
df %>%
  group_by(group) %>%
  mutate(B = c(as.character(A[1]), rep(NA, n() - 1)))
# A tibble: 6 x 3
# Groups: group [3]
# A group B
# <fctr> <dbl> <chr>
#1 a 1 a
#2 a 1 <NA>
#3 a 2 a
#4 b 2 <NA>
#5 b 2 <NA>
#6 c 3 c