找出 R 中两列元素之间的差异
Find the difference between the elements of two columns in R
如何把 factor_Nov 和 factor_Jan 之间不相同的元素放入名为 diff 的新列中?
# Example input: every factor_* cell holds a "|"-separated list of tokens.
df <- data.frame(
  id = c("1", "2", "3"),
  factor_Nov = c("A|B|C", "E", "F|H|G"),
  factor_Jan = c("B|H|E", "E", "X|Y|Z")
)
输出应该是
# Desired result: `diff` lists the tokens that occur in only one of the two
# factor_* columns; it is NA when both columns hold the same tokens.
df <- data.frame(
  id = c("1", "2", "3"),
  factor_Nov = c("A|B|C", "E", "F|H|G"),
  factor_Jan = c("B|H|E", "E", "X|Y|Z"),
  diff = c("A|C|H|E", NA, "X|Y|Z|F|H|G")
)
我试过 setdiff 但没用
一种方法是先用 strsplit 按分隔符 | 拆分两列,然后用 Map 逐行取出不属于 intersect(交集)的元素,最后用 paste 以 collapse = "|" 把它们拼接起来:
# Join the tokens that occur in exactly one of `x` and `y` (their symmetric
# difference) with "|".
# paste() on an empty vector returns "", so identical token sets are mapped to
# NA explicitly, which is what the expected output asks for.
collapse_sym_diff <- function(x, y) {
  only_one <- setdiff(union(x, y), intersect(x, y))
  if (length(only_one) == 0L) {
    return(NA_character_)
  }
  paste(only_one, collapse = "|")
}

# Split both columns on a literal "|" and compare them row by row.
df$diff <- unlist(
  Map(
    collapse_sym_diff,
    strsplit(as.character(df$factor_Nov), "|", fixed = TRUE),
    strsplit(as.character(df$factor_Jan), "|", fixed = TRUE)
  ),
  use.names = FALSE
)
使用 tidyverse:
library(dplyr)
library(tidyr)
#Code
# Reshape to one row per (id, source column, token), keep the tokens that occur
# in only one of the two columns for an id, and join the result back onto df.
# Pivoting before splitting lets the two columns hold different numbers of
# tokens; splitting both columns in one separate_rows() call needs matching
# token counts in every row.
new <- df %>%
  left_join(
    df %>%
      pivot_longer(c(factor_Nov, factor_Jan)) %>%
      # Split on the literal "|" only, not on every non-alphanumeric character.
      separate_rows(value, sep = "\\|") %>%
      # A token repeated inside one cell must count once for that column.
      distinct(id, name, value) %>%
      group_by(id, value) %>%
      filter(n() == 1) %>%
      ungroup() %>%
      arrange(id, value) %>%
      group_by(id) %>%
      summarise(Diff = paste0(value, collapse = "|")),
    # ids with no differing token have no match and therefore get NA in Diff.
    by = "id"
  )
输出:
id factor_Nov factor_Jan Diff
1 1 A|B|C B|H|E A|C|E|H
2 2 E E <NA>
3 3 F|H|G X|Y|Z F|G|H|X|Y|Z
使用 data.table 的一种方案:
library(data.table)

# Convert df to a data.table by reference and add `diff`: for each row, the
# tokens found in only one of the columns whose names start with "factor_",
# joined with "|". Rows with identical token sets produce "".
setDT(df)[
  ,
  diff := do.call(
    Map,
    c(
      function(...) paste0(setdiff(union(...), intersect(...)), collapse = "|"),
      # "\|" is not a valid escape in an R string and stops the script at parse
      # time; split on a literal "|" instead.
      unname(lapply(.SD, strsplit, split = "|", fixed = TRUE))
    )
  ),
  .SDcols = patterns("^factor_")
]
结果为:
> df
id factor_Nov factor_Jan diff
1: 1 A|B|C B|H|E A|C|H|E
2: 2 E E
3: 3 F|H|G X|Y|Z F|H|G|X|Y|Z
# Question: how can the elements that differ between factor_Nov and factor_Jan
# be put into a new column named diff?
# Example input: every factor_* cell holds a "|"-separated list of tokens.
df <- data.frame(
  id = c("1", "2", "3"),
  factor_Nov = c("A|B|C", "E", "F|H|G"),
  factor_Jan = c("B|H|E", "E", "X|Y|Z")
)
输出应该是
# Desired result: `diff` lists the tokens that occur in only one of the two
# factor_* columns; it is NA when both columns hold the same tokens.
df <- data.frame(
  id = c("1", "2", "3"),
  factor_Nov = c("A|B|C", "E", "F|H|G"),
  factor_Jan = c("B|H|E", "E", "X|Y|Z"),
  diff = c("A|C|H|E", NA, "X|Y|Z|F|H|G")
)
我试过 setdiff 但没用
一种方法是先用 strsplit 按分隔符 | 拆分两列,然后用 Map 逐行取出不属于 intersect(交集)的元素,最后用 paste 以 collapse = "|" 把它们拼接起来:
# Join the tokens that occur in exactly one of `x` and `y` (their symmetric
# difference) with "|".
# paste() on an empty vector returns "", so identical token sets are mapped to
# NA explicitly, which is what the expected output asks for.
collapse_sym_diff <- function(x, y) {
  only_one <- setdiff(union(x, y), intersect(x, y))
  if (length(only_one) == 0L) {
    return(NA_character_)
  }
  paste(only_one, collapse = "|")
}

# Split both columns on a literal "|" and compare them row by row.
df$diff <- unlist(
  Map(
    collapse_sym_diff,
    strsplit(as.character(df$factor_Nov), "|", fixed = TRUE),
    strsplit(as.character(df$factor_Jan), "|", fixed = TRUE)
  ),
  use.names = FALSE
)
使用 tidyverse:
library(dplyr)
library(tidyr)
#Code
# Reshape to one row per (id, source column, token), keep the tokens that occur
# in only one of the two columns for an id, and join the result back onto df.
# Pivoting before splitting lets the two columns hold different numbers of
# tokens; splitting both columns in one separate_rows() call needs matching
# token counts in every row.
new <- df %>%
  left_join(
    df %>%
      pivot_longer(c(factor_Nov, factor_Jan)) %>%
      # Split on the literal "|" only, not on every non-alphanumeric character.
      separate_rows(value, sep = "\\|") %>%
      # A token repeated inside one cell must count once for that column.
      distinct(id, name, value) %>%
      group_by(id, value) %>%
      filter(n() == 1) %>%
      ungroup() %>%
      arrange(id, value) %>%
      group_by(id) %>%
      summarise(Diff = paste0(value, collapse = "|")),
    # ids with no differing token have no match and therefore get NA in Diff.
    by = "id"
  )
输出:
id factor_Nov factor_Jan Diff
1 1 A|B|C B|H|E A|C|E|H
2 2 E E <NA>
3 3 F|H|G X|Y|Z F|G|H|X|Y|Z
使用 data.table 的一种方案:
library(data.table)

# Convert df to a data.table by reference and add `diff`: for each row, the
# tokens found in only one of the columns whose names start with "factor_",
# joined with "|". Rows with identical token sets produce "".
setDT(df)[
  ,
  diff := do.call(
    Map,
    c(
      function(...) paste0(setdiff(union(...), intersect(...)), collapse = "|"),
      # "\|" is not a valid escape in an R string and stops the script at parse
      # time; split on a literal "|" instead.
      unname(lapply(.SD, strsplit, split = "|", fixed = TRUE))
    )
  ),
  .SDcols = patterns("^factor_")
]
结果为:
> df
id factor_Nov factor_Jan diff
1: 1 A|B|C B|H|E A|C|H|E
2: 2 E E
3: 3 F|H|G X|Y|Z F|H|G|X|Y|Z