从两列分配唯一 ID,其中值在不同行上的顺序可以相反
assign unique ID from two columns where values can be in inverse order on different rows
我有一个数据框 df,其中同一对值可能出现在两列的不同行中,且顺序可能相反。我想为每一对值(不考虑顺序)分配一个唯一 ID。如何做到这一点?感谢您的帮助。
df:
> CALL_NO DUP_OF_CALL_NO
> 171573729 171573731
> 171573729 171573731
> 171630085 171630084
> 171630085 171630084
> 171573731 171573729
> 171573731 171573729
> 171630084 171630085
> 171630084 171630085
> 171573731 171573729
> 171573731 171573729
期望的输出:
> ID CALL_NO DUP_OF_CALL_NO
> 1 171573729 171573731
> 1 171573729 171573731
> 2 171630085 171630084
> 2 171630085 171630084
> 1 171573731 171573729
> 1 171573731 171573729
> 2 171630084 171630085
> 2 171630084 171630085
> 1 171573731 171573729
> 1 171573731 171573729
# Build an order-independent key per row by sorting the two call numbers and
# joining them into one string; rows with the same key get the same ID.
# Only the two call columns are passed to apply(), so any other column in df
# cannot leak into the key.
df$ID <- as.numeric(factor(apply(
  df[c("CALL_NO", "DUP_OF_CALL_NO")], 1,
  function(x) toString(sort(x))
)))
> df
CALL_NO DUP_OF_CALL_NO ID
1 171573729 171573731 1
2 171573729 171573731 1
3 171630085 171630084 2
4 171630085 171630084 2
5 171573731 171573729 1
6 171573731 171573729 1
7 171630084 171630085 2
8 171630084 171630085 2
9 171573731 171573729 1
10 171573731 171573729 1
或者您也可以使用 do.call:
# Pair key = "<smaller value> <larger value>", computed column-wise.
# Only the two call columns are handed to do.call(): passing the whole df
# fails with "unused argument" as soon as df has a third column (e.g. ID).
as.numeric(factor(do.call(
  function(x, y) paste(pmin(x, y), pmax(x, y)),
  unname(df[c("CALL_NO", "DUP_OF_CALL_NO")])
)))
[1] 1 1 2 2 1 1 2 2 1 1
使用 data.table:
创建示例数据集
# Reproducible sample data: 100 rows, each column drawn with replacement
# from 1..10. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
set.seed(1234)
df <- data.frame(
  call1 = sample(1:10, 100, replace = TRUE),
  call2 = sample(1:10, 100, replace = TRUE),
  stringsAsFactors = FALSE
)
# Load the data.table package and convert df to a data.table.
# setDT() is called without assigning its result; the data.table syntax used
# on df afterwards relies on df itself having been converted.
library(data.table)
setDT(df)
添加分组指标:
# Order rows by the order-independent pair (smaller value, larger value) so
# that group numbers increase with the pair.
df <- df[order(pmin(call1, call2), pmax(call1, call2))]
# Add the group id. Grouping on pmin/pmax gives (a, b) and (b, a) the same id;
# grouping on the raw (call1, call2) would treat them as different groups.
df[, id := .GRP, by = .(lo = pmin(call1, call2), hi = pmax(call1, call2))]
# Requires dplyr (for %>% and mutate()) and data.table to be attached.
# First standardize the order of CALL_NO and DUP_OF_CALL_NO to create
# groupings: call1 always holds the smaller value, call2 the larger one.
df <- df %>%
  mutate(
    call1 = pmin(CALL_NO, DUP_OF_CALL_NO),
    call2 = pmax(CALL_NO, DUP_OF_CALL_NO)
  )
# Create a unique ID across related CALL_NO and DUP_OF_CALL_NO ----
# Convert to data.table
df <- data.table(df)
# Apply one ID per (call1, call2) group of calls
df[, ID := .GRP, by = .(call1, call2)]
# Convert back to data.frame. The result must be assigned: a bare
# class(as.data.frame(df)) only prints the class and leaves df a data.table.
df <- as.data.frame(df)
class(df)
我有一个数据框 df,其中同一对值可能出现在两列的不同行中,且顺序可能相反。我想为每一对值(不考虑顺序)分配一个唯一 ID。如何做到这一点?感谢您的帮助。
df:
> CALL_NO DUP_OF_CALL_NO
> 171573729 171573731
> 171573729 171573731
> 171630085 171630084
> 171630085 171630084
> 171573731 171573729
> 171573731 171573729
> 171630084 171630085
> 171630084 171630085
> 171573731 171573729
> 171573731 171573729
期望的输出:
> ID CALL_NO DUP_OF_CALL_NO
> 1 171573729 171573731
> 1 171573729 171573731
> 2 171630085 171630084
> 2 171630085 171630084
> 1 171573731 171573729
> 1 171573731 171573729
> 2 171630084 171630085
> 2 171630084 171630085
> 1 171573731 171573729
> 1 171573731 171573729
# Build an order-independent key per row by sorting the two call numbers and
# joining them into one string; rows with the same key get the same ID.
# Only the two call columns are passed to apply(), so any other column in df
# cannot leak into the key.
df$ID <- as.numeric(factor(apply(
  df[c("CALL_NO", "DUP_OF_CALL_NO")], 1,
  function(x) toString(sort(x))
)))
> df
CALL_NO DUP_OF_CALL_NO ID
1 171573729 171573731 1
2 171573729 171573731 1
3 171630085 171630084 2
4 171630085 171630084 2
5 171573731 171573729 1
6 171573731 171573729 1
7 171630084 171630085 2
8 171630084 171630085 2
9 171573731 171573729 1
10 171573731 171573729 1
或者您也可以使用 do.call:
# Pair key = "<smaller value> <larger value>", computed column-wise.
# Only the two call columns are handed to do.call(): passing the whole df
# fails with "unused argument" as soon as df has a third column (e.g. ID).
as.numeric(factor(do.call(
  function(x, y) paste(pmin(x, y), pmax(x, y)),
  unname(df[c("CALL_NO", "DUP_OF_CALL_NO")])
)))
[1] 1 1 2 2 1 1 2 2 1 1
使用 data.table:
创建示例数据集
# Reproducible sample data: 100 rows, each column drawn with replacement
# from 1..10. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
set.seed(1234)
df <- data.frame(
  call1 = sample(1:10, 100, replace = TRUE),
  call2 = sample(1:10, 100, replace = TRUE),
  stringsAsFactors = FALSE
)
# Load the data.table package and convert df to a data.table.
# setDT() is called without assigning its result; the data.table syntax used
# on df afterwards relies on df itself having been converted.
library(data.table)
setDT(df)
添加分组指标:
# Order rows by the order-independent pair (smaller value, larger value) so
# that group numbers increase with the pair.
df <- df[order(pmin(call1, call2), pmax(call1, call2))]
# Add the group id. Grouping on pmin/pmax gives (a, b) and (b, a) the same id;
# grouping on the raw (call1, call2) would treat them as different groups.
df[, id := .GRP, by = .(lo = pmin(call1, call2), hi = pmax(call1, call2))]
# Requires dplyr (for %>% and mutate()) and data.table to be attached.
# First standardize the order of CALL_NO and DUP_OF_CALL_NO to create
# groupings: call1 always holds the smaller value, call2 the larger one.
df <- df %>%
  mutate(
    call1 = pmin(CALL_NO, DUP_OF_CALL_NO),
    call2 = pmax(CALL_NO, DUP_OF_CALL_NO)
  )
# Create a unique ID across related CALL_NO and DUP_OF_CALL_NO ----
# Convert to data.table
df <- data.table(df)
# Apply one ID per (call1, call2) group of calls
df[, ID := .GRP, by = .(call1, call2)]
# Convert back to data.frame. The result must be assigned: a bare
# class(as.data.frame(df)) only prints the class and leaves df a data.table.
df <- as.data.frame(df)
class(df)