对两个数据框的两列进行条件 setdiff(全对全比较),并以数值范围作为匹配条件
Conditional setdiff (all-to-all) on two columns from two dataframes with a numeric range for taking a match
这里有两个示例数据框:
# Sample inputs: df1 holds the candidate rows, df2 the reference rows.
df1 <- data.frame(
  Time1v1 = c(55.25, 59.36, 40.26, 786.008, 980.569, 11.2, 10.11, 23.11),
  Time2v1 = c(81, 12, 13, 11.0112, 93.9, 14.8, 15.3, 78.91)
)
df2 <- data.frame(
  Time1v2 = c(10.13, 980.659, 14.42, 90.1, 40.3298, 9234, 59.35),
  Time2v2 = c(25.1, 88.9, 120, 911, 22.1253, 81, 15.1)
)
> df1
Time1v1 Time2v1
1 55.250 81.0000
2 59.360 12.0000
3 40.260 13.0000
4 786.008 11.0112
5 980.569 93.9000
6 11.200 14.8000
7 10.110 15.3000
8 23.110 78.9100
> df2
Time1v2 Time2v2
1 10.1300 25.1000
2 980.6590 88.9000
3 14.4200 120.0000
4 90.1000 911.0000
5 40.3298 22.1253
6 9234.0000 81.0000
7 59.3500 15.1000
我想将 df1
的每一行与 df2
的每一行进行比较。如果 df1 和 df2 中的 Time1
之间的差异在 [-0.1,+0.1]
范围内并且 Time2
中的差异在 [-10,+10]
范围内,则必须删除 df1 中的特定行。
尝试解决
下面是我的尝试。有没有更好的方法?
# Flag every df1 row for which at least one df2 row lies within both
# tolerances: Time1 within +/- 0.1 and Time2 within +/- 10.
df1$remove <- rep("No", nrow(df1))
# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow(x) would iterate over c(1, 0) and index rows that do not exist.
for (i in seq_len(nrow(df1))) {
  for (j in seq_len(nrow(df2))) {
    time1_close <- data.table::inrange(
      df1$Time1v1[i], df2$Time1v2[j] - 0.1, df2$Time1v2[j] + 0.1
    )
    time2_close <- data.table::inrange(
      df1$Time2v1[i], df2$Time2v2[j] - 10, df2$Time2v2[j] + 10
    )
    # Both conditions must hold for the SAME df2 row j.
    if (time1_close && time2_close) {
      df1$remove[i] <- "remove"
    }
  }
}
这给了我:
> df1
Time1v1 Time2v1 remove
1 55.250 81.0000 No
2 59.360 12.0000 remove
3 40.260 13.0000 remove
4 786.008 11.0112 No
5 980.569 93.9000 remove
6 11.200 14.8000 No
7 10.110 15.3000 remove
8 23.110 78.9100 No
预期最终结果
并且最终预期的输出将是:
> df1[which(df1$remove != "remove"),-3]
Time1v1 Time2v1
1 55.250 81.0000
4 786.008 11.0112
6 11.200 14.8000
8 23.110 78.9100
相关
Perform non-pairwise all-to-all comparisons between two unordered character vectors --- The opposite of intersect --- all-to-all setdiff
这是一种手动方法(需要手动指定列名):
# All-to-all differences: rows index df1, columns index df2.
m1 <- outer(df1$Time1v1, df2$Time1v2, `-`)
m2 <- outer(df1$Time2v1, df2$Time2v2, `-`)
# A df1 row is removed only when a single df2 row satisfies BOTH tolerances,
# so the two conditions are combined cell by cell before collapsing over df2.
within_both <- m1 >= -0.1 & m1 <= 0.1 & m2 >= -10 & m2 <= 10
i1 <- which(rowSums(within_both) > 0)
# Select the rows to keep positively: df1[-i1, ] would return zero rows
# when i1 is empty (nothing matched).
df1[setdiff(seq_len(nrow(df1)), i1), ]
# Time1v1 Time2v1
#1 55.250 81.0000
#4 786.008 11.0112
#6 11.200 14.8000
#8 23.110 78.9100
滚动到最接近的值并求出距离:
# Rolling-join approach: for each df1 row, look up the nearest df2 value in
# each column and store the absolute distance to it.
library(data.table)
# Convert both data frames to data.tables by reference (no copy is made).
setDT(df1); setDT(df2)
# dist1: |nearest Time1v2 - Time1v1| for every df1 row.
df1[, dist1 := df2[df1, on=.(Time1v2 = Time1v1), roll="nearest", abs(x.Time1v2 - i.Time1v1)]]
# dist2: |nearest Time2v2 - Time2v1| for every df1 row.
df1[, dist2 := df2[df1, on=.(Time2v2 = Time2v1), roll="nearest", abs(x.Time2v2 - i.Time2v1)]]
# Keep the rows that fall outside at least one of the two tolerances.
# NOTE(review): the two joins run independently, so dist1 and dist2 may come
# from different df2 rows; a df1 row can therefore be dropped even though no
# single df2 row is within both ranges -- confirm this is acceptable.
df1[dist1 > 0.1 | dist2 > 10]
Time1v1 Time2v1 dist1 dist2
1: 55.250 81.0000 4.100 0.0000
2: 786.008 11.0112 194.651 4.0888
3: 11.200 14.8000 1.070 0.3000
4: 23.110 78.9100 8.690 2.0900
这里有两个示例数据框:
# Sample inputs: df1 holds the candidate rows, df2 the reference rows.
df1 <- data.frame(
  Time1v1 = c(55.25, 59.36, 40.26, 786.008, 980.569, 11.2, 10.11, 23.11),
  Time2v1 = c(81, 12, 13, 11.0112, 93.9, 14.8, 15.3, 78.91)
)
df2 <- data.frame(
  Time1v2 = c(10.13, 980.659, 14.42, 90.1, 40.3298, 9234, 59.35),
  Time2v2 = c(25.1, 88.9, 120, 911, 22.1253, 81, 15.1)
)
> df1
Time1v1 Time2v1
1 55.250 81.0000
2 59.360 12.0000
3 40.260 13.0000
4 786.008 11.0112
5 980.569 93.9000
6 11.200 14.8000
7 10.110 15.3000
8 23.110 78.9100
> df2
Time1v2 Time2v2
1 10.1300 25.1000
2 980.6590 88.9000
3 14.4200 120.0000
4 90.1000 911.0000
5 40.3298 22.1253
6 9234.0000 81.0000
7 59.3500 15.1000
我想将 df1
的每一行与 df2
的每一行进行比较。如果 df1 和 df2 中的 Time1
之间的差异在 [-0.1,+0.1]
范围内并且 Time2
中的差异在 [-10,+10]
范围内,则必须删除 df1 中的特定行。
尝试解决
下面是我的尝试。有没有更好的方法?
# Flag every df1 row for which at least one df2 row lies within both
# tolerances: Time1 within +/- 0.1 and Time2 within +/- 10.
df1$remove <- rep("No", nrow(df1))
# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow(x) would iterate over c(1, 0) and index rows that do not exist.
for (i in seq_len(nrow(df1))) {
  for (j in seq_len(nrow(df2))) {
    time1_close <- data.table::inrange(
      df1$Time1v1[i], df2$Time1v2[j] - 0.1, df2$Time1v2[j] + 0.1
    )
    time2_close <- data.table::inrange(
      df1$Time2v1[i], df2$Time2v2[j] - 10, df2$Time2v2[j] + 10
    )
    # Both conditions must hold for the SAME df2 row j.
    if (time1_close && time2_close) {
      df1$remove[i] <- "remove"
    }
  }
}
这给了我:
> df1
Time1v1 Time2v1 remove
1 55.250 81.0000 No
2 59.360 12.0000 remove
3 40.260 13.0000 remove
4 786.008 11.0112 No
5 980.569 93.9000 remove
6 11.200 14.8000 No
7 10.110 15.3000 remove
8 23.110 78.9100 No
预期最终结果
并且最终预期的输出将是:
> df1[which(df1$remove != "remove"),-3]
Time1v1 Time2v1
1 55.250 81.0000
4 786.008 11.0112
6 11.200 14.8000
8 23.110 78.9100
相关
Perform non-pairwise all-to-all comparisons between two unordered character vectors --- The opposite of intersect --- all-to-all setdiff
这是一种手动方法(需要手动指定列名):
# All-to-all differences: rows index df1, columns index df2.
m1 <- outer(df1$Time1v1, df2$Time1v2, `-`)
m2 <- outer(df1$Time2v1, df2$Time2v2, `-`)
# A df1 row is removed only when a single df2 row satisfies BOTH tolerances,
# so the two conditions are combined cell by cell before collapsing over df2.
within_both <- m1 >= -0.1 & m1 <= 0.1 & m2 >= -10 & m2 <= 10
i1 <- which(rowSums(within_both) > 0)
# Select the rows to keep positively: df1[-i1, ] would return zero rows
# when i1 is empty (nothing matched).
df1[setdiff(seq_len(nrow(df1)), i1), ]
# Time1v1 Time2v1
#1 55.250 81.0000
#4 786.008 11.0112
#6 11.200 14.8000
#8 23.110 78.9100
滚动到最接近的值并求出距离:
# Rolling-join approach: for each df1 row, look up the nearest df2 value in
# each column and store the absolute distance to it.
library(data.table)
# Convert both data frames to data.tables by reference (no copy is made).
setDT(df1); setDT(df2)
# dist1: |nearest Time1v2 - Time1v1| for every df1 row.
df1[, dist1 := df2[df1, on=.(Time1v2 = Time1v1), roll="nearest", abs(x.Time1v2 - i.Time1v1)]]
# dist2: |nearest Time2v2 - Time2v1| for every df1 row.
df1[, dist2 := df2[df1, on=.(Time2v2 = Time2v1), roll="nearest", abs(x.Time2v2 - i.Time2v1)]]
# Keep the rows that fall outside at least one of the two tolerances.
# NOTE(review): the two joins run independently, so dist1 and dist2 may come
# from different df2 rows; a df1 row can therefore be dropped even though no
# single df2 row is within both ranges -- confirm this is acceptable.
df1[dist1 > 0.1 | dist2 > 10]
Time1v1 Time2v1 dist1 dist2
1: 55.250 81.0000 4.100 0.0000
2: 786.008 11.0112 194.651 4.0888
3: 11.200 14.8000 1.070 0.3000
4: 23.110 78.9100 8.690 2.0900