计算列值之间的差异
calculate difference between values of a column
# Example data: three groups (s), each with the same four labels (c1),
# one c2 value per group, and a running numeric value (c3).
dat <- data.frame(
  s = rep(c(1, 2, 3), each = 4),
  c1 = rep(c("w", "x", "y", "z"), times = 3),
  c2 = rep(c("m", "f", "m"), each = 4),
  c3 = as.numeric(1:12)
)
> dat
s c1 c2 c3
1 1 w m 1
2 1 x m 2
3 1 y m 3
4 1 z m 4
5 2 w f 5
6 2 x f 6
7 2 y f 7
8 2 z f 8
9 3 w m 9
10 3 x m 10
11 3 y m 11
12 3 z m 12
我想针对每个 s,计算 c1 的每一对组合(w-x、x-y 等)所对应的 c3 值之差。期望的输出大致如下:
s diff c2 c3
1 w-x m -1
1 w-y m -2
1 w-z m -3
1 x-y m -1
etc
我认为 aggregate
函数应该可以工作,但我不知道如何定义组合并将它们传递给函数参数。
如果你愿意使用 dplyr,下面这样的写法应该可行。
# Rebuild the example data so this snippet can be run on its own.
dat <- data.frame(
  s = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3),
  c1 = c("w", "x", "y", "z", "w", "x", "y", "z", "w", "x", "y", "z"),
  c2 = c("m", "m", "m", "m", "f", "f", "f", "f", "m", "m", "m", "m"),
  c3 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
)
library(dplyr)
# Compare labels as plain strings: on R < 4.0 data.frame() turns c1 into a
# factor, and `>` is not meaningful for unordered factors.
dat$c1 <- as.character(dat$c1)
dat2 <- dat %>%
  # Self-join within each (s, c2) group to enumerate every ordered pair of
  # rows. The fan-out is intentional, so declare it explicitly; otherwise
  # dplyr >= 1.1.0 warns about an unexpected many-to-many relationship.
  left_join(dat, by = c("s", "c2"), relationship = "many-to-many") %>%
  # Keep each unordered pair exactly once (and drop self-pairs).
  filter(c1.x > c1.y) %>%
  # Label the pair as "<smaller>-<larger>" and subtract in the same order.
  transmute(s, diff = paste(c1.y, c1.x, sep = "-"), c2, c3 = c3.y - c3.x)
dat2
## s diff c2 c3
## 1 1 w-x m -1
## 2 1 w-y m -2
## 3 1 x-y m -1
## 4 1 w-z m -3
## 5 1 x-z m -2
## 6 1 y-z m -1
## 7 2 w-x f -1
## 8 2 w-y f -2
## 9 2 x-y f -1
## 10 2 w-z f -3
## 11 2 x-z f -2
## 12 2 y-z f -1
## 13 3 w-x m -1
## 14 3 w-y m -2
## 15 3 x-y m -1
## 16 3 w-z m -3
## 17 3 x-z m -2
## 18 3 y-z m -1
# Base R alternative: for every group of `s`, list all unordered pairs of c1
# labels and the difference of their c3 values, then stack the groups.
# Columns are built from atomic vectors so that col1/col2 are character and
# val is numeric (returning a list from combn()'s FUN yields list columns).
do.call(rbind, lapply(split(dat, dat$s), function(a) {
  a$c1 <- as.character(a$c1)
  labels <- unique(a$c1)
  # combn() stops with "n < m" when fewer than two labels exist; such a group
  # has no pairs, so contribute nothing (rbind() drops NULL entries).
  if (length(labels) < 2L) {
    return(NULL)
  }
  pairs <- combn(labels, 2) # 2 x n_pairs character matrix
  # match() picks the first row for each label; labels are assumed to be
  # unique within a group, as in the example data.
  first <- match(pairs[1, ], a$c1)
  second <- match(pairs[2, ], a$c1)
  data.frame(
    col1 = pairs[1, ],
    col2 = pairs[2, ],
    val = a$c3[first] - a$c3[second],
    s = a$s[1], # constant within the split
    c2 = a$c2[1], # first c2 value of the group
    stringsAsFactors = FALSE
  )
}))
# col1 col2 val s c2
#1.1 w x -1 1 m
#1.2 w y -2 1 m
#1.3 w z -3 1 m
#1.4 x y -1 1 m
#1.5 x z -2 1 m
#1.6 y z -1 1 m
#2.1 w x -1 2 f
#2.2 w y -2 2 f
#2.3 w z -3 2 f
#2.4 x y -1 2 f
#2.5 x z -2 2 f
#2.6 y z -1 2 f
#3.1 w x -1 3 m
#3.2 w y -2 3 m
#3.3 w z -3 3 m
#3.4 x y -1 3 m
#3.5 x z -2 3 m
#3.6 y z -1 3 m
# Example data: three groups (s), each with the same four labels (c1),
# one c2 value per group, and a running numeric value (c3).
dat <- data.frame(
  s = rep(c(1, 2, 3), each = 4),
  c1 = rep(c("w", "x", "y", "z"), times = 3),
  c2 = rep(c("m", "f", "m"), each = 4),
  c3 = as.numeric(1:12)
)
> dat
s c1 c2 c3
1 1 w m 1
2 1 x m 2
3 1 y m 3
4 1 z m 4
5 2 w f 5
6 2 x f 6
7 2 y f 7
8 2 z f 8
9 3 w m 9
10 3 x m 10
11 3 y m 11
12 3 z m 12
我想针对每个 s,计算 c1 的每一对组合(w-x、x-y 等)所对应的 c3 值之差。期望的输出大致如下:
s diff c2 c3
1 w-x m -1
1 w-y m -2
1 w-z m -3
1 x-y m -1
etc
我认为 aggregate
函数应该可以工作,但我不知道如何定义组合并将它们传递给函数参数。
如果你愿意使用 dplyr,下面这样的写法应该可行。
# Rebuild the example data so this snippet can be run on its own.
dat <- data.frame(
  s = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3),
  c1 = c("w", "x", "y", "z", "w", "x", "y", "z", "w", "x", "y", "z"),
  c2 = c("m", "m", "m", "m", "f", "f", "f", "f", "m", "m", "m", "m"),
  c3 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
)
library(dplyr)
# Compare labels as plain strings: on R < 4.0 data.frame() turns c1 into a
# factor, and `>` is not meaningful for unordered factors.
dat$c1 <- as.character(dat$c1)
dat2 <- dat %>%
  # Self-join within each (s, c2) group to enumerate every ordered pair of
  # rows. The fan-out is intentional, so declare it explicitly; otherwise
  # dplyr >= 1.1.0 warns about an unexpected many-to-many relationship.
  left_join(dat, by = c("s", "c2"), relationship = "many-to-many") %>%
  # Keep each unordered pair exactly once (and drop self-pairs).
  filter(c1.x > c1.y) %>%
  # Label the pair as "<smaller>-<larger>" and subtract in the same order.
  transmute(s, diff = paste(c1.y, c1.x, sep = "-"), c2, c3 = c3.y - c3.x)
dat2
## s diff c2 c3
## 1 1 w-x m -1
## 2 1 w-y m -2
## 3 1 x-y m -1
## 4 1 w-z m -3
## 5 1 x-z m -2
## 6 1 y-z m -1
## 7 2 w-x f -1
## 8 2 w-y f -2
## 9 2 x-y f -1
## 10 2 w-z f -3
## 11 2 x-z f -2
## 12 2 y-z f -1
## 13 3 w-x m -1
## 14 3 w-y m -2
## 15 3 x-y m -1
## 16 3 w-z m -3
## 17 3 x-z m -2
## 18 3 y-z m -1
# Base R alternative: for every group of `s`, list all unordered pairs of c1
# labels and the difference of their c3 values, then stack the groups.
# Columns are built from atomic vectors so that col1/col2 are character and
# val is numeric (returning a list from combn()'s FUN yields list columns).
do.call(rbind, lapply(split(dat, dat$s), function(a) {
  a$c1 <- as.character(a$c1)
  labels <- unique(a$c1)
  # combn() stops with "n < m" when fewer than two labels exist; such a group
  # has no pairs, so contribute nothing (rbind() drops NULL entries).
  if (length(labels) < 2L) {
    return(NULL)
  }
  pairs <- combn(labels, 2) # 2 x n_pairs character matrix
  # match() picks the first row for each label; labels are assumed to be
  # unique within a group, as in the example data.
  first <- match(pairs[1, ], a$c1)
  second <- match(pairs[2, ], a$c1)
  data.frame(
    col1 = pairs[1, ],
    col2 = pairs[2, ],
    val = a$c3[first] - a$c3[second],
    s = a$s[1], # constant within the split
    c2 = a$c2[1], # first c2 value of the group
    stringsAsFactors = FALSE
  )
}))
# col1 col2 val s c2
#1.1 w x -1 1 m
#1.2 w y -2 1 m
#1.3 w z -3 1 m
#1.4 x y -1 1 m
#1.5 x z -2 1 m
#1.6 y z -1 1 m
#2.1 w x -1 2 f
#2.2 w y -2 2 f
#2.3 w z -3 2 f
#2.4 x y -1 2 f
#2.5 x z -2 2 f
#2.6 y z -1 2 f
#3.1 w x -1 3 m
#3.2 w y -2 3 m
#3.3 w z -3 3 m
#3.4 x y -1 3 m
#3.5 x z -2 3 m
#3.6 y z -1 3 m