如何将 data.table 列发送到函数
How to send data.table columns to a function
我有以下数据和代码:
# Example data built directly with structure(): a data.table with 50 rows and
# three columns -- numeric vnum1, factor vfac1 (levels "1" to "4") and factor
# vch1 (levels "A" to "E").
mydt = structure(list(vnum1 = c(0.517551446921093, -0.997822163825322,
3.40784990301597, -0.20990292802279, 0.171252718589118, -0.624084617915488,
0.0979152932727754, -0.673949942523713, 0.689937370719125, -0.356403906786312,
-0.565253563082689, -0.725285109477077, -0.343253827285705, -0.515803106223986,
2.21193745540815, 0.179392018244011, 0.695885203438304, -0.869946981188651,
0.170084087339536, 0.864392658315656, 0.801471783050381, 0.753880989575548,
-0.572671791856263, -0.238511443188091, -1.1837711276515, 1.13728246296508,
0.702244681081861, -0.851470541269798, 0.0471820411719659, 0.547952252697306,
0.527539936397851, 0.247070882010565, -0.562100684713534, -1.05183021003772,
0.934263969812236, -0.603673312084538, -2.00612207642211, 0.2312103046843,
-0.214991379754579, 0.282701708464789, 0.289934023279607, 0.567328033965404,
-0.359157137438815, 0.648221129776207, 0.857904763904759, 0.289415512264559,
1.06555885899638, 0.333119386976963, -1.46070627726311, 0.0552050036156248
), vfac1 = structure(c(2L, 1L, 1L, 2L, 1L, 3L, 2L, 3L, 1L, 4L,
4L, 3L, 1L, 3L, 1L, 4L, 4L, 4L, 1L, 2L, 2L, 4L, 2L, 4L, 1L, 3L,
4L, 1L, 2L, 2L, 2L, 1L, 3L, 4L, 1L, 2L, 1L, 3L, 1L, 4L, 2L, 3L,
2L, 1L, 2L, 2L, 2L, 3L, 4L, 2L), .Label = c("1", "2", "3", "4"
), class = "factor"), vch1 = structure(c(3L, 4L, 5L, 4L, 1L,
5L, 5L, 3L, 3L, 4L, 1L, 4L, 3L, 5L, 1L, 3L, 4L, 5L, 1L, 3L, 5L,
2L, 5L, 5L, 1L, 2L, 5L, 5L, 1L, 3L, 4L, 1L, 2L, 2L, 5L, 1L, 4L,
2L, 1L, 5L, 4L, 4L, 3L, 2L, 5L, 4L, 3L, 2L, 3L, 2L), .Label = c("A",
"B", "C", "D", "E"), class = "factor")), .Names = c("vnum1",
"vfac1", "vch1"), class = c("data.table", "data.frame"), row.names = c(NA,
50L))
# Baseline: mean and sd of vnum1 for each (vfac1, vch1) group.
mydt[, .(mean = mean(vnum1), sd = sd(vnum1)), by = .(vfac1, vch1)]
vfac1 vch1 mean sd
1: 2 C 0.52725962 0.54536269
2: 1 D -1.50197212 0.71297571
3: 1 E 1.16354778 2.13889714
4: 2 D 0.22424664 0.31039463
5: 1 A 0.23359711 1.10743823
6: 3 E -0.56994386 0.07656659
7: 2 E 0.29615501 0.67455339
8: 3 C -0.67394994 NA
9: 1 C 0.17334177 0.73057650
10: 4 D 0.16974065 0.74408077
11: 4 A -0.56525356 NA
12: 3 D -0.07897854 0.91401552
13: 4 C -0.64065713 1.15972463
14: 4 E -0.03087801 0.67895741
15: 4 B -0.14897461 1.27683063
16: 3 B 0.28487787 0.69502367
17: 2 A -0.27824564 0.46022423
18: 1 B 0.64822113 NA
19: 2 B 0.05520500 NA
我想创建如下函数,向它传入列名即可得到上述结果。但是,下面的函数不起作用:
# NOTE: this definition does not work as written:
#   * formal argument names must be symbols; quoted strings such as 'vnum1'
#     in the argument list are a syntax error;
#   * the body refers to `mydt` rather than to the `ddt` parameter;
#   * inside `[`, 'vnum1', 'vfac1' and 'vch1' are plain string constants, not
#     references to the columns of those names.
myfn = function(ddt, 'vnum1', 'vfac1', 'vch1'){
mydt[,list(mean=mean('vnum1'), sd=sd('vnum1')),list('vfac1', 'vch1')]
}
如何发送列名(或列向量本身)以便从函数中获取结果?感谢您的帮助。
你可以试试
# Mean and standard deviation of one column, grouped by two other columns.
#
# dt:     a data.table.
# v1:     name of the column to summarise, passed as a quoted string.
# v2, v3: grouping columns, passed as unquoted (bare) column names.
# Returns one row per (v2, v3) group with columns `mean` and `sd`.
#
# as.name(v1) turns the string into a symbol, and eval() of that symbol in j
# resolves to the column. deparse(substitute(.)) recovers the text the caller
# typed for v2 and v3, so `by` receives the column names as a character vector.
myfn <- function(dt, v1, v2, v3){
dt[, list(mean=mean(eval(as.name(v1))), sd=sd(eval(as.name(v1)))),
by=c(deparse(substitute(v2)), deparse(substitute(v3)))]
}
myfn(mydt, 'vnum1', vfac1, vch1)
# vfac1 vch1 mean sd
#1: 2 C 0.52725962 0.54536269
#2: 1 D -1.50197212 0.71297571
#3: 1 E 1.16354778 2.13889714
#4: 2 D 0.22424664 0.31039463
#5: 1 A 0.23359711 1.10743823
#6: 3 E -0.56994386 0.07656659
#7: 2 E 0.29615501 0.67455339
#8: 3 C -0.67394994 NA
#9: 1 C 0.17334177 0.73057650
#10: 4 D 0.16974065 0.74408077
#11: 4 A -0.56525356 NA
#12: 3 D -0.07897854 0.91401552
#13: 4 C -0.64065713 1.15972463
#14: 4 E -0.03087801 0.67895741
#15: 4 B -0.14897461 1.27683063
#16: 3 B 0.28487787 0.69502367
#17: 2 A -0.27824564 0.46022423
#18: 1 B 0.64822113 NA
#19: 2 B 0.05520500 NA
在更改列名时也有效
setnames(mydt, names(mydt), letters[1:3])
head(myfn(mydt, 'a', b, c),2)
# b c mean sd
#1: 2 C 0.5272596 0.5453627
#2: 1 D -1.5019721 0.7129757
或者您可以去掉 deparse(substitute(..)),
直接把列名作为带引号的字符串传入
# Mean and standard deviation of one column, grouped by two other columns.
#
# dt:     a data.table.
# v1:     name of the column to summarise, passed as a quoted string.
# v2, v3: names of the grouping columns, passed as quoted strings.
# Returns one row per (v2, v3) group with columns `mean` and `sd`.
#
# as.name(v1) turns the string into a symbol, and eval() of that symbol in j
# resolves to the column. c(v2, v3) gives `by` a character vector of names.
myfn <- function(dt, v1, v2, v3){
dt[, list(mean=mean(eval(as.name(v1))), sd=sd(eval(as.name(v1)))),
by=c(v2, v3)]
}
myfn(mydt, 'vnum1', 'vfac1', 'vch1')
这是另一个变体,可以接受不带引号的列名
# Grouped mean and sd where all three columns are passed as bare names.
#
# dt:     a data.table.
# v1:     column to summarise (unquoted).
# v2, v3: grouping columns (unquoted).
# Returns one row per (v2, v3) group with columns `mean` and `sd`.
myfn <- function(dt, v1, v2, v3) {
  # Capture the caller's argument expressions without evaluating them.
  captured <- as.list(match.call())
  value_sym <- captured[["v1"]]
  # Turn the grouping expressions back into column-name strings for `by`.
  group_cols <- c(deparse(captured[["v2"]]), deparse(captured[["v3"]]))
  dt[
    ,
    .(mean = mean(eval(value_sym)), sd = sd(eval(value_sym))),
    by = group_cols
  ]
}
head(myfn(mydt, vnum1, vfac1, vch1),2)
# vfac1 vch1 mean sd
#1: 2 C 0.5272596 0.5453627
#2: 1 D -1.5019721 0.7129757
我认为您可以使用 .SD 来简化它,使其更符合 data.table 的惯用写法。您也不需要在 by 语句中进行求值(eval),因为 data.table 在聚合时不会在全局环境中查找变量(这一点与 j 语句不同)。就这么简单:
# Mean and standard deviation of one column, grouped by two other columns.
#
# dt:     a data.table.
# v1:     name of the column to summarise, passed as a quoted string.
# v2, v3: names of the grouping columns, passed as quoted strings.
# Returns one row per (v2, v3) group with columns `mean` and `sd`.
#
# .SD[[v1]] extracts the column named by v1 from the per-group subset .SD.
# The third positional argument, c(v2, v3), is the `by` grouping.
myfn <- function(dt, v1, v2, v3){
dt[, .(mean = mean(.SD[[v1]]), sd = sd(.SD[[v1]])), c(v2, v3)]
}
myfn(mydt, "vnum1", "vfac1", "vch1")
## vfac1 vch1 mean sd
## 1: 2 C 0.5272596 0.54536269
## 2: 1 D -1.5019721 0.71297571
## 3: 1 E 1.1635478 2.13889714
## 4: 2 D 0.2242466 0.31039463
## 5: 1 A 0.2335971 1.10743823
## 6: 3 E -0.5699439 0.07656659
...
使用其他列名进行测试
setnames(mydt, letters[1:3])
head(myfn(mydt, "a", "b", "c"), 2)
# b c mean sd
# 1: 2 C 0.5272596 0.5453627
# 2: 1 D -1.5019721 0.7129757
或者,您也可以使用 get,例如:
# Mean and standard deviation of one column, grouped by two other columns.
#
# dt:     a data.table.
# v1:     name of the column to summarise, passed as a quoted string.
# v2, v3: names of the grouping columns, passed as quoted strings.
# Returns one row per (v2, v3) group with columns `mean` and `sd`.
#
# get(v1) looks up the object whose name is the string held in v1; inside j
# that is the column. The third positional argument, c(v2, v3), is `by`.
myfn <- function(dt, v1, v2, v3){
dt[, .(mean = mean(get(v1)), sd = sd(get(v1))), c(v2, v3)]
}
不过,就效率而言,在 j 语句中使用 @akrun 的 eval(as.name()) 组合应该是最快的,直到 Arun/Matt 对 .SD 做出优化为止。
我有以下数据和代码:
# Example data built directly with structure(): a data.table with 50 rows and
# three columns -- numeric vnum1, factor vfac1 (levels "1" to "4") and factor
# vch1 (levels "A" to "E").
mydt = structure(list(vnum1 = c(0.517551446921093, -0.997822163825322,
3.40784990301597, -0.20990292802279, 0.171252718589118, -0.624084617915488,
0.0979152932727754, -0.673949942523713, 0.689937370719125, -0.356403906786312,
-0.565253563082689, -0.725285109477077, -0.343253827285705, -0.515803106223986,
2.21193745540815, 0.179392018244011, 0.695885203438304, -0.869946981188651,
0.170084087339536, 0.864392658315656, 0.801471783050381, 0.753880989575548,
-0.572671791856263, -0.238511443188091, -1.1837711276515, 1.13728246296508,
0.702244681081861, -0.851470541269798, 0.0471820411719659, 0.547952252697306,
0.527539936397851, 0.247070882010565, -0.562100684713534, -1.05183021003772,
0.934263969812236, -0.603673312084538, -2.00612207642211, 0.2312103046843,
-0.214991379754579, 0.282701708464789, 0.289934023279607, 0.567328033965404,
-0.359157137438815, 0.648221129776207, 0.857904763904759, 0.289415512264559,
1.06555885899638, 0.333119386976963, -1.46070627726311, 0.0552050036156248
), vfac1 = structure(c(2L, 1L, 1L, 2L, 1L, 3L, 2L, 3L, 1L, 4L,
4L, 3L, 1L, 3L, 1L, 4L, 4L, 4L, 1L, 2L, 2L, 4L, 2L, 4L, 1L, 3L,
4L, 1L, 2L, 2L, 2L, 1L, 3L, 4L, 1L, 2L, 1L, 3L, 1L, 4L, 2L, 3L,
2L, 1L, 2L, 2L, 2L, 3L, 4L, 2L), .Label = c("1", "2", "3", "4"
), class = "factor"), vch1 = structure(c(3L, 4L, 5L, 4L, 1L,
5L, 5L, 3L, 3L, 4L, 1L, 4L, 3L, 5L, 1L, 3L, 4L, 5L, 1L, 3L, 5L,
2L, 5L, 5L, 1L, 2L, 5L, 5L, 1L, 3L, 4L, 1L, 2L, 2L, 5L, 1L, 4L,
2L, 1L, 5L, 4L, 4L, 3L, 2L, 5L, 4L, 3L, 2L, 3L, 2L), .Label = c("A",
"B", "C", "D", "E"), class = "factor")), .Names = c("vnum1",
"vfac1", "vch1"), class = c("data.table", "data.frame"), row.names = c(NA,
50L))
# Baseline: mean and sd of vnum1 for each (vfac1, vch1) group.
mydt[, .(mean = mean(vnum1), sd = sd(vnum1)), by = .(vfac1, vch1)]
vfac1 vch1 mean sd
1: 2 C 0.52725962 0.54536269
2: 1 D -1.50197212 0.71297571
3: 1 E 1.16354778 2.13889714
4: 2 D 0.22424664 0.31039463
5: 1 A 0.23359711 1.10743823
6: 3 E -0.56994386 0.07656659
7: 2 E 0.29615501 0.67455339
8: 3 C -0.67394994 NA
9: 1 C 0.17334177 0.73057650
10: 4 D 0.16974065 0.74408077
11: 4 A -0.56525356 NA
12: 3 D -0.07897854 0.91401552
13: 4 C -0.64065713 1.15972463
14: 4 E -0.03087801 0.67895741
15: 4 B -0.14897461 1.27683063
16: 3 B 0.28487787 0.69502367
17: 2 A -0.27824564 0.46022423
18: 1 B 0.64822113 NA
19: 2 B 0.05520500 NA
我想创建如下函数,向它传入列名即可得到上述结果。但是,下面的函数不起作用:
# NOTE: this definition does not work as written:
#   * formal argument names must be symbols; quoted strings such as 'vnum1'
#     in the argument list are a syntax error;
#   * the body refers to `mydt` rather than to the `ddt` parameter;
#   * inside `[`, 'vnum1', 'vfac1' and 'vch1' are plain string constants, not
#     references to the columns of those names.
myfn = function(ddt, 'vnum1', 'vfac1', 'vch1'){
mydt[,list(mean=mean('vnum1'), sd=sd('vnum1')),list('vfac1', 'vch1')]
}
如何发送列名(或列向量本身)以便从函数中获取结果?感谢您的帮助。
你可以试试
# Mean and standard deviation of one column, grouped by two other columns.
#
# dt:     a data.table.
# v1:     name of the column to summarise, passed as a quoted string.
# v2, v3: grouping columns, passed as unquoted (bare) column names.
# Returns one row per (v2, v3) group with columns `mean` and `sd`.
#
# as.name(v1) turns the string into a symbol, and eval() of that symbol in j
# resolves to the column. deparse(substitute(.)) recovers the text the caller
# typed for v2 and v3, so `by` receives the column names as a character vector.
myfn <- function(dt, v1, v2, v3){
dt[, list(mean=mean(eval(as.name(v1))), sd=sd(eval(as.name(v1)))),
by=c(deparse(substitute(v2)), deparse(substitute(v3)))]
}
myfn(mydt, 'vnum1', vfac1, vch1)
# vfac1 vch1 mean sd
#1: 2 C 0.52725962 0.54536269
#2: 1 D -1.50197212 0.71297571
#3: 1 E 1.16354778 2.13889714
#4: 2 D 0.22424664 0.31039463
#5: 1 A 0.23359711 1.10743823
#6: 3 E -0.56994386 0.07656659
#7: 2 E 0.29615501 0.67455339
#8: 3 C -0.67394994 NA
#9: 1 C 0.17334177 0.73057650
#10: 4 D 0.16974065 0.74408077
#11: 4 A -0.56525356 NA
#12: 3 D -0.07897854 0.91401552
#13: 4 C -0.64065713 1.15972463
#14: 4 E -0.03087801 0.67895741
#15: 4 B -0.14897461 1.27683063
#16: 3 B 0.28487787 0.69502367
#17: 2 A -0.27824564 0.46022423
#18: 1 B 0.64822113 NA
#19: 2 B 0.05520500 NA
在更改列名时也有效
setnames(mydt, names(mydt), letters[1:3])
head(myfn(mydt, 'a', b, c),2)
# b c mean sd
#1: 2 C 0.5272596 0.5453627
#2: 1 D -1.5019721 0.7129757
或者您可以去掉 deparse(substitute(..)),
直接把列名作为带引号的字符串传入
# Mean and standard deviation of one column, grouped by two other columns.
#
# dt:     a data.table.
# v1:     name of the column to summarise, passed as a quoted string.
# v2, v3: names of the grouping columns, passed as quoted strings.
# Returns one row per (v2, v3) group with columns `mean` and `sd`.
#
# as.name(v1) turns the string into a symbol, and eval() of that symbol in j
# resolves to the column. c(v2, v3) gives `by` a character vector of names.
myfn <- function(dt, v1, v2, v3){
dt[, list(mean=mean(eval(as.name(v1))), sd=sd(eval(as.name(v1)))),
by=c(v2, v3)]
}
myfn(mydt, 'vnum1', 'vfac1', 'vch1')
这是另一个变体,可以接受不带引号的列名
# Grouped mean and sd where all three columns are passed as bare names.
#
# dt:     a data.table.
# v1:     column to summarise (unquoted).
# v2, v3: grouping columns (unquoted).
# Returns one row per (v2, v3) group with columns `mean` and `sd`.
myfn <- function(dt, v1, v2, v3) {
  # Capture the caller's argument expressions without evaluating them.
  captured <- as.list(match.call())
  value_sym <- captured[["v1"]]
  # Turn the grouping expressions back into column-name strings for `by`.
  group_cols <- c(deparse(captured[["v2"]]), deparse(captured[["v3"]]))
  dt[
    ,
    .(mean = mean(eval(value_sym)), sd = sd(eval(value_sym))),
    by = group_cols
  ]
}
head(myfn(mydt, vnum1, vfac1, vch1),2)
# vfac1 vch1 mean sd
#1: 2 C 0.5272596 0.5453627
#2: 1 D -1.5019721 0.7129757
我认为您可以使用 .SD 来简化它,使其更符合 data.table 的惯用写法。您也不需要在 by 语句中进行求值(eval),因为 data.table 在聚合时不会在全局环境中查找变量(这一点与 j 语句不同)。就这么简单:
# Mean and standard deviation of one column, grouped by two other columns.
#
# dt:     a data.table.
# v1:     name of the column to summarise, passed as a quoted string.
# v2, v3: names of the grouping columns, passed as quoted strings.
# Returns one row per (v2, v3) group with columns `mean` and `sd`.
#
# .SD[[v1]] extracts the column named by v1 from the per-group subset .SD.
# The third positional argument, c(v2, v3), is the `by` grouping.
myfn <- function(dt, v1, v2, v3){
dt[, .(mean = mean(.SD[[v1]]), sd = sd(.SD[[v1]])), c(v2, v3)]
}
myfn(mydt, "vnum1", "vfac1", "vch1")
## vfac1 vch1 mean sd
## 1: 2 C 0.5272596 0.54536269
## 2: 1 D -1.5019721 0.71297571
## 3: 1 E 1.1635478 2.13889714
## 4: 2 D 0.2242466 0.31039463
## 5: 1 A 0.2335971 1.10743823
## 6: 3 E -0.5699439 0.07656659
...
使用其他列名进行测试
setnames(mydt, letters[1:3])
head(myfn(mydt, "a", "b", "c"), 2)
# b c mean sd
# 1: 2 C 0.5272596 0.5453627
# 2: 1 D -1.5019721 0.7129757
或者,您也可以使用 get,例如:
# Mean and standard deviation of one column, grouped by two other columns.
#
# dt:     a data.table.
# v1:     name of the column to summarise, passed as a quoted string.
# v2, v3: names of the grouping columns, passed as quoted strings.
# Returns one row per (v2, v3) group with columns `mean` and `sd`.
#
# get(v1) looks up the object whose name is the string held in v1; inside j
# that is the column. The third positional argument, c(v2, v3), is `by`.
myfn <- function(dt, v1, v2, v3){
dt[, .(mean = mean(get(v1)), sd = sd(get(v1))), c(v2, v3)]
}
不过,就效率而言,在 j 语句中使用 @akrun 的 eval(as.name()) 组合应该是最快的,直到 Arun/Matt 对 .SD 做出优化为止。