collapse:逐行修改列以及来自多列的组合值
collapse: Modifying Columns by row along with combine values from multiple columns
我想把下面的 R 代码从 tidytable 翻译为 collapse(Advanced and Fast Data Transformation,高级且快速的数据转换)。
tidytable代码
library(tidytable)
library(collapse)
# Row-wise total of the columns PCGDP through GINI, ignoring missing values.
Out1 <- mutate_rowwise.(
  wlddev,
  New1 = sum(c_across.(PCGDP:GINI), na.rm = TRUE)
)
# Show only the newly created column.
select.(Out1, New1)
# A tidytable: 13,176 x 1
New1
<dbl>
1 32.4
2 33.0
3 33.5
4 34.0
5 34.5
6 34.9
7 35.4
8 35.9
9 36.4
10 36.9
# ... with 13,166 more rows
collapse 代码
library(collapse)
# Attempted collapse translation of the tidytable code above.
# It fails: across() is rejected here because it may only be used inside
# dplyr verbs (see the error message printed right after this snippet).
Out2 <-
wlddev %>%
ftransform(New1 = fsum(across(PCGDP:GINI), na.rm = TRUE))
Error in `context_peek()`:
! `across()` must only be used inside dplyr verbs.
Run `rlang::last_error()` to see where the error occurred.
请给一些提示。
collapse 中的 `?fsum` 是按列求和的:
fsum is a generic function that computes the (column-wise) sum of all values in x, (optionally) grouped by g and/or weighted by w (e.g. to calculate survey totals).
tidytable 的代码是按行(rowwise)计算的,所以一种选择是:先选取(`slt`)感兴趣的列,转置(`t`),转换为 tibble/data.frame,然后使用 `fsum` 求和并创建一个新列。
library(collapse)
# Row-wise totals via a transpose: after t(), every original row is a column,
# so the column-wise fsum() yields one total per original row. The totals are
# then attached to wlddev as the new column New1.
Out2 <- ftransform(
  wlddev,
  New1 = fsum(as_tibble(t(slt(wlddev, PCGDP:GINI))))
)
当所有元素都是 NA 时,`sum`(配合 `na.rm = TRUE`)返回 0;而 `fsum` 默认使用 `na.rm = TRUE`,并且在所有元素都是 NA 时返回 NA。
> fsum(c(NA, NA))
[1] NA
> sum(c(NA, NA), na.rm = TRUE)
[1] 0
因此,如果我们把第二个结果(Out2)中的 NA 改为 0,输出结果将与 OP 的 'Out1' 相同。
> Out2$New1[is.na(Out2$New1)] <- 0
> all.equal(Out1, Out2, check.attributes = FALSE)
[1] TRUE
根据@akrun 的回答,我想出了一个更快的解决方案。
# Same transpose + fsum() idea, but going through qDT() instead of a tibble.
# NA totals are replaced by 0 in the New1 column afterwards.
Out3 <- replace_NA(
  X = qDT(
    ftransform(
      .data = wlddev,
      New1 = fsum(t(qDT(slt(wlddev, PCGDP:GINI))))
    )
  ),
  value = 0,
  cols = "New1"
)
速度比较
library(microbenchmark)
# Time the three ways of computing the row-wise sum New1 shown above.
microbenchmark(
# Out1: tidytable row-wise mutate with sum(..., na.rm = TRUE).
Out1 =
wlddev %>%
mutate_rowwise.(New1 = sum(c_across.(PCGDP:GINI), na.rm = TRUE))
# Out2: select columns, transpose, convert to tibble, column-wise fsum().
# Note: this variant does not include the NA -> 0 replacement step.
, Out2 =
wlddev %>%
slt(PCGDP:GINI) %>%
t %>%
as_tibble %>%
fsum(.) %>%
ftransform(wlddev, New1 = .)
# Out3: same idea via qDT(), followed by replacing NA in New1 with 0.
, Out3 =
wlddev %>%
slt(PCGDP:GINI) %>%
qDT() %>%
t %>%
fsum(.) %>%
ftransform(.data = wlddev, New1 = .) %>%
qDT() %>%
replace_NA(X = ., value = 0, cols = "New1")
)
Unit: microseconds
expr min lq mean median uq max neval
Out1 72618.0 78268.75 81296.992 79888.50 81671.10 162397.8 100
Out2 33549.7 35520.75 37763.537 37728.25 39021.90 55001.3 100
Out3 241.2 310.85 360.225 357.40 387.35 780.1 100
我想知道你为什么需要想出这么复杂的东西。base R 中就有 `rowSums` 这样的函数,而 kit 包中还有并行统计函数:
library(collapse)
library(magrittr)
library(kit, include.only = "psum")
library(microbenchmark)
# Compare three simpler ways of building New1 directly inside ftransform().
microbenchmark(
# A: base rowSums() on the selected columns passed through qM().
A = wlddev %>%
ftransform(New1 = rowSums(qM(slt(., PCGDP:GINI)), na.rm = TRUE)),
# B: psum() (imported from kit) applied to the selected columns.
B = wlddev %>%
ftransform(New1 = psum(slt(., PCGDP:GINI), na.rm = TRUE)),
# C: psum() with the three columns passed explicitly by name.
C = wlddev %>%
ftransform(New1 = psum(PCGDP, LIFEEX, GINI, na.rm = TRUE))
)
#> Unit: microseconds
#> expr min lq mean median uq max neval
#> A 68.88 97.8875 194.24037 102.2335 113.8775 4646.366 100
#> B 25.83 30.1350 35.43548 34.9115 38.6630 56.416 100
#> C 22.55 25.8095 29.99396 30.5860 32.9025 53.792 100
由 reprex package (v2.0.1)
于 2022-02-05 创建
我想把下面的 R 代码从 tidytable 翻译为 collapse(Advanced and Fast Data Transformation,高级且快速的数据转换)。
tidytable代码
library(tidytable)
library(collapse)
# Row-wise total of the columns PCGDP through GINI, ignoring missing values.
Out1 <- mutate_rowwise.(
  wlddev,
  New1 = sum(c_across.(PCGDP:GINI), na.rm = TRUE)
)
# Show only the newly created column.
select.(Out1, New1)
# A tidytable: 13,176 x 1
New1
<dbl>
1 32.4
2 33.0
3 33.5
4 34.0
5 34.5
6 34.9
7 35.4
8 35.9
9 36.4
10 36.9
# ... with 13,166 more rows
collapse 代码
library(collapse)
# Attempted collapse translation of the tidytable code above.
# It fails: across() is rejected here because it may only be used inside
# dplyr verbs (see the error message printed right after this snippet).
Out2 <-
wlddev %>%
ftransform(New1 = fsum(across(PCGDP:GINI), na.rm = TRUE))
Error in `context_peek()`:
! `across()` must only be used inside dplyr verbs.
Run `rlang::last_error()` to see where the error occurred.
请给一些提示。
collapse 中的 `?fsum` 是按列求和的:
fsum is a generic function that computes the (column-wise) sum of all values in x, (optionally) grouped by g and/or weighted by w (e.g. to calculate survey totals).
tidytable 的代码是按行(rowwise)计算的,所以一种选择是:先选取(`slt`)感兴趣的列,转置(`t`),转换为 tibble/data.frame,然后使用 `fsum` 求和并创建一个新列。
library(collapse)
# Row-wise totals via a transpose: after t(), every original row is a column,
# so the column-wise fsum() yields one total per original row. The totals are
# then attached to wlddev as the new column New1.
Out2 <- ftransform(
  wlddev,
  New1 = fsum(as_tibble(t(slt(wlddev, PCGDP:GINI))))
)
当所有元素都是 NA 时,`sum`(配合 `na.rm = TRUE`)返回 0;而 `fsum` 默认使用 `na.rm = TRUE`,并且在所有元素都是 NA 时返回 NA。
> fsum(c(NA, NA))
[1] NA
> sum(c(NA, NA), na.rm = TRUE)
[1] 0
因此,如果我们把第二个结果(Out2)中的 NA 改为 0,输出结果将与 OP 的 'Out1' 相同。
> Out2$New1[is.na(Out2$New1)] <- 0
> all.equal(Out1, Out2, check.attributes = FALSE)
[1] TRUE
根据@akrun 的回答,我想出了一个更快的解决方案。
# Same transpose + fsum() idea, but going through qDT() instead of a tibble.
# NA totals are replaced by 0 in the New1 column afterwards.
Out3 <- replace_NA(
  X = qDT(
    ftransform(
      .data = wlddev,
      New1 = fsum(t(qDT(slt(wlddev, PCGDP:GINI))))
    )
  ),
  value = 0,
  cols = "New1"
)
速度比较
library(microbenchmark)
# Time the three ways of computing the row-wise sum New1 shown above.
microbenchmark(
# Out1: tidytable row-wise mutate with sum(..., na.rm = TRUE).
Out1 =
wlddev %>%
mutate_rowwise.(New1 = sum(c_across.(PCGDP:GINI), na.rm = TRUE))
# Out2: select columns, transpose, convert to tibble, column-wise fsum().
# Note: this variant does not include the NA -> 0 replacement step.
, Out2 =
wlddev %>%
slt(PCGDP:GINI) %>%
t %>%
as_tibble %>%
fsum(.) %>%
ftransform(wlddev, New1 = .)
# Out3: same idea via qDT(), followed by replacing NA in New1 with 0.
, Out3 =
wlddev %>%
slt(PCGDP:GINI) %>%
qDT() %>%
t %>%
fsum(.) %>%
ftransform(.data = wlddev, New1 = .) %>%
qDT() %>%
replace_NA(X = ., value = 0, cols = "New1")
)
Unit: microseconds
expr min lq mean median uq max neval
Out1 72618.0 78268.75 81296.992 79888.50 81671.10 162397.8 100
Out2 33549.7 35520.75 37763.537 37728.25 39021.90 55001.3 100
Out3 241.2 310.85 360.225 357.40 387.35 780.1 100
我想知道你为什么需要想出这么复杂的东西。base R 中就有 `rowSums` 这样的函数,而 kit 包中还有并行统计函数:
library(collapse)
library(magrittr)
library(kit, include.only = "psum")
library(microbenchmark)
# Compare three simpler ways of building New1 directly inside ftransform().
microbenchmark(
# A: base rowSums() on the selected columns passed through qM().
A = wlddev %>%
ftransform(New1 = rowSums(qM(slt(., PCGDP:GINI)), na.rm = TRUE)),
# B: psum() (imported from kit) applied to the selected columns.
B = wlddev %>%
ftransform(New1 = psum(slt(., PCGDP:GINI), na.rm = TRUE)),
# C: psum() with the three columns passed explicitly by name.
C = wlddev %>%
ftransform(New1 = psum(PCGDP, LIFEEX, GINI, na.rm = TRUE))
)
#> Unit: microseconds
#> expr min lq mean median uq max neval
#> A 68.88 97.8875 194.24037 102.2335 113.8775 4646.366 100
#> B 25.83 30.1350 35.43548 34.9115 38.6630 56.416 100
#> C 22.55 25.8095 29.99396 30.5860 32.9025 53.792 100
由 reprex package (v2.0.1)
于 2022-02-05 创建