为什么 %>% 不适用于 apply()?
Why does %>% not work with apply()?
考虑以下数据:
library(dplyr)
# Example table: rows B1-B3 plus a "Total" row, with the numeric columns
# Total and A1-A7. The "Total" row is used as the denominator below.
df <- data.frame(
  Total = c(3450, 1728, 122, 5300),
  A1 = c(1092, 497, 4, 1593),
  A2 = c(596, 156, 29, 781),
  A3 = c(801, 417, 36, 1254),
  A4 = c(107, 11, 0, 118),
  A5 = c(614, 217, 21, 852),
  A6 = c(132, 47, 0, 179),
  A7 = c(108, 383, 32, 523),
  row.names = c("B1", "B2", "B3", "Total")
)
Total A1 A2 A3 A4 A5 A6 A7
B1 3450 1092 596 801 107 614 132 108
B2 1728 497 156 417 11 217 47 383
B3 122 4 29 36 0 21 0 32
Total 5300 1593 781 1254 118 852 179 523
问题如下:以 Total 行为分母,计算同一列中每个单元格占 Total 行对应值的百分比。
在这种情况下,我会做
# Express every cell as a percentage of the "Total" row of its column,
# rounded to two decimals and formatted as text with a trailing "%".
df_names <- rownames(df)

# Denominators: the "Total" row as a 1 x ncol matrix.
total_row <- as.matrix(df[rownames(df) == "Total", ])

# Column-wise division by the totals, scaled to percent and rounded.
pct <- sweep(df, MARGIN = 2, STATS = total_row, FUN = "/") * 100
pct <- round(pct, digits = 2)

# paste0() is applied per column; the resulting character matrix has no row
# names, so they are restored before converting back to a data frame.
df2 <- apply(pct, MARGIN = 2, FUN = paste0, "%")
rownames(df2) <- df_names
df2 <- as.data.frame(df2)
Total A1 A2 A3 A4 A5 A6 A7
B1 65.09% 68.55% 76.31% 63.88% 90.68% 72.07% 73.74% 20.65%
B2 32.6% 31.2% 19.97% 33.25% 9.32% 25.47% 26.26% 73.23%
B3 2.3% 0.25% 3.71% 2.87% 0% 2.46% 0% 6.12%
Total 100% 100% 100% 100% 100% 100% 100% 100%
虽然这样可行,但我更喜欢使用 %>% 来写:
# Failing attempt, kept as written to reproduce the error.
# %>% has higher precedence than *, so R parses this as
#   sweep(...) * (100 %>% round(digits = 2) %>% apply(...))
# The pipeline therefore starts from the bare number 100, which has no dim,
# and apply() stops with "dim(X) must have a positive length".
df3 <- sweep(df,
STATS = as.matrix(df[rownames(df) == "Total",]),
MARGIN = 2,
FUN = "/") * 100 %>%
round(digits = 2) %>%
apply(., MARGIN = 2, FUN = paste0, "%")
Error in apply(., MARGIN = 2, FUN = paste0, "%") :
dim(X) must have a positive length
为什么上面使用 %>%
的方法不起作用?
问题出在末尾的数字 100 以及它进入管道的方式:%>% 的优先级高于 *,因此被送入管道的只有 100,而不是 sweep(...) * 100 的结果。
把整个乘法表达式用 () 括起来再接管道即可正常工作;下面是一个更简洁的解决方案:
# Wrapping the whole product in parentheses makes the percentage table, not
# the bare 100, the left-hand side of the pipe.
(
  sweep(
    df,
    MARGIN = 2,
    STATS = as.matrix(df[rownames(df) == "Total", ]),
    FUN = "/"
  ) * 100
) %>%
  round(digits = 2) %>%
  mutate_all(function(column) sprintf("%s%%", column))
Total A1 A2 A3 A4 A5 A6 A7
1 65.09% 68.55% 76.31% 63.88% 90.68% 72.07% 73.74% 20.65%
2 32.6% 31.2% 19.97% 33.25% 9.32% 25.47% 26.26% 73.23%
3 2.3% 0.25% 3.71% 2.87% 0% 2.46% 0% 6.12%
4 100% 100% 100% 100% 100% 100% 100% 100%
补充说明原因:
%>% 接受两个参数 lhs 和 rhs,并创建一个求值环境;其中 lhs(left-hand side,左侧)是输入值。
由于 %>% 的优先级高于 *,下面这段代码中管道的 lhs 只是 100:
# Precedence demo, kept as written: %>% is evaluated before *, so print()
# is handed only the literal 100. The multiplication by the sweep() result
# happens afterwards and its value is auto-printed by the console.
sweep(df,
STATS = as.matrix(df[rownames(df) == "Total",]),
MARGIN = 2,
FUN = "/") * 100 %>% print
[1] 100
Total A1 A2 A3 A4 A5 A6 A7
B1 65.094340 68.5499058 76.312420 63.875598 90.677966 72.065728 73.74302 20.650096
B2 32.603774 31.1989956 19.974392 33.253589 9.322034 25.469484 26.25698 73.231358
B3 2.301887 0.2510986 3.713188 2.870813 0.000000 2.464789 0.00000 6.118547
Total 100.000000 100.0000000 100.000000 100.000000 100.000000 100.000000 100.00000 100.000000
注意输出开头的 [1] 100:print 收到的只是数字 100。
现在加上圆括号:
# With the product parenthesised, print() receives the full percentage table.
(
  sweep(
    df,
    MARGIN = 2,
    STATS = as.matrix(df[rownames(df) == "Total", ]),
    FUN = "/"
  ) * 100
) %>%
  print()
Total A1 A2 A3 A4 A5 A6 A7
B1 65.094340 68.5499058 76.312420 63.875598 90.677966 72.065728 73.74302 20.650096
B2 32.603774 31.1989956 19.974392 33.253589 9.322034 25.469484 26.25698 73.231358
B3 2.301887 0.2510986 3.713188 2.870813 0.000000 2.464789 0.00000 6.118547
Total 100.000000 100.0000000 100.000000 100.000000 100.000000 100.000000 100.00000 100.000000
使用花括号 {} 也可以:
# Alternative: pipe the ratios into a brace block and do the scaling,
# rounding and "%" formatting inside it, where `.` is the sweep() result.
sweep(
  df,
  MARGIN = 2,
  STATS = as.matrix(df[rownames(df) == "Total", ]),
  FUN = "/"
) %>%
  {
    round(. * 100, 2) %>%
      mutate_all(function(column) sprintf("%s%%", column))
  }
Total A1 A2 A3 A4 A5 A6 A7
1 65.09% 68.55% 76.31% 63.88% 90.68% 72.07% 73.74% 20.65%
2 32.6% 31.2% 19.97% 33.25% 9.32% 25.47% 26.26% 73.23%
3 2.3% 0.25% 3.71% 2.87% 0% 2.46% 0% 6.12%
4 100% 100% 100% 100% 100% 100% 100% 100%
考虑以下数据:
library(dplyr)
# Example table: rows B1-B3 plus a "Total" row, with the numeric columns
# Total and A1-A7. The "Total" row is used as the denominator below.
df <- data.frame(
  Total = c(3450, 1728, 122, 5300),
  A1 = c(1092, 497, 4, 1593),
  A2 = c(596, 156, 29, 781),
  A3 = c(801, 417, 36, 1254),
  A4 = c(107, 11, 0, 118),
  A5 = c(614, 217, 21, 852),
  A6 = c(132, 47, 0, 179),
  A7 = c(108, 383, 32, 523),
  row.names = c("B1", "B2", "B3", "Total")
)
Total A1 A2 A3 A4 A5 A6 A7
B1 3450 1092 596 801 107 614 132 108
B2 1728 497 156 417 11 217 47 383
B3 122 4 29 36 0 21 0 32
Total 5300 1593 781 1254 118 852 179 523
问题如下:以 Total 行为分母,计算同一列中每个单元格占 Total 行对应值的百分比。
在这种情况下,我会做
# Express every cell as a percentage of the "Total" row of its column,
# rounded to two decimals and formatted as text with a trailing "%".
df_names <- rownames(df)

# Denominators: the "Total" row as a 1 x ncol matrix.
total_row <- as.matrix(df[rownames(df) == "Total", ])

# Column-wise division by the totals, scaled to percent and rounded.
pct <- sweep(df, MARGIN = 2, STATS = total_row, FUN = "/") * 100
pct <- round(pct, digits = 2)

# paste0() is applied per column; the resulting character matrix has no row
# names, so they are restored before converting back to a data frame.
df2 <- apply(pct, MARGIN = 2, FUN = paste0, "%")
rownames(df2) <- df_names
df2 <- as.data.frame(df2)
Total A1 A2 A3 A4 A5 A6 A7
B1 65.09% 68.55% 76.31% 63.88% 90.68% 72.07% 73.74% 20.65%
B2 32.6% 31.2% 19.97% 33.25% 9.32% 25.47% 26.26% 73.23%
B3 2.3% 0.25% 3.71% 2.87% 0% 2.46% 0% 6.12%
Total 100% 100% 100% 100% 100% 100% 100% 100%
虽然这样可行,但我更喜欢使用 %>% 来写:
# Failing attempt, kept as written to reproduce the error.
# %>% has higher precedence than *, so R parses this as
#   sweep(...) * (100 %>% round(digits = 2) %>% apply(...))
# The pipeline therefore starts from the bare number 100, which has no dim,
# and apply() stops with "dim(X) must have a positive length".
df3 <- sweep(df,
STATS = as.matrix(df[rownames(df) == "Total",]),
MARGIN = 2,
FUN = "/") * 100 %>%
round(digits = 2) %>%
apply(., MARGIN = 2, FUN = paste0, "%")
Error in apply(., MARGIN = 2, FUN = paste0, "%") :
dim(X) must have a positive length
为什么上面使用 %>%
的方法不起作用?
问题出在末尾的数字 100 以及它进入管道的方式:%>% 的优先级高于 *,因此被送入管道的只有 100,而不是 sweep(...) * 100 的结果。
把整个乘法表达式用 () 括起来再接管道即可正常工作;下面是一个更简洁的解决方案:
# Wrapping the whole product in parentheses makes the percentage table, not
# the bare 100, the left-hand side of the pipe.
(
  sweep(
    df,
    MARGIN = 2,
    STATS = as.matrix(df[rownames(df) == "Total", ]),
    FUN = "/"
  ) * 100
) %>%
  round(digits = 2) %>%
  mutate_all(function(column) sprintf("%s%%", column))
Total A1 A2 A3 A4 A5 A6 A7
1 65.09% 68.55% 76.31% 63.88% 90.68% 72.07% 73.74% 20.65%
2 32.6% 31.2% 19.97% 33.25% 9.32% 25.47% 26.26% 73.23%
3 2.3% 0.25% 3.71% 2.87% 0% 2.46% 0% 6.12%
4 100% 100% 100% 100% 100% 100% 100% 100%
补充说明原因:
%>% 接受两个参数 lhs 和 rhs,并创建一个求值环境;其中 lhs(left-hand side,左侧)是输入值。
由于 %>% 的优先级高于 *,下面这段代码中管道的 lhs 只是 100:
# Precedence demo, kept as written: %>% is evaluated before *, so print()
# is handed only the literal 100. The multiplication by the sweep() result
# happens afterwards and its value is auto-printed by the console.
sweep(df,
STATS = as.matrix(df[rownames(df) == "Total",]),
MARGIN = 2,
FUN = "/") * 100 %>% print
[1] 100
Total A1 A2 A3 A4 A5 A6 A7
B1 65.094340 68.5499058 76.312420 63.875598 90.677966 72.065728 73.74302 20.650096
B2 32.603774 31.1989956 19.974392 33.253589 9.322034 25.469484 26.25698 73.231358
B3 2.301887 0.2510986 3.713188 2.870813 0.000000 2.464789 0.00000 6.118547
Total 100.000000 100.0000000 100.000000 100.000000 100.000000 100.000000 100.00000 100.000000
注意输出开头的 [1] 100:print 收到的只是数字 100。
现在加上圆括号:
# With the product parenthesised, print() receives the full percentage table.
(
  sweep(
    df,
    MARGIN = 2,
    STATS = as.matrix(df[rownames(df) == "Total", ]),
    FUN = "/"
  ) * 100
) %>%
  print()
Total A1 A2 A3 A4 A5 A6 A7
B1 65.094340 68.5499058 76.312420 63.875598 90.677966 72.065728 73.74302 20.650096
B2 32.603774 31.1989956 19.974392 33.253589 9.322034 25.469484 26.25698 73.231358
B3 2.301887 0.2510986 3.713188 2.870813 0.000000 2.464789 0.00000 6.118547
Total 100.000000 100.0000000 100.000000 100.000000 100.000000 100.000000 100.00000 100.000000
使用花括号 {} 也可以:
# Alternative: pipe the ratios into a brace block and do the scaling,
# rounding and "%" formatting inside it, where `.` is the sweep() result.
sweep(
  df,
  MARGIN = 2,
  STATS = as.matrix(df[rownames(df) == "Total", ]),
  FUN = "/"
) %>%
  {
    round(. * 100, 2) %>%
      mutate_all(function(column) sprintf("%s%%", column))
  }
Total A1 A2 A3 A4 A5 A6 A7
1 65.09% 68.55% 76.31% 63.88% 90.68% 72.07% 73.74% 20.65%
2 32.6% 31.2% 19.97% 33.25% 9.32% 25.47% 26.26% 73.23%
3 2.3% 0.25% 3.71% 2.87% 0% 2.46% 0% 6.12%
4 100% 100% 100% 100% 100% 100% 100% 100%