尝试使用 dplyr 在 R 中查找行总和,然后过滤掉列
Trying to find row sums in R using dplyr, then filter out columns
所以我认为这会更简单一些,但事实证明并非如此。
我想计算每一行的总和,并只保留行总和不等于 0 的行;但由于第一列是标识符,求和时需要排除第一列,只对其余各列求和(使用 dplyr)。此外,总和等于 0 的列也应被删除。
# Asker's failing attempt, kept verbatim because it reproduces the error
# quoted right after it: the error is raised while evaluating `2:.N`.
# NOTE(review): `.N` is a data.table symbol, not a dplyr one, so it does not
# hold the number of columns here; presumably `ncol(.)` was intended.
df %>%
mutate(Total = rowSums(.[2:.N])) %>%
filter(Total != 0)
但是我收到以下错误:
Error in 2:.N : argument of length 0
这是 df:
# Example data: an identifier column (Sequence) followed by Period_1 ...
# Period_118, nine rows, stored with tibble classes.
# Built inside local() so the helper objects do not leak into the workspace.
df <- local({
  n_periods <- 118L

  # Pattern shared by almost every period column: NA in the first and last
  # rows, zero in the seven rows in between.
  filler <- c(NA, rep(0, 7), NA)
  periods <- rep(list(filler), n_periods)
  names(periods) <- paste0("Period_", seq_len(n_periods))

  # The few period columns that deviate from the shared pattern.
  periods[["Period_1"]] <- c(1, rep(0, 7), 3)
  periods[["Period_2"]] <- c(2, rep(0, 7), 4)
  periods[["Period_3"]] <- c(23232, rep(0, 7), -103)
  periods[["Period_30"]] <- c(
    NA, -1170303.36, -449711.72, 0, 0, -30548246.94, -42287004.62, 2.98818, NA
  )
  periods[["Period_34"]] <- c(NA, 0, 0, 497378.9, 191127.5, 0, 0, 0, NA)

  id_col <- list(Sequence = c(12, 600, 620, 630, 660, 770, 780, 800, 6900))

  structure(
    c(id_col, periods),
    row.names = c(NA, -9L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})
我已经研究了几种实现方法,但由于需要排除第一列,始终无法让代码无错误地运行。对列的筛选也是同样的逻辑:总和等于 0 的列都应删除。
library(dplyr)    # filter(); if dplyr is not attached, filter() is stats::filter()
library(janitor)  # adorn_totals()

# Add a "Total" column (row sums) and a "Total" row (column sums), then drop
# rows where the row total is 0.
# adorn_totals() skips the first column, assuming it's an identifier.
out <- df %>%
  adorn_totals(c("row", "col")) %>%
  filter(Total != 0)

# Then drop columns where the column total is zero. The column totals are read
# from the last row of `out`.
# NOTE(review): this assumes the totals row survived the filter above, i.e.
# that the grand total is non-zero.
keep_col <- unlist(out[nrow(out), ]) != 0
# drop = FALSE keeps a data frame even if only one column survives.
out[, keep_col, drop = FALSE]
我将您的输入数据缩短为:
# Shortened version of the example data: the identifier column plus the
# first four period columns (nine rows).
df <- data.frame(
  Sequence = c(12, 600, 620, 630, 660, 770, 780, 800, 6900),
  Period_1 = c(1, rep(0, 7), 3),
  Period_2 = c(2, rep(0, 7), 4),
  Period_3 = c(23232, rep(0, 7), -103),
  Period_4 = c(NA, rep(0, 7), NA)
)
所以我认为这会更简单一些,但事实证明并非如此。
我想计算每一行的总和,并只保留行总和不等于 0 的行;但由于第一列是标识符,求和时需要排除第一列,只对其余各列求和(使用 dplyr)。此外,总和等于 0 的列也应被删除。
# Asker's failing attempt, kept verbatim because it reproduces the error
# quoted right after it: the error is raised while evaluating `2:.N`.
# NOTE(review): `.N` is a data.table symbol, not a dplyr one, so it does not
# hold the number of columns here; presumably `ncol(.)` was intended.
df %>%
mutate(Total = rowSums(.[2:.N])) %>%
filter(Total != 0)
但是我收到以下错误:
Error in 2:.N : argument of length 0
这是 df:
# Example data: an identifier column (Sequence) followed by Period_1 ...
# Period_118, nine rows, stored with tibble classes.
# Built inside local() so the helper objects do not leak into the workspace.
df <- local({
  n_periods <- 118L

  # Pattern shared by almost every period column: NA in the first and last
  # rows, zero in the seven rows in between.
  filler <- c(NA, rep(0, 7), NA)
  periods <- rep(list(filler), n_periods)
  names(periods) <- paste0("Period_", seq_len(n_periods))

  # The few period columns that deviate from the shared pattern.
  periods[["Period_1"]] <- c(1, rep(0, 7), 3)
  periods[["Period_2"]] <- c(2, rep(0, 7), 4)
  periods[["Period_3"]] <- c(23232, rep(0, 7), -103)
  periods[["Period_30"]] <- c(
    NA, -1170303.36, -449711.72, 0, 0, -30548246.94, -42287004.62, 2.98818, NA
  )
  periods[["Period_34"]] <- c(NA, 0, 0, 497378.9, 191127.5, 0, 0, 0, NA)

  id_col <- list(Sequence = c(12, 600, 620, 630, 660, 770, 780, 800, 6900))

  structure(
    c(id_col, periods),
    row.names = c(NA, -9L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})
我已经研究了几种实现方法,但由于需要排除第一列,始终无法让代码无错误地运行。对列的筛选也是同样的逻辑:总和等于 0 的列都应删除。
library(dplyr)    # filter(); if dplyr is not attached, filter() is stats::filter()
library(janitor)  # adorn_totals()

# Add a "Total" column (row sums) and a "Total" row (column sums), then drop
# rows where the row total is 0.
# adorn_totals() skips the first column, assuming it's an identifier.
out <- df %>%
  adorn_totals(c("row", "col")) %>%
  filter(Total != 0)

# Then drop columns where the column total is zero. The column totals are read
# from the last row of `out`.
# NOTE(review): this assumes the totals row survived the filter above, i.e.
# that the grand total is non-zero.
keep_col <- unlist(out[nrow(out), ]) != 0
# drop = FALSE keeps a data frame even if only one column survives.
out[, keep_col, drop = FALSE]
我将您的输入数据缩短为:
# Shortened version of the example data: the identifier column plus the
# first four period columns (nine rows).
df <- data.frame(
  Sequence = c(12, 600, 620, 630, 660, 770, 780, 800, 6900),
  Period_1 = c(1, rep(0, 7), 3),
  Period_2 = c(2, rep(0, 7), 4),
  Period_3 = c(23232, rep(0, 7), -103),
  Period_4 = c(NA, rep(0, 7), NA)
)