按列名前缀分组循环计算行总和(rowSums)
Rowsums conditional on column name in a loop
这是这个问题的后续问题:
我的数据框名为 wiot,如下所示:
VAR1 VAR2 AUS1 ... AUS56 BEL1 ... BEL56 NLD1 ... NLD56
A D 23 ... 99 0 ... 444 123 ... 675
B D 55 ... 6456 0 ... 557 567 ... 4345
我想分别计算 AUS、BEL、NLD 各组变量的行总和,然后删除旧变量。像这样:
# Collapse each country's numbered columns (AUS1 ... AUS56, and so on) into a
# single row-sum column, then keep only the identifier columns and the totals.
# One mutate() per country group is what the question wants to avoid repeating.
wiot <- wiot %>%
  mutate(AUS = rowSums(.[grep("AUS", names(.))])) %>%
  mutate(BEL = rowSums(.[grep("BEL", names(.))])) %>%
  mutate(NLD = rowSums(.[grep("NLD", names(.))])) %>%
  # Column names are case-sensitive: the identifier columns are VAR1 and VAR2.
  select(VAR1, VAR2, AUS, BEL, NLD)
当然,变量组的数量很多,不只是这三个(准确地说是 43 个)。有没有不必写 43 条 mutate 命令的简便方法?
你可以试试这个:
# Prefixes of the column groups to total; extend with further prefixes as needed.
vec <- c("AUS", "BEL", "NLD")

# One row-sum vector per prefix.
# - "^" anchors the prefix so it cannot match in the middle of another name.
# - drop = FALSE keeps a data frame even when a group has a single column
#   (rowSums() rejects a bare vector).
# - lapply() + cbind always yields a matrix with one column per prefix, whereas
#   sapply() changes its return shape for single-row input.
sums <- do.call(
  cbind,
  lapply(
    setNames(vec, vec),
    function(x) {
      rowSums(df[, grep(paste0("^", x), names(df)), drop = FALSE])
    }
  )
)

# Identifier columns first, followed by one total column per prefix.
cbind(df[, grep("^VAR", names(df)), drop = FALSE], sums)
# VAR1 VAR2 AUS BEL NLD
#1 A D 122 444 798
#2 B D 6511 557 4912
您只需要把全部 43 个变量组的前缀放入 vec 即可。
数据
# Example data: two identifier factors plus two numbered columns
# for each of the three country prefixes (AUS, BEL, NLD).
df <- data.frame(
  VAR1 = factor(c("A", "B")),
  VAR2 = factor(c("D", "D")),
  AUS1 = c(23L, 55L),
  AUS56 = c(99L, 6456L),
  BEL1 = c(0L, 0L),
  BEL56 = c(444L, 557L),
  NLD1 = c(123L, 567L),
  NLD56 = c(675L, 4345L)
)
更简便的做法是先把数据从宽格式转换为长格式(gather),然后进行汇总,并在需要时再转换回宽格式(spread):
library(dplyr)
library(tidyr)
# Reshape to long form, total each country group, then reshape back to wide.
# `df` is the example data frame from the answer by @989.
df %>%
  # Stack every non-identifier column into (myKey, myValue) pairs.
  gather(key = myKey, value = myValue, -c(VAR1, VAR2)) %>%
  # Strip the digits so "AUS1" and "AUS56" both fall into group "AUS".
  # The backslash must be doubled inside an R string literal.
  mutate(myGroup = gsub("\\d", "", myKey)) %>%
  group_by(VAR1, VAR2, myGroup) %>%
  summarise(mySum = sum(myValue)) %>%
  # One column per country group again.
  spread(key = myGroup, value = mySum)
# Source: local data frame [2 x 5]
# Groups: VAR1, VAR2 [2]
#
# VAR1 VAR2 AUS BEL NLD
# * <fctr> <fctr> <int> <int> <int>
# 1 A D 122 444 798
# 2 B D 6511 557 4912
这是另一个使用 tidyverse 函数、但不需要 gather 的版本:
library(tidyverse)
# Row-sum every group of numbered columns and append one "<prefix>_sum" column
# per group to the columns that belong to no group.
#
# dat: data frame holding identifier columns plus numbered columns such as
#      AUS1, AUS2, ..., AUT1, ...
# pat: character vector of column-name prefixes, e.g. c("AUS", "AUT").
#
# Returns the non-matching columns of `dat` followed by one sum column per
# group; group columns appear in the order produced by split.default().
fSumN1 <- function(dat, pat) {
  # One alternation regex covering every requested prefix, e.g. "AUS|AUT".
  pat1 <- paste(pat, collapse = "|")

  # Columns outside every group are carried through untouched.
  dat1 <- dat %>%
    select(-matches(pat1))

  sums <- dat %>%
    select(matches(pat1)) %>%
    # Group columns by their name with the digits removed ("AUS56" -> "AUS").
    # The backslash must be doubled inside an R string literal.
    split.default(sub("\\d+", "", names(.))) %>%
    map_df(rowSums)

  # Derive the new names from the actual group names, so the result does not
  # depend on the order of `pat` or on any global variable.
  names(sums) <- paste0(names(sums), "_sum")

  bind_cols(dat1, sums)
}

pats <- c("AUS", "AUT")
fSumN1(dfN, pats)
# VAR1 VAR2 VAR3 VAR4 AUS_sum AUT_sum
#1 A D 0 FCK 1246 3076
#2 B D 0 XYC 6678 3349
数据
# Example data: character identifier columns (VAR1, VAR2, VAR4), an integer
# column VAR3, and five numbered columns for each of the AUS and AUT prefixes.
dfN <- data.frame(
  VAR1 = c("A", "B"),
  VAR2 = c("D", "D"),
  AUS1 = c(23L, 55L),
  AUS2 = c(234L, 76L),
  AUS3 = c(34L, 55L),
  AUS4 = c(856L, 36L),
  AUS56 = c(99L, 6456L),
  VAR3 = c(0L, 0L),
  VAR4 = c("FCK", "XYC"),
  AUT1 = c(598L, 774L),
  AUT2 = c(992L, 503L),
  AUT3 = c(819L, 944L),
  AUT4 = c(368L, 717L),
  AUT56 = c(299L, 411L),
  # Keep the text columns as character vectors on every R version.
  stringsAsFactors = FALSE
)
这是这个问题的后续问题:
我的数据框名为 wiot,如下所示:
VAR1 VAR2 AUS1 ... AUS56 BEL1 ... BEL56 NLD1 ... NLD56
A D 23 ... 99 0 ... 444 123 ... 675
B D 55 ... 6456 0 ... 557 567 ... 4345
我想分别计算 AUS、BEL、NLD 各组变量的行总和,然后删除旧变量。像这样:
# Collapse each country's numbered columns (AUS1 ... AUS56, and so on) into a
# single row-sum column, then keep only the identifier columns and the totals.
# One mutate() per country group is what the question wants to avoid repeating.
wiot <- wiot %>%
  mutate(AUS = rowSums(.[grep("AUS", names(.))])) %>%
  mutate(BEL = rowSums(.[grep("BEL", names(.))])) %>%
  mutate(NLD = rowSums(.[grep("NLD", names(.))])) %>%
  # Column names are case-sensitive: the identifier columns are VAR1 and VAR2.
  select(VAR1, VAR2, AUS, BEL, NLD)
当然,变量组的数量很多,不只是这三个(准确地说是 43 个)。有没有不必写 43 条 mutate 命令的简便方法?
你可以试试这个:
# Prefixes of the column groups to total; extend with further prefixes as needed.
vec <- c("AUS", "BEL", "NLD")

# One row-sum vector per prefix.
# - "^" anchors the prefix so it cannot match in the middle of another name.
# - drop = FALSE keeps a data frame even when a group has a single column
#   (rowSums() rejects a bare vector).
# - lapply() + cbind always yields a matrix with one column per prefix, whereas
#   sapply() changes its return shape for single-row input.
sums <- do.call(
  cbind,
  lapply(
    setNames(vec, vec),
    function(x) {
      rowSums(df[, grep(paste0("^", x), names(df)), drop = FALSE])
    }
  )
)

# Identifier columns first, followed by one total column per prefix.
cbind(df[, grep("^VAR", names(df)), drop = FALSE], sums)
# VAR1 VAR2 AUS BEL NLD
#1 A D 122 444 798
#2 B D 6511 557 4912
您只需要把全部 43 个变量组的前缀放入 vec 即可。
数据
# Example data: two identifier factors plus two numbered columns
# for each of the three country prefixes (AUS, BEL, NLD).
df <- data.frame(
  VAR1 = factor(c("A", "B")),
  VAR2 = factor(c("D", "D")),
  AUS1 = c(23L, 55L),
  AUS56 = c(99L, 6456L),
  BEL1 = c(0L, 0L),
  BEL56 = c(444L, 557L),
  NLD1 = c(123L, 567L),
  NLD56 = c(675L, 4345L)
)
更简便的做法是先把数据从宽格式转换为长格式(gather),然后进行汇总,并在需要时再转换回宽格式(spread):
library(dplyr)
library(tidyr)
# Reshape to long form, total each country group, then reshape back to wide.
# `df` is the example data frame from the answer by @989.
df %>%
  # Stack every non-identifier column into (myKey, myValue) pairs.
  gather(key = myKey, value = myValue, -c(VAR1, VAR2)) %>%
  # Strip the digits so "AUS1" and "AUS56" both fall into group "AUS".
  # The backslash must be doubled inside an R string literal.
  mutate(myGroup = gsub("\\d", "", myKey)) %>%
  group_by(VAR1, VAR2, myGroup) %>%
  summarise(mySum = sum(myValue)) %>%
  # One column per country group again.
  spread(key = myGroup, value = mySum)
# Source: local data frame [2 x 5]
# Groups: VAR1, VAR2 [2]
#
# VAR1 VAR2 AUS BEL NLD
# * <fctr> <fctr> <int> <int> <int>
# 1 A D 122 444 798
# 2 B D 6511 557 4912
这是另一个使用 tidyverse 函数、但不需要 gather 的版本:
library(tidyverse)
# Row-sum every group of numbered columns and append one "<prefix>_sum" column
# per group to the columns that belong to no group.
#
# dat: data frame holding identifier columns plus numbered columns such as
#      AUS1, AUS2, ..., AUT1, ...
# pat: character vector of column-name prefixes, e.g. c("AUS", "AUT").
#
# Returns the non-matching columns of `dat` followed by one sum column per
# group; group columns appear in the order produced by split.default().
fSumN1 <- function(dat, pat) {
  # One alternation regex covering every requested prefix, e.g. "AUS|AUT".
  pat1 <- paste(pat, collapse = "|")

  # Columns outside every group are carried through untouched.
  dat1 <- dat %>%
    select(-matches(pat1))

  sums <- dat %>%
    select(matches(pat1)) %>%
    # Group columns by their name with the digits removed ("AUS56" -> "AUS").
    # The backslash must be doubled inside an R string literal.
    split.default(sub("\\d+", "", names(.))) %>%
    map_df(rowSums)

  # Derive the new names from the actual group names, so the result does not
  # depend on the order of `pat` or on any global variable.
  names(sums) <- paste0(names(sums), "_sum")

  bind_cols(dat1, sums)
}

pats <- c("AUS", "AUT")
fSumN1(dfN, pats)
# VAR1 VAR2 VAR3 VAR4 AUS_sum AUT_sum
#1 A D 0 FCK 1246 3076
#2 B D 0 XYC 6678 3349
数据
# Example data: character identifier columns (VAR1, VAR2, VAR4), an integer
# column VAR3, and five numbered columns for each of the AUS and AUT prefixes.
dfN <- data.frame(
  VAR1 = c("A", "B"),
  VAR2 = c("D", "D"),
  AUS1 = c(23L, 55L),
  AUS2 = c(234L, 76L),
  AUS3 = c(34L, 55L),
  AUS4 = c(856L, 36L),
  AUS56 = c(99L, 6456L),
  VAR3 = c(0L, 0L),
  VAR4 = c("FCK", "XYC"),
  AUT1 = c(598L, 774L),
  AUT2 = c(992L, 503L),
  AUT3 = c(819L, 944L),
  AUT4 = c(368L, 717L),
  AUT56 = c(299L, 411L),
  # Keep the text columns as character vectors on every R version.
  stringsAsFactors = FALSE
)