使用 R 按所有变量组合对值求和

Sum values over all combinations of variables using R

有人可以帮助我使用 R 进行数据操作吗?我有这样的数据(data.train):

# Example data: three dates, one A level, two B levels and three C levels,
# giving 18 rows whose values run 100, 101, ..., 117.
datex <- as.Date(
  rep(c("01/01/17", "02/01/17", "03/01/17"), each = 6),
  "%d/%m/%y"
)
Ax <- rep("A1", 18)
# Within each date: three rows of B1 followed by three rows of B2.
Bx <- rep(rep(c("B1", "B2"), each = 3), times = 3)
# C1, C2, C3 cycle within every B level.
Cx <- rep(c("C1", "C2", "C3"), times = 6)
# Build the whole sequence at once instead of growing valx one element per
# loop iteration; 100 + 0:17 is a double vector, as before.
valx <- 100 + 0:17
data.train <- data.frame(datex, Ax, Bx, Cx, valx)

我需要变量的所有组合,最终形式是这样的

我试过这个代码:

### Library
library(dplyr)

## Sample data: three dates, one A level, two B levels and three C levels,
## giving 18 rows whose values run 100, 101, ..., 117.
datex <- as.Date(
  rep(c("01/01/17", "02/01/17", "03/01/17"), each = 6),
  "%d/%m/%y"
)
Ax <- rep("A1", 18)
Bx <- rep(rep(c("B1", "B2"), each = 3), times = 3)
Cx <- rep(c("C1", "C2", "C3"), times = 6)
# Vectorized instead of growing valx inside a loop; still a double vector.
valx <- 100 + 0:17
data.train <- data.frame(datex, Ax, Bx, Cx, valx)

## Grouping candidates: every column except the last one (valx).
# The previous `1:length(data.train)-1` parsed as `(1:5) - 1`, i.e. 0:4, and
# only worked because index 0 is silently dropped.
names.group <- names(data.train)[-ncol(data.train)]

## Every combination of the grouping columns, from size 1 up to all of them.
data.group <- unlist(
  lapply(seq_along(names.group), function(n) {
    combn(names.group, n, simplify = FALSE)
  }),
  recursive = FALSE
)

## Flag the combinations that contain the date column.
find.index <- vapply(data.group, function(x) "datex" %in% x, logical(1))

## Keep the flagged combinations, except the one made of the date column alone
## (the earlier index loop started at 2 and therefore never kept it).
data.group2 <- data.group[find.index & lengths(data.group) > 1]

## One grouped sum of valx per remaining combination.
# group_by_() is deprecated; across(all_of(x)) groups by the column names held
# in the character vector x. "drop_last" is summarise()'s default grouping of
# the result, stated explicitly to avoid the informational message.
combination.result <- lapply(data.group2, function(x) {
  data.train %>%
    group_by(across(all_of(x))) %>%
    summarise(sumVar = sum(valx), .groups = "drop_last")
})
combination.result

但它没有生成我想要的结果。谢谢。

您可以先生成长度为 1 的组合,再生成长度为 2 的组合。使用 paste 创建 Variable 列,然后用 rbindlist 合并所有结果,得到最终输出。

library(data.table)
setDT(data.train)

# Sum valx per date and per combination of the grouping columns named in x.
#
# x:     character vector of grouping column names in data.train
#        (e.g. "Ax" or c("Ax", "Bx")).
# mySep: separator used when pasting the grouping values into one label.
#
# Returns a data.table with columns Date, Variable (the values of the x
# columns pasted together with mySep) and SumVal (the sum of valx).
#
# The column names follow data.train as built above (datex, Ax, Bx, Cx, valx);
# the earlier Date / Val / A / B / C names do not exist in that table.
sumCombi <- function(x, mySep = "_") {
  sums <- data.train[, list(SumVal = sum(valx)), by = c("datex", x)]
  # .SDcols limits .SD to the grouping columns, so paste() sees only those.
  sums[, list(
    Date = datex,
    Variable = do.call(paste, c(.SD, list(sep = mySep))),
    SumVal = SumVal
  ), .SDcols = x]
}

rbindlist(c(
  # combinations with 1 element in each combi
  lapply(c("Ax", "Bx", "Cx"), sumCombi),
  # combinations with 2 elements in each combi
  lapply(combn(c("Ax", "Bx", "Cx"), 2, simplify = FALSE), sumCombi)
), use.names = FALSE)

或者,采用更通用、更程序化的写法:

# The grouping columns are assumed to be everything between the first and the
# last column of data.train, so drop those two and keep the rest.
myCols <- head(names(data.train)[-1], -1)

# For every combination size k, apply sumCombi to each k-column combination.
perSize <- lapply(seq_along(myCols), function(k) {
  combn(myCols, k, FUN = sumCombi, simplify = FALSE)
})

# Flatten the per-size lists into one list of tables and stack them.
rbindlist(do.call(c, perSize), use.names = FALSE)