使用 apply 函数向 df 列表中的每个 df 添加一个新列

Add a new column to each df in a list of dfs using apply function

你好,我有一个数据框列表,我想在其中为每个数据框添加新列。我当前的 for 循环方法可以完成工作,但是我一直在寻找一种优雅的方法,来自 apply 函数族。

这是一个代表-


week_no<-function(x){
  year<-as.numeric(format(x, "%Y"))
  Jan1_day<-format(as.Date(paste(year,"-01-01", sep = "")), "%A")
  
  if (Jan1_day=="Monday"){
    
    week<-as.numeric(format(x,"%j")) %/% 7 + 1
    
  } else {
    dateseq<-data.frame(seq(as.Date(paste(year, "-01-01", sep = "")), as.Date(paste(year, "-01-08", sep = "")), by="+1 day"))
    colnames(dateseq)<-"dates"
    dateseq$day<-format(dateseq$dates,"%A")
    if(x < dateseq[dateseq$day=="Monday",1]){
      week<-1
    } else {
      sub_Monday<-subset(dateseq, day=="Monday")
      sub_Monday<-sub_Monday[order(sub_Monday$dates),]
      first_Monday<-sub_Monday[1,1]
      week<-(as.numeric(format(x,"%j"))-as.numeric(format(first_Monday,"%j")))%/% 7+2
    }
    
  }
}


mapp_dfs <- list(
              l1= data.frame(Timestamp= c("1993-08-30T00","2002-01-16T00","2010-01-13T00","2016-11-08T00","2019-05-13T00"),
                         Value= c("13.53","1.55", "5.63", "7.32", "7.89"),
                         `Q code`= c("1","2","3","4","5")),
              l2= data.frame(Timestamp= c("1994-07-10T00","2003-01-26T00","2011-01-13T00","2016-11-08T00","2019-05-23T00"),
                             Value= c("13.53","1.55", "5.63", "9.31", "5.63"),
                             `Q code`= c("1","1","3","4","1")),
              l3= data.frame(Timestamp= c("1995-08-30T00","2004-01-16T00","2012-01-13T00","2013-11-08T00","2019-06-03T00"),
                             Value= c("1.36","5.63", "5.63", "7.32", "5.22"),
                             `Q code`= c("2","2","5","4","4"))
            )


lapply(mapp_dfs, transform, week_nums = week_no(as.Date(unlist(strsplit(Timestamp, "T"))[ c(TRUE,FALSE) ])))

#********************** This method works *******************

for(i in seq_along(mapp_dfs)){

    mapp_dfs[[i]]$week_nums <-  sapply(as.Date(unlist(strsplit(mapp_dfs[[i]]$Timestamp, "T"))[ c(TRUE,FALSE) ]),
                          function(x) week_no(x))

}

我确实尝试了一些方法,但它们会导致错误


lapply(mapp_dfs, function(x) 
  cbind(x, week_nums = week_no(as.Date(unlist(strsplit(x$Timestamp, "T"))[ c(TRUE,FALSE) ]))))

#Error in seq.Date(as.Date(paste(year, "-01-01", sep = "")), as.Date(paste(year,  : 
#  'from' must be of length 1
#In addition: Warning message:
#In if (Jan1_day == "Monday") { :
 
#Error in seq.Date(as.Date(paste(year, "-01-01", sep = "")), as.Date(paste(year,  : 
# 'from' must be of length 1 
mapply(cbind, mapp_dfs, "week_nums"=week_no(as.Date(unlist(strsplit(Timestamp, "T"))[ c(TRUE,FALSE) ])), SIMPLIFY=F)

# Error in strsplit(Timestamp, "T") : object 'Timestamp' not found 

函数 week_no 未向量化,因此您需要某种循环来遍历 strsplit 之后的每个值。在 for 循环中你使用 sapply,所以我们可以在这里使用相同的。

lapply(mapp_dfs, function(x) cbind(x, 
       week_nums = sapply(as.Date(unlist(strsplit(x$Timestamp, "T"))[c(TRUE,FALSE)]), week_no)))

#$l1
#      Timestamp Value Q.code week_nums
#1 1993-08-30T00 13.53      1        36
#2 2002-01-16T00  1.55      2         3
#3 2010-01-13T00  5.63      3         3
#4 2016-11-08T00  7.32      4        46
#5 2019-05-13T00  7.89      5        20

#$l2
#      Timestamp Value Q.code week_nums
#1 1994-07-10T00 13.53      1        28
#2 2003-01-26T00  1.55      1         4
#3 2011-01-13T00  5.63      3         3
#4 2016-11-08T00  9.31      4        46
#5 2019-05-23T00  5.63      1        21

#$l3
#      Timestamp Value Q.code week_nums
#1 1995-08-30T00  1.36      2        36
#2 2004-01-16T00  5.63      2         3
#3 2012-01-13T00  5.63      5         3
#4 2013-11-08T00  7.32      4        45
#5 2019-06-03T00  5.22      4        23