使用 apply 函数向 df 列表中的每个 df 添加一个新列
Add a new column to each df in a list of dfs using apply function
你好,我有一个由数据框组成的列表,想给其中的每个数据框都添加一个新列。我目前使用的 for 循环可以完成这项工作,但我一直想找到一种更优雅的、基于 apply 函数族的写法。
下面是一个可复现的示例(reprex):
# Week-of-year number for a single date, with weeks running Monday-Sunday.
#
# Week 1 starts on 1 January. When the year starts on a Monday, week 1 is the
# full first seven days; otherwise week 1 is the partial week that ends on the
# day before the first Monday. Every later Monday starts the next week.
#
# Args:
#   x: a single Date (length 1). The function is deliberately scalar; use
#      sapply()/vapply() to apply it to each element of a vector of dates.
#
# Returns:
#   The week number as a numeric scalar, or NA_real_ when `x` is NA.
week_no <- function(x) {
  stopifnot(length(x) == 1L)
  if (is.na(x)) {
    return(NA_real_)
  }

  year <- as.numeric(format(x, "%Y"))
  day_of_year <- as.numeric(format(x, "%j"))

  # "%u" is the weekday as a number (1 = Monday ... 7 = Sunday). Unlike "%A"
  # it does not depend on the session locale, so the Monday test also works
  # when weekday names are not in English.
  jan1_weekday <- as.numeric(format(as.Date(paste0(year, "-01-01")), "%u"))

  if (jan1_weekday == 1) {
    # Year starts on a Monday: days 1-7 are week 1, days 8-14 are week 2, ...
    # Subtracting 1 keeps the Sunday that closes a week inside that week.
    week <- (day_of_year - 1) %/% 7 + 1
  } else {
    # Day-of-year of the first Monday: 2 when 1 January is a Sunday (7),
    # up to 7 when 1 January is a Tuesday (2).
    first_monday <- 9 - jan1_weekday
    if (day_of_year < first_monday) {
      week <- 1
    } else {
      week <- (day_of_year - first_monday) %/% 7 + 2
    }
  }
  week
}
# Example input: a named list (l1, l2, l3) of data frames sharing one layout.
# All values are entered as quoted strings. `Q code` is not a syntactic name,
# so data.frame() (check.names = TRUE by default) stores it as "Q.code".
mapp_dfs <- list(
  l1 = data.frame(
    Timestamp = c(
      "1993-08-30T00", "2002-01-16T00", "2010-01-13T00",
      "2016-11-08T00", "2019-05-13T00"
    ),
    Value = c("13.53", "1.55", "5.63", "7.32", "7.89"),
    `Q code` = c("1", "2", "3", "4", "5")
  ),
  l2 = data.frame(
    Timestamp = c(
      "1994-07-10T00", "2003-01-26T00", "2011-01-13T00",
      "2016-11-08T00", "2019-05-23T00"
    ),
    Value = c("13.53", "1.55", "5.63", "9.31", "5.63"),
    `Q code` = c("1", "1", "3", "4", "1")
  ),
  l3 = data.frame(
    Timestamp = c(
      "1995-08-30T00", "2004-01-16T00", "2012-01-13T00",
      "2013-11-08T00", "2019-06-03T00"
    ),
    Value = c("1.36", "5.63", "5.63", "7.32", "5.22"),
    `Q code` = c("2", "2", "5", "4", "4")
  )
)
# Attempt via transform(): week_no() is called once with all of the parsed
# Timestamp dates of a data frame, rather than once per date.
# NOTE(review): presumably fails for the same reason as the cbind() attempt
# further down (week_no() handles one date per call) — confirm.
lapply(mapp_dfs, transform, week_nums = week_no(as.Date(unlist(strsplit(Timestamp, "T"))[ c(TRUE,FALSE) ])))
# ---- Working approach: explicit loop over the list ----
# For every data frame: split each Timestamp at "T", keep the odd-positioned
# pieces (the date part before "T"), convert them to Date, and let sapply()
# call week_no() once per date. The result is stored as column `week_nums`.
for (i in seq_along(mapp_dfs)) {
  mapp_dfs[[i]][["week_nums"]] <- sapply(
    as.Date(unlist(strsplit(mapp_dfs[[i]]$Timestamp, "T"))[c(TRUE, FALSE)]),
    week_no
  )
}
我确实尝试过下面几种写法,但它们都会报错:
# Failed attempt 1: week_no() is called once with the entire vector of parsed
# dates instead of once per date; the messages recorded below are the result.
lapply(mapp_dfs, function(x)
cbind(x, week_nums = week_no(as.Date(unlist(strsplit(x$Timestamp, "T"))[ c(TRUE,FALSE) ]))))
#Error in seq.Date(as.Date(paste(year, "-01-01", sep = "")), as.Date(paste(year, :
# 'from' must be of length 1
#In addition: Warning message:
#In if (Jan1_day == "Monday") { :
#Error in seq.Date(as.Date(paste(year, "-01-01", sep = "")), as.Date(paste(year, :
# 'from' must be of length 1
# Failed attempt 2: the bare name `Timestamp` in the `week_nums` argument is
# looked up as an ordinary variable, not as a column of the data frames in
# mapp_dfs, and no such variable exists — hence the error recorded below.
mapply(cbind, mapp_dfs, "week_nums"=week_no(as.Date(unlist(strsplit(Timestamp, "T"))[ c(TRUE,FALSE) ])), SIMPLIFY=F)
# Error in strsplit(Timestamp, "T") : object 'Timestamp' not found
函数 week_no 没有向量化(每次调用只能处理一个日期),因此需要某种循环来逐个处理 strsplit 拆分后得到的每个值。你在 for 循环中已经使用了 sapply,所以这里也可以沿用同样的做法。
# Apply-family version of the loop: for each data frame, take the date part of
# every Timestamp (the piece before "T"), convert it to Date, and let sapply()
# call week_no() once per date; cbind() appends the result as `week_nums`.
lapply(mapp_dfs, function(df) {
  date_part <- unlist(strsplit(df$Timestamp, "T"))[c(TRUE, FALSE)]
  cbind(df, week_nums = sapply(as.Date(date_part), week_no))
})
#$l1
# Timestamp Value Q.code week_nums
#1 1993-08-30T00 13.53 1 36
#2 2002-01-16T00 1.55 2 3
#3 2010-01-13T00 5.63 3 3
#4 2016-11-08T00 7.32 4 46
#5 2019-05-13T00 7.89 5 20
#$l2
# Timestamp Value Q.code week_nums
#1 1994-07-10T00 13.53 1 28
#2 2003-01-26T00 1.55 1 4
#3 2011-01-13T00 5.63 3 3
#4 2016-11-08T00 9.31 4 46
#5 2019-05-23T00 5.63 1 21
#$l3
# Timestamp Value Q.code week_nums
#1 1995-08-30T00 1.36 2 36
#2 2004-01-16T00 5.63 2 3
#3 2012-01-13T00 5.63 5 3
#4 2013-11-08T00 7.32 4 45
#5 2019-06-03T00 5.22 4 23
你好,我有一个由数据框组成的列表,想给其中的每个数据框都添加一个新列。我目前使用的 for 循环可以完成这项工作,但我一直想找到一种更优雅的、基于 apply 函数族的写法。
下面是一个可复现的示例(reprex):
# Week-of-year number for a single date, with weeks running Monday-Sunday.
#
# Week 1 starts on 1 January. When the year starts on a Monday, week 1 is the
# full first seven days; otherwise week 1 is the partial week that ends on the
# day before the first Monday. Every later Monday starts the next week.
#
# Args:
#   x: a single Date (length 1). The function is deliberately scalar; use
#      sapply()/vapply() to apply it to each element of a vector of dates.
#
# Returns:
#   The week number as a numeric scalar, or NA_real_ when `x` is NA.
week_no <- function(x) {
  stopifnot(length(x) == 1L)
  if (is.na(x)) {
    return(NA_real_)
  }

  year <- as.numeric(format(x, "%Y"))
  day_of_year <- as.numeric(format(x, "%j"))

  # "%u" is the weekday as a number (1 = Monday ... 7 = Sunday). Unlike "%A"
  # it does not depend on the session locale, so the Monday test also works
  # when weekday names are not in English.
  jan1_weekday <- as.numeric(format(as.Date(paste0(year, "-01-01")), "%u"))

  if (jan1_weekday == 1) {
    # Year starts on a Monday: days 1-7 are week 1, days 8-14 are week 2, ...
    # Subtracting 1 keeps the Sunday that closes a week inside that week.
    week <- (day_of_year - 1) %/% 7 + 1
  } else {
    # Day-of-year of the first Monday: 2 when 1 January is a Sunday (7),
    # up to 7 when 1 January is a Tuesday (2).
    first_monday <- 9 - jan1_weekday
    if (day_of_year < first_monday) {
      week <- 1
    } else {
      week <- (day_of_year - first_monday) %/% 7 + 2
    }
  }
  week
}
# Example input: a named list (l1, l2, l3) of data frames sharing one layout.
# All values are entered as quoted strings. `Q code` is not a syntactic name,
# so data.frame() (check.names = TRUE by default) stores it as "Q.code".
mapp_dfs <- list(
  l1 = data.frame(
    Timestamp = c(
      "1993-08-30T00", "2002-01-16T00", "2010-01-13T00",
      "2016-11-08T00", "2019-05-13T00"
    ),
    Value = c("13.53", "1.55", "5.63", "7.32", "7.89"),
    `Q code` = c("1", "2", "3", "4", "5")
  ),
  l2 = data.frame(
    Timestamp = c(
      "1994-07-10T00", "2003-01-26T00", "2011-01-13T00",
      "2016-11-08T00", "2019-05-23T00"
    ),
    Value = c("13.53", "1.55", "5.63", "9.31", "5.63"),
    `Q code` = c("1", "1", "3", "4", "1")
  ),
  l3 = data.frame(
    Timestamp = c(
      "1995-08-30T00", "2004-01-16T00", "2012-01-13T00",
      "2013-11-08T00", "2019-06-03T00"
    ),
    Value = c("1.36", "5.63", "5.63", "7.32", "5.22"),
    `Q code` = c("2", "2", "5", "4", "4")
  )
)
# Attempt via transform(): week_no() is called once with all of the parsed
# Timestamp dates of a data frame, rather than once per date.
# NOTE(review): presumably fails for the same reason as the cbind() attempt
# further down (week_no() handles one date per call) — confirm.
lapply(mapp_dfs, transform, week_nums = week_no(as.Date(unlist(strsplit(Timestamp, "T"))[ c(TRUE,FALSE) ])))
# ---- Working approach: explicit loop over the list ----
# For every data frame: split each Timestamp at "T", keep the odd-positioned
# pieces (the date part before "T"), convert them to Date, and let sapply()
# call week_no() once per date. The result is stored as column `week_nums`.
for (i in seq_along(mapp_dfs)) {
  mapp_dfs[[i]][["week_nums"]] <- sapply(
    as.Date(unlist(strsplit(mapp_dfs[[i]]$Timestamp, "T"))[c(TRUE, FALSE)]),
    week_no
  )
}
我确实尝试过下面几种写法,但它们都会报错:
# Failed attempt 1: week_no() is called once with the entire vector of parsed
# dates instead of once per date; the messages recorded below are the result.
lapply(mapp_dfs, function(x)
cbind(x, week_nums = week_no(as.Date(unlist(strsplit(x$Timestamp, "T"))[ c(TRUE,FALSE) ]))))
#Error in seq.Date(as.Date(paste(year, "-01-01", sep = "")), as.Date(paste(year, :
# 'from' must be of length 1
#In addition: Warning message:
#In if (Jan1_day == "Monday") { :
#Error in seq.Date(as.Date(paste(year, "-01-01", sep = "")), as.Date(paste(year, :
# 'from' must be of length 1
# Failed attempt 2: the bare name `Timestamp` in the `week_nums` argument is
# looked up as an ordinary variable, not as a column of the data frames in
# mapp_dfs, and no such variable exists — hence the error recorded below.
mapply(cbind, mapp_dfs, "week_nums"=week_no(as.Date(unlist(strsplit(Timestamp, "T"))[ c(TRUE,FALSE) ])), SIMPLIFY=F)
# Error in strsplit(Timestamp, "T") : object 'Timestamp' not found
函数 week_no 没有向量化(每次调用只能处理一个日期),因此需要某种循环来逐个处理 strsplit 拆分后得到的每个值。你在 for 循环中已经使用了 sapply,所以这里也可以沿用同样的做法。
# Apply-family version of the loop: for each data frame, take the date part of
# every Timestamp (the piece before "T"), convert it to Date, and let sapply()
# call week_no() once per date; cbind() appends the result as `week_nums`.
lapply(mapp_dfs, function(df) {
  date_part <- unlist(strsplit(df$Timestamp, "T"))[c(TRUE, FALSE)]
  cbind(df, week_nums = sapply(as.Date(date_part), week_no))
})
#$l1
# Timestamp Value Q.code week_nums
#1 1993-08-30T00 13.53 1 36
#2 2002-01-16T00 1.55 2 3
#3 2010-01-13T00 5.63 3 3
#4 2016-11-08T00 7.32 4 46
#5 2019-05-13T00 7.89 5 20
#$l2
# Timestamp Value Q.code week_nums
#1 1994-07-10T00 13.53 1 28
#2 2003-01-26T00 1.55 1 4
#3 2011-01-13T00 5.63 3 3
#4 2016-11-08T00 9.31 4 46
#5 2019-05-23T00 5.63 1 21
#$l3
# Timestamp Value Q.code week_nums
#1 1995-08-30T00 1.36 2 36
#2 2004-01-16T00 5.63 2 3
#3 2012-01-13T00 5.63 5 3
#4 2013-11-08T00 7.32 4 45
#5 2019-06-03T00 5.22 4 23