使用属性表在列表中的特定元素之间应用函数

Using attribute tables to apply a function between specific elements in lists

我有两个列表对象。l1 包含通过文件路径读入的信息;l2 是一个值列表,其名称的组成部分与 l1 中的名称相似。我已经根据元素的名称为这两个列表分别设置了属性,并希望利用这些属性得到我的预期结果。

例如:对于 l1 中属性 id 含有 "mean" 且属性 year 为 "2013" 的元素,我想把它与 l2 中属性 year 同样为 "2013" 的每个元素一起应用函数 mean()。当 year 属性为 "2016" 时,我也想做同样的处理。

# File List
# Two candidate inputs keyed by file path: `oldl1` only has the years that
# also occur in l2, while `newl1` adds 2017 files with no counterpart in l2.
oldl1 <- list(2, 3, 4, 5)
names(oldl1) <- c(
  "C:/Users/2013_mean.csv",
  "C:/Users/2013_median.csv",
  "C:/Users/2016_mean.csv",
  "C:/Users/2016_median.csv"
)

newl1 <- list(2, 3, 4, 5, 8, 9)
names(newl1) <- c(
  "C:/Users/2013_mean.csv",
  "C:/Users/2013_median.csv",
  "C:/Users/2016_mean.csv",
  "C:/Users/2016_median.csv",
  "C:/Users/2017_mean.csv",
  "C:/Users/2017_median.csv"
)

# Everything below works on `l1`, which was previously never defined.
# Use the larger list: elements 1 and 3 (the 2013/2016 "mean" files) are the
# same in both candidates, and the extra 2017 entries exercise the case of a
# year that is missing from l2.
l1 <- newl1

# Replace the path names with two attributes derived from them:
#   id   - file name without the ".csv" extension (e.g. "2013_mean")
#   year - file name with everything from the first "_" removed (e.g. "2013")
# The dot has to be written as "\\." because a bare "\." is not a valid
# escape inside an R string literal and makes the script fail to parse.
# stringsAsFactors = FALSE keeps both attributes character on any R version.
attributes(l1) <- data.frame(
  id = sub("\\.csv$", "", basename(names(l1))),
  year = trimws(basename(names(l1)), whitespace = "_.*"),
  stringsAsFactors = FALSE
)

# Other List
# Five values labelled "<year>_<letter>".
l2 <- setNames(
  list(8, 9, 10, 15, 1),
  c("2013_A", "2013_B", "2013_C", "2016_D", "2016_E")
)

# Swap the names for a single `year` attribute: the label with everything
# from the underscore onwards trimmed away.
attributes(l2) <- data.frame(
  year = trimws(names(l2), which = "both", whitespace = "_.*")
)


# Expected result: each l2 element averaged with the l1 "mean" entry of the
# same year. l1 positions 1 and 3 are paired with l2 positions 1-3 and 4-5.
expected <- Map(
  function(i, j) mean(c(l1[[i]], l2[[j]])),
  c(1, 1, 1, 3, 3),
  c(1, 2, 3, 4, 5)
)

我们可以使用属性进行拆分和匹配并取均值

# Years that occur in both lists; only these can be paired up.
yrs <- intersect(attr(l1, "year"), attr(l2, "year"))

# l1 elements whose id contains "mean" and whose year is shared with l2.
i1 <- grepl("mean", attr(l1, "id"))
i12 <- attr(l1, "year") %in% yrs
i1 <- i1 & i12

# l2 elements whose year is shared with l1.
i2 <- attr(l2, "year") %in% yrs
l2new <- l2[i2]

# `[` drops custom attributes, so re-attach the years to the l1 subset.
l1new <- l1[i1]
attr(l1new, "year") <- attr(l1, "year")[i1]

# Group the l2 values by year, then fetch the l1 value for each group by
# year *name*. split() returns its groups sorted by year, so pairing them
# with l1new purely by position would silently mix up years whenever l1 is
# not already listed in ascending year order.
l2_by_year <- split(l2new, attr(l2, "year")[i2])
l1_by_year <- setNames(l1new, as.character(attr(l1new, "year")))
out <- do.call(c, Map(
  function(x, y) lapply(x, function(z) mean(c(z, y))),
  l2_by_year,
  l1_by_year[names(l2_by_year)]
))
names(out) <- NULL

-检查 OP 的预期

> identical(out, expected)
[1] TRUE

或者,另一种做法是先把带有属性的 list 转换为 data.frame,执行 merge 并用 rowMeans 计算行均值,最后用 as.list 转换回 list

# Alternative: express the pairing as a data.frame join.
# Wrapped in local() so the helper tables do not leak into the workspace;
# the value of the last expression is still printed at top level.
local({
  # One row per l2 element: its attributes plus the value itself.
  l2_tbl <- data.frame(attributes(l2))
  l2_tbl$l2 <- unlist(l2)

  # Same for l1, keeping only the rows whose id contains "mean" and only
  # the columns needed for the join.
  l1_tbl <- data.frame(attributes(l1))
  l1_tbl$l1 <- unlist(l1)
  l1_means <- l1_tbl[grepl("mean", l1_tbl$id), c("year", "l1"), drop = FALSE]

  # Left join on the shared `year` column, drop that key column, and
  # average the two remaining value columns row by row.
  joined <- merge(l2_tbl, l1_means, all.x = TRUE)
  as.list(rowMeans(joined[-1]))
})

-输出

[[1]]
[1] 5

[[2]]
[1] 5.5

[[3]]
[1] 6

[[4]]
[1] 9.5

[[5]]
[1] 2.5