R:根据其他行值重命名列表中的列

R: rename columns in list based on other row value

我从 MATLAB 导入了数据,得到一个很大的列表(超过 1000 个列表元素)。我从中创建了以下示例数据集 data,它只有两个列表元素。

# Example input: a named list holding two data frames.
data <- structure(
  list(
    # One data column (DATA.1); its label is stored in DATA.NAME.
    TEST.DATA.1.1 = structure(
      list(
        ID = c(2, 2, 2),
        YEAR = c(1990, 1991, 1992),
        DATA.1 = c(10, 20, 30),
        DATA.NAME = structure(
          c(1L, 1L, 1L),
          class = "factor",
          .Label = "Test"
        ),
        Remarks = c(1990, 1991, 1992)
      ),
      .Names = c("ID", "YEAR", "DATA.1", "DATA.NAME", "Remarks"),
      row.names = c(NA, -3L),
      class = "data.frame"
    ),
    # Three data columns (DATA.1 to DATA.3) with one label column each.
    TEST.DATA.2.1 = structure(
      list(
        ID = c(4, 4),
        YEAR = c(2000, 2001),
        DATA.1 = c(55, 60),
        DATA.2 = c(0, 2),
        DATA.3 = c(4, 6),
        DATA.NAME.structure..n1....Dim...c.1L..1L.. = structure(
          c(1L, 1L),
          class = "factor",
          .Label = "n1"
        ),
        DATA.NAME.structure..n2....Dim...c.1L..1L.. = structure(
          c(1L, 1L),
          class = "factor",
          .Label = "n2"
        ),
        DATA.NAME.structure..n3....Dim...c.1L..1L.. = structure(
          c(1L, 1L),
          class = "factor",
          .Label = "n3"
        ),
        Remarks = c(2000, 2001)
      ),
      .Names = c(
        "ID", "YEAR", "DATA.1", "DATA.2", "DATA.3",
        "DATA.NAME.structure..n1....Dim...c.1L..1L..",
        "DATA.NAME.structure..n2....Dim...c.1L..1L..",
        "DATA.NAME.structure..n3....Dim...c.1L..1L..",
        "Remarks"
      ),
      row.names = c(NA, -2L),
      class = "data.frame"
    )
  ),
  .Names = c("TEST.DATA.1.1", "TEST.DATA.2.1")
)

data
$TEST.DATA.1.1
  ID YEAR DATA.1 DATA.NAME Remarks
1  2 1990     10      Test    1990
2  2 1991     20      Test    1991
3  2 1992     30      Test    1992

$TEST.DATA.2.1
  ID YEAR DATA.1 DATA.2 DATA.3 DATA.NAME.structure..n1....Dim...c.1L..1L.. DATA.NAME.structure..n2....Dim...c.1L..1L.. DATA.NAME.structure..n3....Dim...c.1L..1L.. Remarks
1  4 2000     55      0      4                                          n1                                          n2                                          n3    2000
2  4 2001     60      2      6                                          n1                                          n2                                          n3    2001

我正在寻找一种方法,使用 DATA.NAME 列中的名称来重命名数据列。有时有多个数据列及其各自的名称(例如第二个列表元素),有时只有一个(例如第一个列表元素)。我希望能对整个大型列表(超过 1000 个列表元素)完成重命名,然后删除 DATA.NAME 列,得到如下所示的 data_new:

 data_new
  $TEST.DATA.1.1
      ID YEAR Test Remarks
    1  2 1990   10    1990
    2  2 1991   20    1991
    3  2 1992   30    1992

    $TEST.DATA.2.1
      ID YEAR n1 n2 n3 Remarks
    1  4 2000 55  0  4    2000
    2  4 2001 60  2  6    2001

使用 data.table 包的解决方案。

require(data.table)

# Example input: a named list holding two data frames.
data <- structure(
  list(
    # One data column (DATA.1); its label is stored in DATA.NAME.
    TEST.DATA.1.1 = structure(
      list(
        ID = c(2, 2, 2),
        YEAR = c(1990, 1991, 1992),
        DATA.1 = c(10, 20, 30),
        DATA.NAME = structure(
          c(1L, 1L, 1L),
          class = "factor",
          .Label = "Test"
        ),
        Remarks = c(1990, 1991, 1992)
      ),
      .Names = c("ID", "YEAR", "DATA.1", "DATA.NAME", "Remarks"),
      row.names = c(NA, -3L),
      class = "data.frame"
    ),
    # Three data columns (DATA.1 to DATA.3) with one label column each.
    TEST.DATA.2.1 = structure(
      list(
        ID = c(4, 4),
        YEAR = c(2000, 2001),
        DATA.1 = c(55, 60),
        DATA.2 = c(0, 2),
        DATA.3 = c(4, 6),
        DATA.NAME.structure..n1....Dim...c.1L..1L.. = structure(
          c(1L, 1L),
          class = "factor",
          .Label = "n1"
        ),
        DATA.NAME.structure..n2....Dim...c.1L..1L.. = structure(
          c(1L, 1L),
          class = "factor",
          .Label = "n2"
        ),
        DATA.NAME.structure..n3....Dim...c.1L..1L.. = structure(
          c(1L, 1L),
          class = "factor",
          .Label = "n3"
        ),
        Remarks = c(2000, 2001)
      ),
      .Names = c(
        "ID", "YEAR", "DATA.1", "DATA.2", "DATA.3",
        "DATA.NAME.structure..n1....Dim...c.1L..1L..",
        "DATA.NAME.structure..n2....Dim...c.1L..1L..",
        "DATA.NAME.structure..n3....Dim...c.1L..1L..",
        "Remarks"
      ),
      row.names = c(NA, -2L),
      class = "data.frame"
    )
  ),
  .Names = c("TEST.DATA.1.1", "TEST.DATA.2.1")
)

#' Rename the DATA.<n> columns of one table with the labels held in its
#' DATA.NAME* columns, then drop the label columns.
#'
#' @param x A data.frame (or any object `data.table()` accepts) containing
#'   columns named `DATA.<n>` and a matching number of columns whose names
#'   start with `DATA.NAME`.
#' @return A data.table copy of `x` with the `DATA.<n>` columns renamed and
#'   the `DATA.NAME*` columns removed. `setnames()` raises an error when the
#'   number of data columns and the number of labels differ.
fun <- function(x) {
  x <- data.table(x)
  cols <- names(x)

  # Anchor both patterns and escape the dot so that only columns actually
  # starting with "DATA.NAME" / "DATA.<digits>" are picked up.
  name_cols <- grep("^DATA\\.NAME", cols, value = TRUE)
  value_cols <- grep("^DATA\\.[0-9]+", cols, value = TRUE)

  # Labels are read from the first row only; unlist() merges the one-row
  # factor columns, as.character() turns the result into plain labels.
  labels <- as.character(unlist(x[1, name_cols, with = FALSE]))
  setnames(x, value_cols, labels)

  # `with = FALSE` together with `:=` is deprecated in data.table; wrapping
  # the column vector in parentheses is the supported way to delete columns
  # whose names are held in a variable.
  if (length(name_cols) > 0) {
    x[, (name_cols) := NULL]
  }
  x
}

# Apply the renaming to every element of the list.
data_new <- lapply(data, fun)

这应该有效...

library(dplyr)

# Rename the DATA.<n> columns of every data frame in `data` with the labels
# stored in its DATA.NAME* columns, then drop those label columns.
# `data` is modified in place, one list element at a time.
# seq_along() is used so that an empty list is skipped instead of iterating
# over c(1, 0) as 1:length(data) would.
for (i in seq_along(data)) {
  d <- data[[i]]

  # Find the new names: first row of every column starting with DATA.NAME.
  # drop = FALSE keeps a data frame even when there is a single such column.
  label_cols <- select(d, starts_with("DATA.NAME"))
  new_names <- as.character(
    unlist(label_cols[1, , drop = FALSE], use.names = FALSE)
  )

  # Remove the columns containing the names.
  d <- select(d, -starts_with("DATA.NAME"))

  # Pick which columns we want to replace (anchored, with a literal dot).
  to_replace <- grep("^DATA\\.[0-9]+", names(d))

  # Fail loudly instead of letting R recycle or truncate the new names.
  if (length(to_replace) != length(new_names)) {
    stop(
      sprintf(
        "List element %d has %d data column(s) but %d name(s).",
        i, length(to_replace), length(new_names)
      ),
      call. = FALSE
    )
  }

  # Replace those names.
  names(d)[to_replace] <- new_names

  # Replace the list element.
  data[[i]] <- d
}

这是使用基础 R(base R)的方法:

# Base R approach: for every data frame in `data`, rename the DATA.<n>
# columns with the labels found in the first row of the DATA.NAME* columns,
# then delete the DATA.NAME* columns. `data` is modified in place.
for (i in seq_along(data)) {
  cols <- names(data[[i]])

  # Backslashes must be doubled inside an R string literal; a single "\." or
  # "\d" is rejected by the parser as an unrecognized escape.
  namecis <- grep("^DATA\\.NAME", cols)
  datacis <- grep("^DATA\\.\\d+", cols)

  # Fail loudly instead of letting R recycle or truncate the new names.
  if (length(namecis) != length(datacis)) {
    stop(
      sprintf(
        "List element %d has %d data column(s) but %d name column(s).",
        i, length(datacis), length(namecis)
      ),
      call. = FALSE
    )
  }

  # drop = FALSE keeps a one-row data frame even for a single name column.
  names(data[[i]])[datacis] <-
    as.character(unlist(data[[i]][1, namecis, drop = FALSE]))

  # Assigning list(NULL) removes the selected columns.
  data[[i]][namecis] <- list(NULL)
}
data
## $TEST.DATA.1.1
##   ID YEAR Test Remarks
## 1  2 1990   10    1990
## 2  2 1991   20    1991
## 3  2 1992   30    1992
##
## $TEST.DATA.2.1
##   ID YEAR n1 n2 n3 Remarks
## 1  4 2000 55  0  4    2000
## 2  4 2001 60  2  6    2001