R:从列到行重塑数据并根据名称添加其他数据
R: reshape data from column to row and add additional data based on name
我正在寻找一种方法来重塑下面的示例数据 data:
# Example input: three records, one per row. 'name' and 'source' are factors.
data <- data.frame(
  id = c(2L, 5L, 7L),
  name = factor(
    c("Test1", "Test10", "Test8"),
    levels = c("Test1", "Test10", "Test8")
  ),
  source = factor(c("A", "Z", "T"), levels = c("A", "T", "Z"))
)
id name source
1 2 Test1 A
2 5 Test10 Z
3 7 Test8 T
进入如下结构
row.names 1.1 2.1 3.1
id 2 5 7
name Test1 Test10 Test8
source A Z T
我怎样才能根据名称把第二个 data.frame(比如 data2)添加到 data 中
(仅包含名称匹配的数据)?
# Additional data: a single row labelled "adddata", with one integer value per
# test name. Test12 has no counterpart in 'data'.
data2 <- data.frame(
  name = factor("adddata"),
  Test1 = 10L,
  Test10 = 12L,
  Test8 = 17L,
  Test12 = 7L
)
data2
name Test1 Test10 Test8 Test12
1 adddata 10 12 17 7
最终希望得到类似下面这样的 data.frame:其中只包含匹配的名称
(data2 中的 Test12 被排除在外)。
datanew
row.names 1.1 2.1 3.1
1 id 2 5 7
2 name Test1 Test10 Test8
3 source A Z T
4 adddata 10 12 17
编辑
我刚刚意识到我的输入数据包含这样的嵌套列表。有没有办法实现这个?
# Nested input: a data.frame whose columns ("1.1", "2.1", "3.1") are each a
# list of 1x1 matrices holding one record's id, name and source. The row names
# carry the field names.
data <- structure(
  list(
    `1.1` = list(
      id = matrix(2, 1L, 1L),
      name = matrix("Test1", 1L, 1L),
      source = matrix("A", 1L, 1L)
    ),
    `2.1` = list(
      id = matrix(5, 1L, 1L),
      name = matrix("Test10", 1L, 1L),
      source = matrix("Z", 1L, 1L)
    ),
    `3.1` = list(
      id = matrix(7, 1L, 1L),
      name = matrix("Test8", 1L, 1L),
      source = matrix("T", 1L, 1L)
    )
  ),
  class = "data.frame",
  row.names = c("id", "name", "source")
)
'data.frame': 3 obs. of 3 variables:
$ 1.1:List of 3
..$ id : num [1, 1] 2
..$ name : chr [1, 1] "Test1"
..$ source: chr [1, 1] "A"
$ 2.1:List of 3
..$ id : num [1, 1] 5
..$ name : chr [1, 1] "Test10"
..$ source: chr [1, 1] "Z"
$ 3.1:List of 3
..$ id : num [1, 1] 7
..$ name : chr [1, 1] "Test8"
..$ source: chr [1, 1] "T"
您可以先转置(t)第一个数据集 ('data'),然后用 rbind
把转置结果 ('d1') 与 'data2' 的一个子集合并;该子集是通过 match
在 'data' 的 'name' 列与 'data2'
的列名之间进行匹配得到的。
# Transpose 'data' so that each original column becomes a row. t() goes through
# a matrix, so with mixed column classes every value in 'd1' ends up as text.
d1 <- as.data.frame(t(data), stringsAsFactors = FALSE)

# Pick the 'data2' columns whose names match data$name, in the order of 'data',
# and relabel them with d1's column names so that rbind() can line them up.
# Columns of 'data2' without a counterpart in data$name (Test12) are left out.
matched <- match(as.character(data$name), names(data2))
res <- rbind(d1, setNames(data2[matched], names(d1)))

# Label the appended row(s) relative to the size of 'd1' rather than with a
# fixed index, so this keeps working when 'data' has a different number of
# columns.
rownames(res)[nrow(d1) + seq_len(nrow(data2))] <- as.character(data2$name)
res
# V1 V2 V3
#id 2 5 7
#name Test1 Test10 Test8
#source A Z T
#adddata 10 12 17
另一个选项是使用 data.table
的连接(join)
library(data.table) # v1.9.5+

# Reshape the value columns of 'data2' to long form: one row per column, with
# the former column name in 'name' and its value in 'adddata'. The label column
# 'name' of 'data2' is dropped up front so that it cannot clash with the new
# 'name' key column (no duplicate column that must be removed by position).
value_cols <- setdiff(names(data2), "name")
lookup <- melt(
  as.data.table(data2[value_cols]),
  measure.vars = value_cols,
  variable.name = "name",
  value.name = "adddata"
)

# Join on 'name'; nomatch = 0 drops 'lookup' rows that have no partner in
# 'data' (Test12). Note: setDT() converts 'data' to a data.table by reference.
DT <- setDT(data)[lookup, on = "name", nomatch = 0]
DT
# id name source adddata
#1: 2 Test1 A 10
#2: 5 Test10 Z 12
#3: 7 Test8 T 17
我会保留这种格式而不是转置它,因为列的 class 不同。如果我们转置,数字和非数字元素在一列中混合在一起,class 将是 factor
或 character
# Transpose DT into the row-wise layout asked for in the question. Columns of
# different classes are mixed, so numeric and non-numeric values share one type.
t(DT)
更新
对于编辑后的 "data",我们可以先用 unlist
把嵌套的 list
展开,再转换为 'data.frame'。然后就可以按照之前的步骤继续操作了。
# Each column of the nested 'data' is one record and the row names are the
# field names. unlist() flattens it record by record, so filling a matrix by
# row with one column per field yields one row per record.
fields <- row.names(data)
data <- setNames(
  as.data.frame(
    matrix(unlist(data), ncol = length(fields), byrow = TRUE),
    stringsAsFactors = FALSE
  ),
  fields
)
# unlist() turned every value into text; restore numeric columns such as 'id'.
data[] <- lapply(data, type.convert, as.is = TRUE)

# Long form of 'data2': former column names in 'name', values in 'adddata'.
# The label column 'name' of 'data2' is dropped up front so that it cannot
# clash with the new 'name' key column. variable.factor = FALSE keeps the key
# as character, matching the character 'name' column built above.
value_cols <- setdiff(names(data2), "name")
lookup <- melt(
  as.data.table(data2[value_cols]),
  measure.vars = value_cols,
  variable.name = "name",
  value.name = "adddata",
  variable.factor = FALSE
)

# Join on 'name'; nomatch = 0 drops 'lookup' rows that have no partner in
# 'data' (Test12). Note: setDT() converts 'data' to a data.table by reference.
DT <- setDT(data)[lookup, on = "name", nomatch = 0]
DT
# id name source adddata
#1: 2 Test1 A 10
#2: 5 Test10 Z 12
#3: 7 Test8 T 17
我正在寻找一种方法来重塑下面的示例数据 data:
# Example input: three records, one per row. 'name' and 'source' are factors.
data <- data.frame(
  id = c(2L, 5L, 7L),
  name = factor(
    c("Test1", "Test10", "Test8"),
    levels = c("Test1", "Test10", "Test8")
  ),
  source = factor(c("A", "Z", "T"), levels = c("A", "T", "Z"))
)
id name source
1 2 Test1 A
2 5 Test10 Z
3 7 Test8 T
进入如下结构
row.names 1.1 2.1 3.1
id 2 5 7
name Test1 Test10 Test8
source A Z T
我怎样才能根据名称把第二个 data.frame(比如 data2)添加到 data 中
(仅包含名称匹配的数据)?
# Additional data: a single row labelled "adddata", with one integer value per
# test name. Test12 has no counterpart in 'data'.
data2 <- data.frame(
  name = factor("adddata"),
  Test1 = 10L,
  Test10 = 12L,
  Test8 = 17L,
  Test12 = 7L
)
data2
name Test1 Test10 Test8 Test12
1 adddata 10 12 17 7
最终希望得到类似下面这样的 data.frame:其中只包含匹配的名称
(data2 中的 Test12 被排除在外)。
datanew
row.names 1.1 2.1 3.1
1 id 2 5 7
2 name Test1 Test10 Test8
3 source A Z T
4 adddata 10 12 17
编辑
我刚刚意识到我的输入数据包含这样的嵌套列表。有没有办法实现这个?
# Nested input: a data.frame whose columns ("1.1", "2.1", "3.1") are each a
# list of 1x1 matrices holding one record's id, name and source. The row names
# carry the field names.
data <- structure(
  list(
    `1.1` = list(
      id = matrix(2, 1L, 1L),
      name = matrix("Test1", 1L, 1L),
      source = matrix("A", 1L, 1L)
    ),
    `2.1` = list(
      id = matrix(5, 1L, 1L),
      name = matrix("Test10", 1L, 1L),
      source = matrix("Z", 1L, 1L)
    ),
    `3.1` = list(
      id = matrix(7, 1L, 1L),
      name = matrix("Test8", 1L, 1L),
      source = matrix("T", 1L, 1L)
    )
  ),
  class = "data.frame",
  row.names = c("id", "name", "source")
)
'data.frame': 3 obs. of 3 variables:
$ 1.1:List of 3
..$ id : num [1, 1] 2
..$ name : chr [1, 1] "Test1"
..$ source: chr [1, 1] "A"
$ 2.1:List of 3
..$ id : num [1, 1] 5
..$ name : chr [1, 1] "Test10"
..$ source: chr [1, 1] "Z"
$ 3.1:List of 3
..$ id : num [1, 1] 7
..$ name : chr [1, 1] "Test8"
..$ source: chr [1, 1] "T"
您可以先转置(t)第一个数据集 ('data'),然后用 rbind
把转置结果 ('d1') 与 'data2' 的一个子集合并;该子集是通过 match
在 'data' 的 'name' 列与 'data2' 的列名之间进行匹配得到的。
# Transpose 'data' so that each original column becomes a row. t() goes through
# a matrix, so with mixed column classes every value in 'd1' ends up as text.
d1 <- as.data.frame(t(data), stringsAsFactors = FALSE)

# Pick the 'data2' columns whose names match data$name, in the order of 'data',
# and relabel them with d1's column names so that rbind() can line them up.
# Columns of 'data2' without a counterpart in data$name (Test12) are left out.
matched <- match(as.character(data$name), names(data2))
res <- rbind(d1, setNames(data2[matched], names(d1)))

# Label the appended row(s) relative to the size of 'd1' rather than with a
# fixed index, so this keeps working when 'data' has a different number of
# columns.
rownames(res)[nrow(d1) + seq_len(nrow(data2))] <- as.character(data2$name)
res
# V1 V2 V3
#id 2 5 7
#name Test1 Test10 Test8
#source A Z T
#adddata 10 12 17
另一个选项是使用 data.table 的连接(join)
library(data.table) # v1.9.5+

# Reshape the value columns of 'data2' to long form: one row per column, with
# the former column name in 'name' and its value in 'adddata'. The label column
# 'name' of 'data2' is dropped up front so that it cannot clash with the new
# 'name' key column (no duplicate column that must be removed by position).
value_cols <- setdiff(names(data2), "name")
lookup <- melt(
  as.data.table(data2[value_cols]),
  measure.vars = value_cols,
  variable.name = "name",
  value.name = "adddata"
)

# Join on 'name'; nomatch = 0 drops 'lookup' rows that have no partner in
# 'data' (Test12). Note: setDT() converts 'data' to a data.table by reference.
DT <- setDT(data)[lookup, on = "name", nomatch = 0]
DT
# id name source adddata
#1: 2 Test1 A 10
#2: 5 Test10 Z 12
#3: 7 Test8 T 17
我会保留这种格式而不是转置它,因为列的 class 不同。如果我们转置,数字和非数字元素在一列中混合在一起,class 将是 factor
或 character
# Transpose DT into the row-wise layout asked for in the question. Columns of
# different classes are mixed, so numeric and non-numeric values share one type.
t(DT)
更新
对于编辑后的 "data",我们可以先用 unlist
把嵌套的 list
展开,再转换为 'data.frame'。然后就可以按照之前的步骤继续操作了。
# Each column of the nested 'data' is one record and the row names are the
# field names. unlist() flattens it record by record, so filling a matrix by
# row with one column per field yields one row per record.
fields <- row.names(data)
data <- setNames(
  as.data.frame(
    matrix(unlist(data), ncol = length(fields), byrow = TRUE),
    stringsAsFactors = FALSE
  ),
  fields
)
# unlist() turned every value into text; restore numeric columns such as 'id'.
data[] <- lapply(data, type.convert, as.is = TRUE)

# Long form of 'data2': former column names in 'name', values in 'adddata'.
# The label column 'name' of 'data2' is dropped up front so that it cannot
# clash with the new 'name' key column. variable.factor = FALSE keeps the key
# as character, matching the character 'name' column built above.
value_cols <- setdiff(names(data2), "name")
lookup <- melt(
  as.data.table(data2[value_cols]),
  measure.vars = value_cols,
  variable.name = "name",
  value.name = "adddata",
  variable.factor = FALSE
)

# Join on 'name'; nomatch = 0 drops 'lookup' rows that have no partner in
# 'data' (Test12). Note: setDT() converts 'data' to a data.table by reference.
DT <- setDT(data)[lookup, on = "name", nomatch = 0]
DT
# id name source adddata
#1: 2 Test1 A 10
#2: 5 Test10 Z 12
#3: 7 Test8 T 17