如何连接整洁的数据集并合并列
How to join tidy datasets and merge the columns
我有两个整洁的 tibble,它们有一个匹配的键列 (ID),以及几个名称相同但行值不同的列。
我想按 ID 连接这两个 tibble,并将 df2 中额外的测量值、时间戳和数值添加到 df1 的相应列中。
到目前为止,我已尝试过 full_join、merge、left_join 等:
# Attempted approach: a full join of the two frames keyed on ID. The columns
# that share a name in both frames come back as suffixed duplicates
# (time.x, value.x, ...) instead of being stacked as extra rows.
joined_df <- full_join(df1, df2, by="ID")
但这会返回一个带有额外的 time、value 和 measurement 列(time.x、value.x 等)的 tibble。
但是,我想通过 ID 将这些额外的 df2 值添加到 df1 的现有列中,以便生成的 df 添加了行,但没有添加列。
这是一个例子:
# Example input. Both frames share ID, time, value and measurement;
# df1 additionally carries xy and df2 additionally carries ab.
# Character values are quoted and every argument is comma-separated so the
# snippet can be pasted into an R session and run as-is.
df1 <- data.frame(
  ID = c(1, 2, 3, 4, 1, 2, 3, 4),
  time = c(1, 2, 3, 4, 5, 6, 7, 8),
  value = c(1, 2, 3, 4, 5, 6, 7, 8),
  measurement = c("x", "s", "d", "g", "u", "b", "z", "e"),
  xy = c("g", "h", "j", "k", "t", "d", "g", "t"),
  stringsAsFactors = FALSE
)
df2 <- data.frame(
  ID = c(1, 2, 3, 4, 1, 2, 3, 4),
  time = c(11, 12, 13, 14, 15, 16, 17, 18),
  value = c(8, 7, 6, 5, 4, 3, 2, 1),
  measurement = c("r", "t", "z", "u", "i", "o", "k", "f"),
  ab = c("j", "k", "o", "l", "p", "f", "b", "c"),
  stringsAsFactors = FALSE
)
我需要的是一个连接函数,它通过从 df2 添加的行数扩展 ID 列,并将来自 df2 的额外测量值、值和时间戳包含到 df1 的现有列中。
预期输出为:
# Expected result: the rows of df1 followed by the rows of df2 in the shared
# columns, with xy and ab each filled in for all 16 rows.
# Character values are quoted and every argument is comma-separated so the
# snippet parses.
df3 <- data.frame(
  ID = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4),
  time = c(1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18),
  value = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1),
  measurement = c(
    "x", "s", "d", "g", "u", "b", "z", "e",
    "r", "t", "z", "u", "i", "o", "k", "f"
  ),
  xy = c(
    "g", "h", "j", "k", "t", "d", "g", "t",
    "g", "h", "j", "k", "t", "d", "g", "t"
  ),
  ab = c(
    "j", "k", "o", "l", "p", "f", "b", "c",
    "j", "k", "o", "l", "p", "f", "b", "c"
  ),
  stringsAsFactors = FALSE
)
不知为何,我找不到类似的功能。非常感谢您!
# Copy into `to` every column that is present in `from` but absent from `to`.
#
# from: data frame supplying the columns.
# to:   data frame receiving them; its existing columns are left untouched.
# Returns `to` with the extra columns appended after its own columns.
add_missing_columns <- function(from, to) {
  extra_cols <- setdiff(names(from), names(to))
  to[extra_cols] <- from[extra_cols]
  to
}
# Give each frame the column(s) that only the other frame has, so that both
# end up with the same set of column names.
df2 <- add_missing_columns(from = df1, to = df2)
df1 <- add_missing_columns(from = df2, to = df1)
# Stack the rows of the two frames.
res <- rbind(df1, df2)
# Compare the stacked result with the expected output df3.
all.equal(df3, res)
# TRUE
使用的数据如下:
# Runnable version of the example data.
# df1 and df2 share ID, time, value and measurement; df1 also has xy and
# df2 also has ab.
df1 <- data.frame(
  ID = rep(c(1, 2, 3, 4), times = 2),
  time = c(1, 2, 3, 4, 5, 6, 7, 8),
  value = c(1, 2, 3, 4, 5, 6, 7, 8),
  measurement = c("x", "s", "d", "g", "u", "b", "z", "e"),
  xy = c("g", "h", "j", "k", "t", "d", "g", "t"),
  stringsAsFactors = FALSE
)

df2 <- data.frame(
  ID = rep(c(1, 2, 3, 4), times = 2),
  time = c(11, 12, 13, 14, 15, 16, 17, 18),
  value = c(8, 7, 6, 5, 4, 3, 2, 1),
  measurement = c("r", "t", "z", "u", "i", "o", "k", "f"),
  ab = c("j", "k", "o", "l", "p", "f", "b", "c"),
  stringsAsFactors = FALSE
)

# Expected result: 16 rows, with the xy and ab values repeated for both halves.
df3 <- data.frame(
  ID = rep(c(1, 2, 3, 4), times = 4),
  time = c(1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18),
  value = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1),
  measurement = c(
    "x", "s", "d", "g", "u", "b", "z", "e",
    "r", "t", "z", "u", "i", "o", "k", "f"
  ),
  xy = rep(c("g", "h", "j", "k", "t", "d", "g", "t"), times = 2),
  ab = rep(c("j", "k", "o", "l", "p", "f", "b", "c"), times = 2),
  stringsAsFactors = FALSE
)
我现在通过 bind_rows 和 left_join 的组合设法解决了这个问题:
# Stack columns of both frames, selected by position, then re-attach the
# per-ID columns through two lookup tables.
# NOTE(review): the column positions used here and the `cv` column do not
# exist in the example df1/df2 shown in this post; presumably they refer to
# the asker's real data. Confirm before reuse.
df3 <- bind_rows(df1[,c(2,3,5,6)], df2[,c(1,3,4,5)])
# Distinct (ID, xy, cv) combinations from df1.
df1_lookup <-
df1 %>% select(ID,xy,cv) %>% distinct()
# Distinct (ID, ab) combinations from df2.
df2_lookup <-
df2 %>% select(ID,ab) %>% distinct()
# Join both lookups onto the stacked rows by ID.
df3 %>% left_join(df1_lookup, by="ID") %>% left_join(df2_lookup, by="ID")
谢谢!
我有两个整洁的 tibble,它们有一个匹配的键列 (ID),以及几个名称相同但行值不同的列。 我想按 ID 连接这两个 tibble,并将 df2 中额外的测量值、时间戳和数值添加到 df1 的相应列中。
到目前为止,我已尝试过 full_join、merge、left_join 等:
# Attempted approach: a full join of the two frames keyed on ID. The columns
# that share a name in both frames come back as suffixed duplicates
# (time.x, value.x, ...) instead of being stacked as extra rows.
joined_df <- full_join(df1, df2, by="ID")
但这会返回一个带有额外的 time、value 和 measurement 列(time.x、value.x 等)的 tibble。
但是,我想通过 ID 将这些额外的 df2 值添加到 df1 的现有列中,以便生成的 df 添加了行,但没有添加列。
这是一个例子:
# Example input. Both frames share ID, time, value and measurement;
# df1 additionally carries xy and df2 additionally carries ab.
# Character values are quoted and every argument is comma-separated so the
# snippet can be pasted into an R session and run as-is.
df1 <- data.frame(
  ID = c(1, 2, 3, 4, 1, 2, 3, 4),
  time = c(1, 2, 3, 4, 5, 6, 7, 8),
  value = c(1, 2, 3, 4, 5, 6, 7, 8),
  measurement = c("x", "s", "d", "g", "u", "b", "z", "e"),
  xy = c("g", "h", "j", "k", "t", "d", "g", "t"),
  stringsAsFactors = FALSE
)
df2 <- data.frame(
  ID = c(1, 2, 3, 4, 1, 2, 3, 4),
  time = c(11, 12, 13, 14, 15, 16, 17, 18),
  value = c(8, 7, 6, 5, 4, 3, 2, 1),
  measurement = c("r", "t", "z", "u", "i", "o", "k", "f"),
  ab = c("j", "k", "o", "l", "p", "f", "b", "c"),
  stringsAsFactors = FALSE
)
我需要的是一个连接函数,它通过从 df2 添加的行数扩展 ID 列,并将来自 df2 的额外测量值、值和时间戳包含到 df1 的现有列中。 预期输出为:
# Expected result: the rows of df1 followed by the rows of df2 in the shared
# columns, with xy and ab each filled in for all 16 rows.
# Character values are quoted and every argument is comma-separated so the
# snippet parses.
df3 <- data.frame(
  ID = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4),
  time = c(1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18),
  value = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1),
  measurement = c(
    "x", "s", "d", "g", "u", "b", "z", "e",
    "r", "t", "z", "u", "i", "o", "k", "f"
  ),
  xy = c(
    "g", "h", "j", "k", "t", "d", "g", "t",
    "g", "h", "j", "k", "t", "d", "g", "t"
  ),
  ab = c(
    "j", "k", "o", "l", "p", "f", "b", "c",
    "j", "k", "o", "l", "p", "f", "b", "c"
  ),
  stringsAsFactors = FALSE
)
不知为何,我找不到类似的功能。非常感谢您!
# Copy into `to` every column that is present in `from` but absent from `to`.
#
# from: data frame supplying the columns.
# to:   data frame receiving them; its existing columns are left untouched.
# Returns `to` with the extra columns appended after its own columns.
add_missing_columns <- function(from, to) {
  extra_cols <- setdiff(names(from), names(to))
  to[extra_cols] <- from[extra_cols]
  to
}
# Give each frame the column(s) that only the other frame has, so that both
# end up with the same set of column names.
df2 <- add_missing_columns(from = df1, to = df2)
df1 <- add_missing_columns(from = df2, to = df1)
# Stack the rows of the two frames.
res <- rbind(df1, df2)
# Compare the stacked result with the expected output df3.
all.equal(df3, res)
# TRUE
使用的数据如下:
# Runnable version of the example data.
# df1 and df2 share ID, time, value and measurement; df1 also has xy and
# df2 also has ab.
df1 <- data.frame(
  ID = rep(c(1, 2, 3, 4), times = 2),
  time = c(1, 2, 3, 4, 5, 6, 7, 8),
  value = c(1, 2, 3, 4, 5, 6, 7, 8),
  measurement = c("x", "s", "d", "g", "u", "b", "z", "e"),
  xy = c("g", "h", "j", "k", "t", "d", "g", "t"),
  stringsAsFactors = FALSE
)

df2 <- data.frame(
  ID = rep(c(1, 2, 3, 4), times = 2),
  time = c(11, 12, 13, 14, 15, 16, 17, 18),
  value = c(8, 7, 6, 5, 4, 3, 2, 1),
  measurement = c("r", "t", "z", "u", "i", "o", "k", "f"),
  ab = c("j", "k", "o", "l", "p", "f", "b", "c"),
  stringsAsFactors = FALSE
)

# Expected result: 16 rows, with the xy and ab values repeated for both halves.
df3 <- data.frame(
  ID = rep(c(1, 2, 3, 4), times = 4),
  time = c(1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 18),
  value = c(1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1),
  measurement = c(
    "x", "s", "d", "g", "u", "b", "z", "e",
    "r", "t", "z", "u", "i", "o", "k", "f"
  ),
  xy = rep(c("g", "h", "j", "k", "t", "d", "g", "t"), times = 2),
  ab = rep(c("j", "k", "o", "l", "p", "f", "b", "c"), times = 2),
  stringsAsFactors = FALSE
)
我现在通过 bind_rows 和 left_join 的组合设法解决了这个问题:
# Stack columns of both frames, selected by position, then re-attach the
# per-ID columns through two lookup tables.
# NOTE(review): the column positions used here and the `cv` column do not
# exist in the example df1/df2 shown in this post; presumably they refer to
# the asker's real data. Confirm before reuse.
df3 <- bind_rows(df1[,c(2,3,5,6)], df2[,c(1,3,4,5)])
# Distinct (ID, xy, cv) combinations from df1.
df1_lookup <-
df1 %>% select(ID,xy,cv) %>% distinct()
# Distinct (ID, ab) combinations from df2.
df2_lookup <-
df2 %>% select(ID,ab) %>% distinct()
# Join both lookups onto the stacked rows by ID.
df3 %>% left_join(df1_lookup, by="ID") %>% left_join(df2_lookup, by="ID")
谢谢!