根据 r 中另一个数据框中的特征获取每次观察的平均值
Get mean per observation based in characteristics in another dataframe in r
我有两个数据框
df1
# var1 var2
# 1 X01 Red
# 2 X02 Green
# 3 X03 Red
# 4 X04 Yellow
# 5 X05 Red
# 6 X06 Green
df2
# X01 X02 X03 ...
# 1 1 0.1 2.1
# 2 2 0.2 2.2
# 3 3 0.3 2.3
# 4 4 0.4 2.4
# 5 5 0.5 2.5
# 6 6 0.6 2.6
是否有可能在 df2 中得到一个名为 'Red' 的新变量作为每次观察的平均值?
df2
# X01 X02 X03 Red
# 1 1 0.1 2.1 mean obs1
# 2 2 0.2 2.2 mean obs2
# 3 3 0.3 2.3 mean obs3
# 4 4 0.4 2.4 mean obs4
# 5 5 0.5 2.5 mean obs5
# 6 6 0.6 2.6 mean obs6
我们可以使用 base R
split
根据第一个数据 'var1' 的 match
ing 值拆分第二个数据集,Filter
list
元素仅在超过 1 列时应用 rowMeans
,然后遍历 list
,应用 rowMeans
并将它们分配为 [=33 中的新列=]
lst1 <- split.default(df2, df1$var2[match(names(df2), df1$var1)])
lst2 <- Filter(function(x) ncol(x) > 1, lst1)
tmp <- lapply(lst2,
rowMeans, na.rm = TRUE)
df2[names(tmp)] <- tmp
-输出
> df2
X01 X02 X03 Red
1 1 0.1 2.1 1.55
2 2 0.2 2.2 2.10
3 3 0.3 2.3 2.65
4 4 0.4 2.4 3.20
5 5 0.5 2.5 3.75
6 6 0.6 2.6 4.30
或在 for
循环中
for(nm in unique(df1$var2)) {
v1 <- intersect(df1$var1[df1$var2 == nm], names(df2))
if(length(v1) > 1) df2[[nm]] <- rowMeans(df2[v1], na.rm = TRUE)
}
数据
df1 <- structure(list(var1 = c("X01", "X02", "X03", "X04", "X05", "X06"
), var2 = c("Red", "Green", "Red", "Yellow", "Red", "Green")),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6"))
df2 <- structure(list(X01 = 1:6, X02 = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6
), X03 = c(2.1, 2.2, 2.3, 2.4, 2.5, 2.6)),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6"))
library(tidyverse)
df2 %>%
rowid_to_column() %>%
pivot_longer(-rowid, names_to = 'var1') %>%
left_join(df1) %>%
pivot_wider(rowid, names_from = var2, values_fn = mean) %>%
cbind(df2,.)
X01 X02 X03 rowid Red Green
1 1 0.1 2.1 1 1.55 0.1
2 2 0.2 2.2 2 2.10 0.2
3 3 0.3 2.3 3 2.65 0.3
4 4 0.4 2.4 4 3.20 0.4
5 5 0.5 2.5 5 3.75 0.5
6 6 0.6 2.6 6 4.30 0.6
我有两个数据框
df1
# var1 var2
# 1 X01 Red
# 2 X02 Green
# 3 X03 Red
# 4 X04 Yellow
# 5 X05 Red
# 6 X06 Green
df2
# X01 X02 X03 ...
# 1 1 0.1 2.1
# 2 2 0.2 2.2
# 3 3 0.3 2.3
# 4 4 0.4 2.4
# 5 5 0.5 2.5
# 6 6 0.6 2.6
是否有可能在 df2 中得到一个名为 'Red' 的新变量作为每次观察的平均值?
df2
# X01 X02 X03 Red
# 1 1 0.1 2.1 mean obs1
# 2 2 0.2 2.2 mean obs2
# 3 3 0.3 2.3 mean obs3
# 4 4 0.4 2.4 mean obs4
# 5 5 0.5 2.5 mean obs5
# 6 6 0.6 2.6 mean obs6
我们可以使用 base R
split
根据第一个数据 'var1' 的 match
ing 值拆分第二个数据集,Filter
list
元素仅在超过 1 列时应用 rowMeans
,然后遍历 list
,应用 rowMeans
并将它们分配为 [=33 中的新列=]
lst1 <- split.default(df2, df1$var2[match(names(df2), df1$var1)])
lst2 <- Filter(function(x) ncol(x) > 1, lst1)
tmp <- lapply(lst2,
rowMeans, na.rm = TRUE)
df2[names(tmp)] <- tmp
-输出
> df2
X01 X02 X03 Red
1 1 0.1 2.1 1.55
2 2 0.2 2.2 2.10
3 3 0.3 2.3 2.65
4 4 0.4 2.4 3.20
5 5 0.5 2.5 3.75
6 6 0.6 2.6 4.30
或在 for
循环中
for(nm in unique(df1$var2)) {
v1 <- intersect(df1$var1[df1$var2 == nm], names(df2))
if(length(v1) > 1) df2[[nm]] <- rowMeans(df2[v1], na.rm = TRUE)
}
数据
df1 <- structure(list(var1 = c("X01", "X02", "X03", "X04", "X05", "X06"
), var2 = c("Red", "Green", "Red", "Yellow", "Red", "Green")),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6"))
df2 <- structure(list(X01 = 1:6, X02 = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6
), X03 = c(2.1, 2.2, 2.3, 2.4, 2.5, 2.6)),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6"))
library(tidyverse)
df2 %>%
rowid_to_column() %>%
pivot_longer(-rowid, names_to = 'var1') %>%
left_join(df1) %>%
pivot_wider(rowid, names_from = var2, values_fn = mean) %>%
cbind(df2,.)
X01 X02 X03 rowid Red Green
1 1 0.1 2.1 1 1.55 0.1
2 2 0.2 2.2 2 2.10 0.2
3 3 0.3 2.3 3 2.65 0.3
4 4 0.4 2.4 4 3.20 0.4
5 5 0.5 2.5 5 3.75 0.5
6 6 0.6 2.6 6 4.30 0.6