计算关于 R 中列的唯一观察对之间的关系
Calculating a relationship between unique pairs of observations about a column in R
我正在尝试计算两种商品出现在同一组中的概率。
我有以下数据,
# Example input: three groups, each listing the same four commodities.
# `proportion` is the commodity's share of its group's total quantity
# (the group 2 shares are rounded to two decimals).
data <- data.frame(
  group = rep(c(1, 2, 3), each = 4),
  commodity = rep(c("Wheat", "Coal", "Steel", "Iron"), times = 3),
  quantity = c(
    5, 10, 0, 5,
    20, 5, 10, 0,
    0, 10, 15, 15
  ),
  proportion = c(
    0.25, 0.5, 0, 0.25,
    0.57, 0.14, 0.29, 0,
    0, 0.25, 0.375, 0.375
  )
)
我想为每个唯一的商品对计算一个比例(各组中两种商品比例的乘积之和除以 2)。
结果应该是这样的,
# Expected output: one row per unordered commodity pair, with the summed
# half-products of the two commodities' proportions across groups.
result <- data.frame(
  commodity1 = rep(c("Wheat", "Coal", "Steel"), times = 3:1),
  commodity2 = c("Coal", "Steel", "Iron", "Steel", "Iron", "Iron"),
  result = c(0.103, 0.082, 0.031, 0.067, 0.109, 0.070)
)
以小麦 - 煤炭为例,计算结果为 (0.25 * 0.5/2)+(0.57 * 0.14/2)+(0 * 0.25/2)=0.103
我已将商品对单独提取到一个 data.frame 中,打算用 mutate() 把结果添加进去,并尝试过 rowwise() 操作。
如有任何建议,我将不胜感激。
下面的做法虽然不太简洁,但似乎可行:
library(tidyverse)

# Intermediate lookup table `dd`: one row per commodity, with that commodity's
# proportions nested in the list-column `data` (single column `value`).
# Rows are sorted by `group` first because the pairwise products below are
# matched purely by position inside each nested vector.
dd <- data %>%
  arrange(group) %>%
  select(commodity, value = proportion) %>%
  group_by(commodity) %>%
  nest(data = c(value))

# Positional matching is only meaningful when every commodity has the same
# number of rows; otherwise `*` would recycle the shorter vector.
stopifnot(length(unique(map_int(dd$data, nrow))) == 1L)

# For every unordered commodity pair, sum the position-wise products of the
# two proportion vectors and halve the total.
t(combn(unique(data$commodity), 2)) %>%
  as.data.frame() %>%
  mutate(
    result = map2_dbl(V1, V2, function(first, second) {
      p_first <- dd$data[[match(first, dd$commodity)]]$value
      p_second <- dd$data[[match(second, dd$commodity)]]$value
      sum(p_first * p_second) / 2
    })
  )
V1 V2 result
1 Wheat Coal 0.1024000
2 Wheat Steel 0.0826500
3 Wheat Iron 0.0312500
4 Coal Steel 0.0671750
5 Coal Iron 0.1093750
6 Steel Iron 0.0703125
library(tidyverse)

# Build every (commodity1, commodity2, group) combination, keep each unordered
# pair once (alphabetical order), attach both commodities' rows for that
# group, then total half the product of the two proportions per pair.
crossing(
  commodity1 = data$commodity,
  commodity2 = data$commodity,
  group = data$group
) %>%
  filter(commodity2 > commodity1) %>%
  left_join(data, by = c(commodity1 = "commodity", group = "group")) %>%
  left_join(data, by = c(commodity2 = "commodity", group = "group")) %>%
  group_by(commodity1, commodity2) %>%
  summarise(result = sum(proportion.x * proportion.y / 2), .groups = "drop")
# A tibble: 6 x 3
commodity1 commodity2 result
* <chr> <chr> <dbl>
1 Coal Iron 0.109
2 Coal Steel 0.0672
3 Coal Wheat 0.102
4 Iron Steel 0.0703
5 Iron Wheat 0.0312
6 Steel Wheat 0.0826
我正在尝试计算两种商品出现在同一组中的概率。
我有以下数据,
# Example input: three groups, each listing the same four commodities.
# `proportion` is the commodity's share of its group's total quantity
# (the group 2 shares are rounded to two decimals).
data <- data.frame(
  group = rep(c(1, 2, 3), each = 4),
  commodity = rep(c("Wheat", "Coal", "Steel", "Iron"), times = 3),
  quantity = c(
    5, 10, 0, 5,
    20, 5, 10, 0,
    0, 10, 15, 15
  ),
  proportion = c(
    0.25, 0.5, 0, 0.25,
    0.57, 0.14, 0.29, 0,
    0, 0.25, 0.375, 0.375
  )
)
我想为每个唯一的商品对计算一个比例(各组中两种商品比例的乘积之和除以 2)。
结果应该是这样的,
# Expected output: one row per unordered commodity pair, with the summed
# half-products of the two commodities' proportions across groups.
result <- data.frame(
  commodity1 = rep(c("Wheat", "Coal", "Steel"), times = 3:1),
  commodity2 = c("Coal", "Steel", "Iron", "Steel", "Iron", "Iron"),
  result = c(0.103, 0.082, 0.031, 0.067, 0.109, 0.070)
)
以小麦 - 煤炭为例,计算结果为 (0.25 * 0.5/2)+(0.57 * 0.14/2)+(0 * 0.25/2)=0.103
我已将商品对单独提取到一个 data.frame 中,打算用 mutate() 把结果添加进去,并尝试过 rowwise() 操作。
如有任何建议,我将不胜感激。
下面的做法虽然不太简洁,但似乎可行:
library(tidyverse)

# Intermediate lookup table `dd`: one row per commodity, with that commodity's
# proportions nested in the list-column `data` (single column `value`).
# Rows are sorted by `group` first because the pairwise products below are
# matched purely by position inside each nested vector.
dd <- data %>%
  arrange(group) %>%
  select(commodity, value = proportion) %>%
  group_by(commodity) %>%
  nest(data = c(value))

# Positional matching is only meaningful when every commodity has the same
# number of rows; otherwise `*` would recycle the shorter vector.
stopifnot(length(unique(map_int(dd$data, nrow))) == 1L)

# For every unordered commodity pair, sum the position-wise products of the
# two proportion vectors and halve the total.
t(combn(unique(data$commodity), 2)) %>%
  as.data.frame() %>%
  mutate(
    result = map2_dbl(V1, V2, function(first, second) {
      p_first <- dd$data[[match(first, dd$commodity)]]$value
      p_second <- dd$data[[match(second, dd$commodity)]]$value
      sum(p_first * p_second) / 2
    })
  )
V1 V2 result
1 Wheat Coal 0.1024000
2 Wheat Steel 0.0826500
3 Wheat Iron 0.0312500
4 Coal Steel 0.0671750
5 Coal Iron 0.1093750
6 Steel Iron 0.0703125
library(tidyverse)

# Build every (commodity1, commodity2, group) combination, keep each unordered
# pair once (alphabetical order), attach both commodities' rows for that
# group, then total half the product of the two proportions per pair.
crossing(
  commodity1 = data$commodity,
  commodity2 = data$commodity,
  group = data$group
) %>%
  filter(commodity2 > commodity1) %>%
  left_join(data, by = c(commodity1 = "commodity", group = "group")) %>%
  left_join(data, by = c(commodity2 = "commodity", group = "group")) %>%
  group_by(commodity1, commodity2) %>%
  summarise(result = sum(proportion.x * proportion.y / 2), .groups = "drop")
# A tibble: 6 x 3
commodity1 commodity2 result
* <chr> <chr> <dbl>
1 Coal Iron 0.109
2 Coal Steel 0.0672
3 Coal Wheat 0.102
4 Iron Steel 0.0703
5 Iron Wheat 0.0312
6 Steel Wheat 0.0826