根据所有可能的组合将来自不同数据帧的数字相乘
Multiply numbers from different data frames based on all the possible combinations
我有 5 个数据框,如下所示:
df_mon <- data.frame(mon = as.factor(c(6, 7, 8, 9, 10)),
number = c(1.11, 1.02, 0.95, 0.92, 0.72))
df_year <- data.frame(year = as.factor(c(1, 2)),
number = c(1.61, 0.4))
df_cat <- data.frame(cat = c("A", "B", "C"),
number = c(1.11, 1.02, 0.44))
df_bin <- data.frame(bin = as.factor(c(1, 2)),
number = c(1.42, 0.56))
df_cat2 <- data.frame(cat2 = c("A", "B", "C", "D", "AA"),
number = c(0.11, 1.22, 1.34, 0.88, 0.75))
我需要将每个数据框中 'number' 列中的所有数字相互乘以。因此,查看每个数据集中第一列中所有可能的组合,然后取数并将它们相乘。最终结果数据框应如下所示(前 3 个已完成)
results_df <- data.frame(combi = c("mon6_year1_catA_bin1_cat2A", "mon6_year1_catA_bin1_cat2B", "mon6_year1_catA_bin1_cat2C"),
final_number = c(1.11*1.61*1.11*1.42*0.11, 1.11*1.61*1.11*1.42*1.22, 1.11*1.61*1.11*1.42*1.34))
我们可以看到 results_df
中的第一列显示了用于计算 final_number
的组合。第一个示例显示,mon_df cat 6
(1.11) 中的 'number'
列被提取并乘以以下内容:
- 类别 1 (1.61) 来自 df_year
- 类别 A (1.11) 来自 df_cat
- 类别 1 (1.42) 来自 df_bin
- A 类 (0.11) 来自 df_cat2
这个组合的答案是 1.11 x 1.61 x 1.11 x 1.42 x 0.11 = 0.3098。
第二行显示下一个可能的组合等等。
我不确定如何实现这一点,所以任何帮助将不胜感激!
也许你可以像下面那样尝试expand.grid
lst <- list(df_mon, df_year, df_cat, df_bin, df_cat2)
results_df <- data.frame(
combi = do.call(
paste,
c(do.call(
expand.grid,
lapply(lst, function(v) paste0(names(v[1]), v[, 1]))
), sep = "_")
),
final_number = Reduce(
"*",
do.call(
expand.grid,
lapply(lst, `[[`, 2)
)
)
)
这给出了
> head(results_df)
combi final_number
1 mon6_year1_catA_bin1_cat2A 0.30985097
2 mon7_year1_catA_bin1_cat2A 0.28472792
3 mon8_year1_catA_bin1_cat2A 0.26518777
4 mon9_year1_catA_bin1_cat2A 0.25681342
5 mon10_year1_catA_bin1_cat2A 0.20098441
6 mon6_year2_catA_bin1_cat2A 0.07698161
这是使用 dplyr
和 tidyr
的方法。
df_all <- df_mon %>%
full_join(df_year, by = character()) %>% # by = character() ensures cross join
full_join(df_cat, by = character()) %>%
full_join(df_bin, by = character()) %>%
full_join(df_cat2, by = character()) %>%
pivot_longer(cols = c(-mon, -year, -cat, -bin, -cat2)) %>%
group_by(mon, year, cat, bin, cat2) %>%
summarize(final_number = prod(value), .groups = "keep")
# A tibble: 300 x 6
# Groups: mon, year, cat, bin, cat2 [300]
mon year cat bin cat2 final_number
<fct> <fct> <chr> <fct> <chr> <dbl>
1 6 1 A 1 A 0.310
2 6 1 A 1 AA 2.11
3 6 1 A 1 B 3.44
4 6 1 A 1 C 3.77
5 6 1 A 1 D 2.48
6 6 1 A 2 A 0.122
7 6 1 A 2 AA 0.833
8 6 1 A 2 B 1.36
9 6 1 A 2 C 1.49
10 6 1 A 2 D 0.978
# ... with 290 more rows
它将来自其他 data.frames 的变量完整地保留为用于进一步分析的列,但是您可以创建 combi
列并添加一些 paste()
。
我有 5 个数据框,如下所示:
df_mon <- data.frame(mon = as.factor(c(6, 7, 8, 9, 10)),
number = c(1.11, 1.02, 0.95, 0.92, 0.72))
df_year <- data.frame(year = as.factor(c(1, 2)),
number = c(1.61, 0.4))
df_cat <- data.frame(cat = c("A", "B", "C"),
number = c(1.11, 1.02, 0.44))
df_bin <- data.frame(bin = as.factor(c(1, 2)),
number = c(1.42, 0.56))
df_cat2 <- data.frame(cat2 = c("A", "B", "C", "D", "AA"),
number = c(0.11, 1.22, 1.34, 0.88, 0.75))
我需要将每个数据框中 'number' 列中的所有数字相互乘以。因此,查看每个数据集中第一列中所有可能的组合,然后取数并将它们相乘。最终结果数据框应如下所示(前 3 个已完成)
results_df <- data.frame(combi = c("mon6_year1_catA_bin1_cat2A", "mon6_year1_catA_bin1_cat2B", "mon6_year1_catA_bin1_cat2C"),
final_number = c(1.11*1.61*1.11*1.42*0.11, 1.11*1.61*1.11*1.42*1.22, 1.11*1.61*1.11*1.42*1.34))
我们可以看到 results_df
中的第一列显示了用于计算 final_number
的组合。第一个示例显示,mon_df cat 6
(1.11) 中的 'number'
列被提取并乘以以下内容:
- 类别 1 (1.61) 来自 df_year
- 类别 A (1.11) 来自 df_cat
- 类别 1 (1.42) 来自 df_bin
- A 类 (0.11) 来自 df_cat2
这个组合的答案是 1.11 x 1.61 x 1.11 x 1.42 x 0.11 = 0.3098。 第二行显示下一个可能的组合等等。
我不确定如何实现这一点,所以任何帮助将不胜感激!
也许你可以像下面那样尝试expand.grid
lst <- list(df_mon, df_year, df_cat, df_bin, df_cat2)
results_df <- data.frame(
combi = do.call(
paste,
c(do.call(
expand.grid,
lapply(lst, function(v) paste0(names(v[1]), v[, 1]))
), sep = "_")
),
final_number = Reduce(
"*",
do.call(
expand.grid,
lapply(lst, `[[`, 2)
)
)
)
这给出了
> head(results_df)
combi final_number
1 mon6_year1_catA_bin1_cat2A 0.30985097
2 mon7_year1_catA_bin1_cat2A 0.28472792
3 mon8_year1_catA_bin1_cat2A 0.26518777
4 mon9_year1_catA_bin1_cat2A 0.25681342
5 mon10_year1_catA_bin1_cat2A 0.20098441
6 mon6_year2_catA_bin1_cat2A 0.07698161
这是使用 dplyr
和 tidyr
的方法。
df_all <- df_mon %>%
full_join(df_year, by = character()) %>% # by = character() ensures cross join
full_join(df_cat, by = character()) %>%
full_join(df_bin, by = character()) %>%
full_join(df_cat2, by = character()) %>%
pivot_longer(cols = c(-mon, -year, -cat, -bin, -cat2)) %>%
group_by(mon, year, cat, bin, cat2) %>%
summarize(final_number = prod(value), .groups = "keep")
# A tibble: 300 x 6
# Groups: mon, year, cat, bin, cat2 [300]
mon year cat bin cat2 final_number
<fct> <fct> <chr> <fct> <chr> <dbl>
1 6 1 A 1 A 0.310
2 6 1 A 1 AA 2.11
3 6 1 A 1 B 3.44
4 6 1 A 1 C 3.77
5 6 1 A 1 D 2.48
6 6 1 A 2 A 0.122
7 6 1 A 2 AA 0.833
8 6 1 A 2 B 1.36
9 6 1 A 2 C 1.49
10 6 1 A 2 D 0.978
# ... with 290 more rows
它将来自其他 data.frames 的变量完整地保留为用于进一步分析的列,但是您可以创建 combi
列并添加一些 paste()
。