在 R 中根据条件使用 left_join 合并数据
Merging data using left_join based on a condition R
我有 2 个数据框,我需要根据条件合并它们:
# Dataframe 1: one row per (kpi_id, date, gbu) combination, in long format.
# The three KPI ids repeat for each of the two date/gbu groups.
kpi_id <- rep(c("SL", "OOS", "SPA"), times = 2)
date <- rep(c("2021-01-01", "2021-02-01"), each = 3)
gbu <- rep(c("gbu_1", "gbu_2"), each = 3)
kpi_val <- as.numeric(1:6)
df1 <- data.frame(
  kpi_id = kpi_id,
  date = date,
  gbu = gbu,
  kpi_val = kpi_val
)
df1
> df1
kpi_id date gbu kpi_val
1 SL 2021-01-01 gbu_1 1
2 OOS 2021-01-01 gbu_1 2
3 SPA 2021-01-01 gbu_1 3
4 SL 2021-02-01 gbu_2 4
5 OOS 2021-02-01 gbu_2 5
6 SPA 2021-02-01 gbu_2 6
# Dataframe 2: one row per (gbu, date), with one column per KPI (wide format).
gbu <- paste0("gbu_", 1:2)
date <- c("2021-01-01", "2021-02-01")
kpi_sl <- c(7, 8)
kpi_oos <- c(9, 10)
kpi_spa <- c(11, 12)
df2 <- data.frame(
  gbu = gbu,
  date = date,
  kpi_sl = kpi_sl,
  kpi_oos = kpi_oos,
  kpi_spa = kpi_spa
)
df2
> df2
gbu date kpi_sl kpi_oos kpi_spa
1 gbu_1 2021-01-01 7 9 11
2 gbu_2 2021-02-01 8 10 12
我需要在 df1 中创建新列 kpi_val_joined,其取值根据以下条件确定:
if kpi_id == SL --> kpi_val_joined = kpi_sl
if kpi_id == OOS --> kpi_val_joined = kpi_oos
if kpi_id == SPA --> kpi_val_joined = kpi_spa
# Dataframe 3_result.
kpi_id date gbu kpi_val kpi_val_joined
1 SL 2021-01-01 gbu_1 1 7
2 OOS 2021-01-01 gbu_1 2 9
3 SPA 2021-01-01 gbu_1 3 11
4 SL 2021-02-01 gbu_2 4 8
5 OOS 2021-02-01 gbu_2 5 10
6 SPA 2021-02-01 gbu_2 6 12
我们可以把 df2 转换为长格式,并把得到的 kpi_id 转成大写。然后,只需做一次简单的 left_join,即可把新列 kpi_val_joined 加入 df1。
library(tidyverse)

# Reshape df2 to long format so that each KPI column becomes its own row:
# the column name is split on "_", the "kpi" prefix is discarded (NA) and
# the remainder is kept as kpi_id, upper-cased to match the ids in df1.
# The long table is then left-joined onto df1 on gbu, date and kpi_id.
left_join(
  df1,
  mutate(
    pivot_longer(
      df2,
      cols = starts_with("kpi"),
      names_to = c(NA, "kpi_id"),
      names_sep = "_",
      values_to = "kpi_val_joined"
    ),
    kpi_id = toupper(kpi_id)
  ),
  by = c("gbu", "date", "kpi_id")
)
输出
kpi_id date gbu kpi_val kpi_val_joined
1 SL 2021-01-01 gbu_1 1 7
2 OOS 2021-01-01 gbu_1 2 9
3 SPA 2021-01-01 gbu_1 3 11
4 SL 2021-02-01 gbu_2 4 8
5 OOS 2021-02-01 gbu_2 5 10
6 SPA 2021-02-01 gbu_2 6 12
更新
既然我们只想在特定情况下进行连接,那么可以先在 df1 中创建一个 helper_id 列来表达想要匹配的条件,然后再与 df2 的长格式进行连接。
# Map each kpi_id in df1 to the name of the df2 column that holds its value
# (ids without a mapping become NA), join against the long form of df2 on
# that helper key, then drop the helper column again.
df1 %>%
  mutate(
    helper_id = unname(
      c(SL = "kpi_sl", OOS = "kpi_oos", SPA = "kpi_spa")[as.character(kpi_id)]
    )
  ) %>%
  left_join(
    pivot_longer(
      df2,
      cols = starts_with("kpi"),
      names_to = "helper_id",
      values_to = "kpi_val_joined"
    ),
    by = c("gbu", "date", "helper_id")
  ) %>%
  select(-helper_id)
不需要做任何长宽格式转换(pivot);只需按 date/gbu 进行连接,然后用 case 语句(这里是 fcase)创建新列,如下所示:
library(data.table)

# Convert both data frames to data.tables by reference.
setDT(df1)
setDT(df2)

# Update join: for the rows of df1 matched on date and gbu, add
# kpi_val_joined in place, picking the df2 column that corresponds to the
# row's kpi_id.
df1[
  df2,
  on = c("date", "gbu"),
  kpi_val_joined := fcase(
    kpi_id == "SL", kpi_sl,
    kpi_id == "OOS", kpi_oos,
    kpi_id == "SPA", kpi_spa
  )
]
输出:
kpi_id date gbu kpi_val kpi_val_joined
<char> <char> <char> <num> <num>
1: SL 2021-01-01 gbu_1 1 7
2: OOS 2021-01-01 gbu_1 2 9
3: SPA 2021-01-01 gbu_1 3 11
4: SL 2021-02-01 gbu_2 4 8
5: OOS 2021-02-01 gbu_2 5 10
6: SPA 2021-02-01 gbu_2 6 12
如果你愿意,也可以用 tidyverse 实现同样的效果,同样不需要 pivot。
# Attach the wide KPI columns of df2 to df1 by date and gbu, then pick the
# column matching each row's kpi_id.
#
# left_join (rather than inner_join) is used so that every row of df1 is
# kept: rows without a matching (date, gbu) in df2 get NA in kpi_val_joined
# instead of being dropped silently. Rows whose kpi_id matches none of the
# conditions also get NA, because case_when has no fallback branch.
left_join(df1, df2, by = c("date", "gbu")) %>%
  mutate(
    kpi_val_joined = case_when(
      kpi_id == "SL" ~ kpi_sl,
      kpi_id == "OOS" ~ kpi_oos,
      kpi_id == "SPA" ~ kpi_spa
    )
  ) %>%
  # Drop the helper columns by name so the result does not depend on the
  # column order of df2.
  select(-c(kpi_sl, kpi_oos, kpi_spa))
我有 2 个数据框,我需要根据条件合并它们:
# Dataframe 1: one row per (kpi_id, date, gbu) combination, in long format.
# The three KPI ids repeat for each of the two date/gbu groups.
kpi_id <- rep(c("SL", "OOS", "SPA"), times = 2)
date <- rep(c("2021-01-01", "2021-02-01"), each = 3)
gbu <- rep(c("gbu_1", "gbu_2"), each = 3)
kpi_val <- as.numeric(1:6)
df1 <- data.frame(
  kpi_id = kpi_id,
  date = date,
  gbu = gbu,
  kpi_val = kpi_val
)
df1
> df1
kpi_id date gbu kpi_val
1 SL 2021-01-01 gbu_1 1
2 OOS 2021-01-01 gbu_1 2
3 SPA 2021-01-01 gbu_1 3
4 SL 2021-02-01 gbu_2 4
5 OOS 2021-02-01 gbu_2 5
6 SPA 2021-02-01 gbu_2 6
# Dataframe 2: one row per (gbu, date), with one column per KPI (wide format).
gbu <- paste0("gbu_", 1:2)
date <- c("2021-01-01", "2021-02-01")
kpi_sl <- c(7, 8)
kpi_oos <- c(9, 10)
kpi_spa <- c(11, 12)
df2 <- data.frame(
  gbu = gbu,
  date = date,
  kpi_sl = kpi_sl,
  kpi_oos = kpi_oos,
  kpi_spa = kpi_spa
)
df2
> df2
gbu date kpi_sl kpi_oos kpi_spa
1 gbu_1 2021-01-01 7 9 11
2 gbu_2 2021-02-01 8 10 12
我需要在 df1 中创建新列 kpi_val_joined,其取值根据以下条件确定:
if kpi_id == SL --> kpi_val_joined = kpi_sl
if kpi_id == OOS --> kpi_val_joined = kpi_oos
if kpi_id == SPA --> kpi_val_joined = kpi_spa
# Dataframe 3_result.
kpi_id date gbu kpi_val kpi_val_joined
1 SL 2021-01-01 gbu_1 1 7
2 OOS 2021-01-01 gbu_1 2 9
3 SPA 2021-01-01 gbu_1 3 11
4 SL 2021-02-01 gbu_2 4 8
5 OOS 2021-02-01 gbu_2 5 10
6 SPA 2021-02-01 gbu_2 6 12
我们可以把 df2 转换为长格式,并把得到的 kpi_id 转成大写。然后,只需做一次简单的 left_join,即可把新列 kpi_val_joined 加入 df1。
library(tidyverse)

# Reshape df2 to long format so that each KPI column becomes its own row:
# the column name is split on "_", the "kpi" prefix is discarded (NA) and
# the remainder is kept as kpi_id, upper-cased to match the ids in df1.
# The long table is then left-joined onto df1 on gbu, date and kpi_id.
left_join(
  df1,
  mutate(
    pivot_longer(
      df2,
      cols = starts_with("kpi"),
      names_to = c(NA, "kpi_id"),
      names_sep = "_",
      values_to = "kpi_val_joined"
    ),
    kpi_id = toupper(kpi_id)
  ),
  by = c("gbu", "date", "kpi_id")
)
输出
kpi_id date gbu kpi_val kpi_val_joined
1 SL 2021-01-01 gbu_1 1 7
2 OOS 2021-01-01 gbu_1 2 9
3 SPA 2021-01-01 gbu_1 3 11
4 SL 2021-02-01 gbu_2 4 8
5 OOS 2021-02-01 gbu_2 5 10
6 SPA 2021-02-01 gbu_2 6 12
更新
既然我们只想在特定情况下进行连接,那么可以先在 df1 中创建一个 helper_id 列来表达想要匹配的条件,然后再与 df2 的长格式进行连接。
# Map each kpi_id in df1 to the name of the df2 column that holds its value
# (ids without a mapping become NA), join against the long form of df2 on
# that helper key, then drop the helper column again.
df1 %>%
  mutate(
    helper_id = unname(
      c(SL = "kpi_sl", OOS = "kpi_oos", SPA = "kpi_spa")[as.character(kpi_id)]
    )
  ) %>%
  left_join(
    pivot_longer(
      df2,
      cols = starts_with("kpi"),
      names_to = "helper_id",
      values_to = "kpi_val_joined"
    ),
    by = c("gbu", "date", "helper_id")
  ) %>%
  select(-helper_id)
不需要做任何长宽格式转换(pivot);只需按 date/gbu 进行连接,然后用 case 语句(这里是 fcase)创建新列,如下所示:
library(data.table)

# Convert both data frames to data.tables by reference.
setDT(df1)
setDT(df2)

# Update join: for the rows of df1 matched on date and gbu, add
# kpi_val_joined in place, picking the df2 column that corresponds to the
# row's kpi_id.
df1[
  df2,
  on = c("date", "gbu"),
  kpi_val_joined := fcase(
    kpi_id == "SL", kpi_sl,
    kpi_id == "OOS", kpi_oos,
    kpi_id == "SPA", kpi_spa
  )
]
输出:
kpi_id date gbu kpi_val kpi_val_joined
<char> <char> <char> <num> <num>
1: SL 2021-01-01 gbu_1 1 7
2: OOS 2021-01-01 gbu_1 2 9
3: SPA 2021-01-01 gbu_1 3 11
4: SL 2021-02-01 gbu_2 4 8
5: OOS 2021-02-01 gbu_2 5 10
6: SPA 2021-02-01 gbu_2 6 12
如果你愿意,也可以用 tidyverse 实现同样的效果,同样不需要 pivot。
# Attach the wide KPI columns of df2 to df1 by date and gbu, then pick the
# column matching each row's kpi_id.
#
# left_join (rather than inner_join) is used so that every row of df1 is
# kept: rows without a matching (date, gbu) in df2 get NA in kpi_val_joined
# instead of being dropped silently. Rows whose kpi_id matches none of the
# conditions also get NA, because case_when has no fallback branch.
left_join(df1, df2, by = c("date", "gbu")) %>%
  mutate(
    kpi_val_joined = case_when(
      kpi_id == "SL" ~ kpi_sl,
      kpi_id == "OOS" ~ kpi_oos,
      kpi_id == "SPA" ~ kpi_spa
    )
  ) %>%
  # Drop the helper columns by name so the result does not depend on the
  # column order of df2.
  select(-c(kpi_sl, kpi_oos, kpi_spa))