在 R 中根据条件使用 left_join 合并数据

Merging data using left_join based on a condition R

我有 2 个数据框,我需要根据条件合并它们:

# Dataframe 1: long format, one row per (kpi_id, date, gbu) combination.
kpi_id <- rep(c("SL", "OOS", "SPA"), times = 2)
date <- rep(c("2021-01-01", "2021-02-01"), each = 3)
gbu <- rep(c("gbu_1", "gbu_2"), each = 3)
kpi_val <- c(1, 2, 3, 4, 5, 6)

df1 <- data.frame(
  kpi_id = kpi_id,
  date = date,
  gbu = gbu,
  kpi_val = kpi_val
)
df1

> df1
  kpi_id       date   gbu      kpi_val
1     SL   2021-01-01 gbu_1       1
2    OOS   2021-01-01 gbu_1       2
3    SPA   2021-01-01 gbu_1       3
4     SL   2021-02-01 gbu_2       4
5    OOS   2021-02-01 gbu_2       5
6    SPA   2021-02-01 gbu_2       6

# Dataframe 2: wide format, one row per (gbu, date) with one column per KPI.
gbu <- paste0("gbu_", c("1", "2"))
date <- c("2021-01-01", "2021-02-01")
kpi_sl <- c(7, 8)
kpi_oos <- c(9, 10)
kpi_spa <- c(11, 12)

df2 <- data.frame(
  gbu = gbu,
  date = date,
  kpi_sl = kpi_sl,
  kpi_oos = kpi_oos,
  kpi_spa = kpi_spa
)
df2
> df2
    gbu       date        kpi_sl kpi_oos kpi_spa
1 gbu_1      2021-01-01      7       9      11
2 gbu_2      2021-02-01      8      10      12

我需要在 df1 中创建新列 kpi_val_joined,其取值依据以下条件:

if kpi_id == SL --> kpi_val_joined = kpi_sl
if kpi_id == OOS --> kpi_val_joined = kpi_oos
if kpi_id == SPA --> kpi_val_joined = kpi_spa

# Dataframe 3_result.


  kpi_id       date       gbu     kpi_val    kpi_val_joined
1     SL     2021-01-01   gbu_1       1              7
2    OOS     2021-01-01   gbu_1       2              9
3    SPA     2021-01-01   gbu_1       3             11

4     SL     2021-02-01   gbu_2       4              8
5    OOS     2021-02-01   gbu_2       5             10
6    SPA     2021-02-01   gbu_2       6             12

我们可以把 df2 转成长格式,并把得到的 kpi_id 转为大写,使其与 df1 中的 kpi_id 一致。然后,只需做一个简单的 left_join,就能把新列 kpi_val_joined 加入 df1。

library(tidyverse)

# Reshape df2 to long format (one row per gbu/date/KPI), upper-case the KPI
# suffix so it lines up with df1$kpi_id, then left-join the result onto df1.
df2 %>%
  pivot_longer(
    cols = starts_with("kpi"),
    # A name such as "kpi_sl" is split on "_": the first piece is discarded
    # (NA) and the second piece becomes the kpi_id column.
    names_sep = "_",
    names_to = c(NA, "kpi_id"),
    values_to = "kpi_val_joined"
  ) %>%
  mutate(kpi_id = toupper(kpi_id)) %>%
  # `.` places the reshaped df2 on the right-hand side; df1 stays the left table.
  left_join(df1, ., by = c("gbu", "date", "kpi_id"))

输出

  kpi_id       date   gbu kpi_val kpi_val_joined
1     SL 2021-01-01 gbu_1       1              7
2    OOS 2021-01-01 gbu_1       2              9
3    SPA 2021-01-01 gbu_1       3             11
4     SL 2021-02-01 gbu_2       4              8
5    OOS 2021-02-01 gbu_2       5             10
6    SPA 2021-02-01 gbu_2       6             12

更新

既然我们只想在特定条件下进行连接,那么可以先在 df1 中创建一个 helper_id 列来表达这些条件,然后再与 df2 的长格式进行连接。

# Put df2 in long form keyed by its original column names (helper_id), map
# each df1 row's kpi_id to the matching column name, left-join on that key,
# and finally drop the helper column.
df2 %>%
  pivot_longer(
    cols = starts_with("kpi"),
    names_to = "helper_id",
    values_to = "kpi_val_joined"
  ) %>%
  left_join(
    df1 %>%
      mutate(
        helper_id = case_when(
          kpi_id == "SL" ~ "kpi_sl",
          kpi_id == "OOS" ~ "kpi_oos",
          kpi_id == "SPA" ~ "kpi_spa",
          # Any other kpi_id gets no helper key.
          TRUE ~ NA_character_
        )
      ),
    .,
    by = c("gbu", "date", "helper_id")
  ) %>%
  select(-helper_id)

不需要做任何透视(pivot);只需按 date/gbu 进行连接,然后使用 case 语句(这里是 fcase)创建新列,如下所示:

library(data.table)

# Convert both data frames to data.tables by reference, then do an update
# join: df1 is joined to df2 on date/gbu and gains kpi_val_joined in place,
# taken from the df2 column that corresponds to each row's kpi_id.
setDT(df1)
setDT(df2)
df1[
  df2,
  on = c("date", "gbu"),
  kpi_val_joined := fcase(
    kpi_id == "SL", kpi_sl,
    kpi_id == "OOS", kpi_oos,
    kpi_id == "SPA", kpi_spa
  )
]

输出:

   kpi_id       date    gbu kpi_val kpi_val_joined
   <char>     <char> <char>   <num>          <num>
1:     SL 2021-01-01  gbu_1       1              7
2:    OOS 2021-01-01  gbu_1       2              9
3:    SPA 2021-01-01  gbu_1       3             11
4:     SL 2021-02-01  gbu_2       4              8
5:    OOS 2021-02-01  gbu_2       5             10
6:    SPA 2021-02-01  gbu_2       6             12

如果你愿意,也可以用 tidyverse 做到这一点,同样不需要透视(pivot)。

# Attach df2's wide KPI columns to df1 by date/gbu, pick the column that
# corresponds to each row's kpi_id, then drop the wide columns again.
# left_join (rather than inner_join) keeps df1 rows that have no match in
# df2, in line with the left-join solutions elsewhere in this file; for
# those rows the joined KPI columns are NA, so kpi_val_joined is NA too.
left_join(df1, df2, by = c("date", "gbu")) %>%
  mutate(
    kpi_val_joined = case_when(
      kpi_id == "SL" ~ kpi_sl,
      kpi_id == "OOS" ~ kpi_oos,
      kpi_id == "SPA" ~ kpi_spa
    )
  ) %>%
  # kpi_sl:kpi_spa selects the contiguous run of KPI columns that came from df2.
  select(-(kpi_sl:kpi_spa))