r dplyr::left_join 不符合我想要的方式

r dplyr::left_join doesn't match the way I want it

所以,这让我发疯了,我敢打赌答案很简单,但我就是不明白。我有两个想要连接的数据框,df1 和 df2。但是,当我执行 left_join 时,Korn 列中的值并没有按 Datum、Soll、Plot、Behandlung、Entfernung 连接上,而是得到了 NA,即使上述所有列在两个数据框中都存在。我错过了什么?

# First example data frame: 10 rows, five key columns (Soll, Datum, Plot,
# Behandlung, Entfernung) plus the numeric columns DGUnkraut and prec.
# Factor levels that do not occur in the rows are kept deliberately so the
# object is identical to the original dput() output.
df1 <- data.frame(
  # Every row has Soll "552" (the 8th level).
  Soll = factor(
    rep("552", 10),
    levels = c("1189", "119", "1192", "1202", "149", "172", "2484", "552")
  ),
  # Three dates cycled over the ten rows.
  Datum = rep(
    as.Date(c("2021-04-08", "2021-05-17", "2021-07-07")),
    length.out = 10
  ),
  # Six rows for plot "1", then four rows for plot "3".
  Plot = factor(
    rep(c("1", "3"), times = c(6, 4)),
    levels = as.character(1:8)
  ),
  # a a a b b b a a a b
  Behandlung = factor(
    rep(c("a", "b"), each = 3, length.out = 10),
    levels = c("a", "b")
  ),
  Entfernung = factor(rep("2", 10), levels = c("2", "5")),
  DGUnkraut = c(0.1, 4, 12.2, 0.7, 0, 0, 0.2, 0, 0, 0.1),
  # Same three-value cycle as Datum.
  prec = rep(c(2.04, 13.38, 121.46), length.out = 10)
)
# Second example data frame as posted in the question: 10 rows sharing the
# five key columns with df1, plus the numeric column Korn.
# NOTE: the Soll levels carry a leading space (" 552" rather than "552");
# this is reproduced exactly because it is part of the posted data.
df2 <- data.frame(
  # Every row has Soll " 552" (the 7th level).
  Soll = factor(
    rep(" 552", 10),
    levels = c(" 1189", " 119", " 1202", " 149", " 172", " 2484", " 552")
  ),
  # A single date for all rows.
  Datum = rep(as.Date("2021-07-07"), 10),
  # Plots "1" to "5", two rows each.
  Plot = factor(
    rep(as.character(1:5), each = 2),
    levels = as.character(1:8)
  ),
  # Alternating b, a within each plot.
  Behandlung = factor(rep(c("b", "a"), times = 5), levels = c("a", "b")),
  # 2 2 5 5 2 2 5 5 2 2
  Entfernung = factor(
    rep(c("2", "5"), each = 2, length.out = 10),
    levels = c("2", "5")
  ),
  Korn = c(65.6, 64, 66.4, 57.4, 72.6, 68.9, 68.1, 73.6, 59.9, 34.6)
)

我的代码:

library(tidyverse)

# Natural left join: no `by` is given, so dplyr joins on every column name
# the two data frames have in common. The result is stored in df3.
df3 <- left_join(x = df1, y = df2)

感谢任何帮助!非常感谢!干杯!

在您的第二个数据框中,Soll 列的数字(例如“1189”)之前有一个空格,而您的第一个数据框中没有。一旦两者一致,您的左连接就可以正常工作。

要重新生成第二个数据框,请参见下文:

# Corrected version of the second data frame: identical to the data posted
# in the question except that the Soll levels have no leading space, so the
# values can match those in df1.
df2 <- data.frame(
  # Every row has Soll "552" (the 7th level).
  Soll = factor(
    rep("552", 10),
    levels = c("1189", "119", "1202", "149", "172", "2484", "552")
  ),
  # A single date for all rows.
  Datum = rep(as.Date("2021-07-07"), 10),
  # Plots "1" to "5", two rows each.
  Plot = factor(
    rep(as.character(1:5), each = 2),
    levels = as.character(1:8)
  ),
  # Alternating b, a within each plot.
  Behandlung = factor(rep(c("b", "a"), times = 5), levels = c("a", "b")),
  # 2 2 5 5 2 2 5 5 2 2
  Entfernung = factor(
    rep(c("2", "5"), each = 2, length.out = 10),
    levels = c("2", "5")
  ),
  Korn = c(65.6, 64, 66.4, 57.4, 72.6, 68.9, 68.1, 73.6, 59.9, 34.6)
)

最后,我们可以试试我们的左连接。现在我们可以看到附加的 Korn 的实际值:

# Left join of df1 with the corrected df2. No `by` is given, so the join
# uses every column name the two data frames share.
joined_df <- left_join(x = df1, y = df2)

   Soll      Datum Plot Behandlung Entfernung DGUnkraut   prec Korn
1   552 2021-04-08    1          a          2       0.1   2.04   NA
2   552 2021-05-17    1          a          2       4.0  13.38   NA
3   552 2021-07-07    1          a          2      12.2 121.46 64.0
4   552 2021-04-08    1          b          2       0.7   2.04   NA
5   552 2021-05-17    1          b          2       0.0  13.38   NA
6   552 2021-07-07    1          b          2       0.0 121.46 65.6
7   552 2021-04-08    3          a          2       0.2   2.04   NA
8   552 2021-05-17    3          a          2       0.0  13.38   NA
9   552 2021-07-07    3          a          2       0.0 121.46 68.9
10  552 2021-04-08    3          b          2       0.1   2.04   NA

正如@IanNoriega 和@r2evans 所提到的,df2 的 Soll 值中有一个前导空格。您可以先使用 trimws 删除它:

# Strip leading and trailing whitespace from the Soll key of df2 so its
# values can match those in df1. trimws() returns a character vector, so
# the column is no longer a factor afterwards.
df2[["Soll"]] <- trimws(df2[["Soll"]], which = "both")

现在连接就可以正常工作了:

# Left join with the key columns spelled out instead of relying on the
# implicit "all shared column names" default.
dplyr::left_join(
  x = df1,
  y = df2,
  by = c("Soll", "Datum", "Plot", "Behandlung", "Entfernung")
)

输出

   Soll      Datum Plot Behandlung Entfernung DGUnkraut   prec Korn
1   552 2021-04-08    1          a          2       0.1   2.04   NA
2   552 2021-05-17    1          a          2       4.0  13.38   NA
3   552 2021-07-07    1          a          2      12.2 121.46 64.0
4   552 2021-04-08    1          b          2       0.7   2.04   NA
5   552 2021-05-17    1          b          2       0.0  13.38   NA
6   552 2021-07-07    1          b          2       0.0 121.46 65.6
7   552 2021-04-08    3          a          2       0.2   2.04   NA
8   552 2021-05-17    3          a          2       0.0  13.38   NA
9   552 2021-07-07    3          a          2       0.0 121.46 68.9
10  552 2021-04-08    3          b          2       0.1   2.04   NA

您也可以在一个管道中完成所有操作。

# Same join in a single expression: the Soll key of df2 is trimmed on the
# fly inside the call, so df2 itself is left unmodified.
dplyr::left_join(
  df1,
  dplyr::mutate(df2, Soll = trimws(Soll)),
  by = c("Soll", "Datum", "Plot", "Behandlung", "Entfernung")
)

请看这里

要使您的示例正常工作,您可以:

library(dplyr)
library(stringr)

# Remove the leading space from the Soll key of df2, then join.
# The column is called `Soll` (capital S) in the data frames defined in this
# file; referring to it as `soll` makes mutate() fail with "object 'soll' not
# found". The printed result that follows shows lower-case column names, so
# it was presumably produced after the names had been normalised by a step
# that is not shown here.
df2 <- df2 %>%
  as_tibble() %>%
  # Soll is a factor; convert explicitly before trimming whitespace.
  mutate(Soll = str_trim(as.character(Soll)))

# Name the join keys explicitly rather than relying on the implicit
# "all shared column names" default.
left_join(
  df1,
  df2,
  by = c("Soll", "Datum", "Plot", "Behandlung", "Entfernung")
)
   soll      datum plot behandlung entfernung dg_unkraut   prec korn
1   552 2021-04-08    1          a          2        0.1   2.04   NA
2   552 2021-05-17    1          a          2        4.0  13.38   NA
3   552 2021-07-07    1          a          2       12.2 121.46 64.0
4   552 2021-04-08    1          b          2        0.7   2.04   NA
5   552 2021-05-17    1          b          2        0.0  13.38   NA
6   552 2021-07-07    1          b          2        0.0 121.46 65.6
7   552 2021-04-08    3          a          2        0.2   2.04   NA
8   552 2021-05-17    3          a          2        0.0  13.38   NA
9   552 2021-07-07    3          a          2        0.0 121.46 68.9
10  552 2021-04-08    3          b          2        0.1   2.04   NA