r dplyr::left_join 不符合我想要的方式
r dplyr::left_join doesn't match the way I want it
这个问题快把我逼疯了,我敢打赌答案很简单,但我就是想不明白。我有两个想要连接的数据框 df1 和 df2。但是,当我执行 left_join 时,Korn 列中的值并没有按 Datum、Soll、Plot、Behandlung 和 Entfernung 连接上,而是得到 NA,即使上述所有列在两个数据框中都存在。我漏掉了什么?
# df1: left-hand table of the join (10 rows, dput output).
# Soll, Plot, Behandlung and Entfernung are factors, Datum is a Date,
# DGUnkraut and prec are numeric. The Soll labels have no leading
# whitespace (e.g. "552").
df1 <- structure(list(Soll = structure(c(8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L), .Label = c("1189", "119", "1192", "1202", "149",
"172", "2484", "552"), class = "factor"), Datum = structure(c(18725,
18764, 18815, 18725, 18764, 18815, 18725, 18764, 18815, 18725
), class = "Date"), Plot = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
3L, 3L, 3L, 3L), .Label = c("1", "2", "3", "4", "5", "6", "7",
"8"), class = "factor"), Behandlung = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 2L), .Label = c("a", "b"), class = "factor"),
Entfernung = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("2", "5"), class = "factor"), DGUnkraut = c(0.1,
4, 12.2, 0.7, 0, 0, 0.2, 0, 0, 0.1), prec = c(2.04, 13.38,
121.46, 2.04, 13.38, 121.46, 2.04, 13.38, 121.46, 2.04)), class = "data.frame", row.names = c(NA,
-10L))
# df2: right-hand table of the join (10 rows, dput output); carries Korn.
# Note that every Soll label starts with a space (e.g. " 552"), so the
# strings differ from the labels in df1 (e.g. "552").
df2 <-structure(list(Soll = structure(c(7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L), .Label = c(" 1189", " 119", " 1202", " 149", " 172",
" 2484", " 552"), class = "factor"), Datum = structure(c(18815,
18815, 18815, 18815, 18815, 18815, 18815, 18815, 18815, 18815
), class = "Date"), Plot = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L), .Label = c("1", "2", "3", "4", "5", "6", "7",
"8"), class = "factor"), Behandlung = structure(c(2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("a", "b"), class = "factor"),
Entfernung = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L), .Label = c("2", "5"), class = "factor"), Korn = c(65.6,
64, 66.4, 57.4, 72.6, 68.9, 68.1, 73.6, 59.9, 34.6)), class = "data.frame", row.names = c(NA,
-10L))
我的代码:
library(tidyverse)
# Natural left join: no `by` is supplied, so dplyr joins on every column
# name that df1 and df2 have in common.
df3 <- left_join(df1, df2)
感谢任何帮助!非常感谢!干杯!
在您的第二个数据框中,Soll 列的值在数字(例如“1189”)之前有一个空格,而第一个数据框中没有。一旦两者保持一致,您的左连接就可以正常工作了。
要重新生成第二个数据框,请参见下文:
# Regenerated df2: same columns and values as the df2 in the question, but
# the Soll labels are written without the leading space (e.g. "552").
df2 <-structure(list(Soll = structure(c(7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L), .Label = c("1189", "119", "1202", "149", "172",
"2484", "552"), class = "factor"), Datum = structure(c(18815,
18815, 18815, 18815, 18815, 18815, 18815, 18815, 18815, 18815
), class = "Date"), Plot = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L), .Label = c("1", "2", "3", "4", "5", "6", "7",
"8"), class = "factor"), Behandlung = structure(c(2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("a", "b"), class = "factor"),
Entfernung = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L), .Label = c("2", "5"), class = "factor"), Korn = c(65.6,
64, 66.4, 57.4, 72.6, 68.9, 68.1, 73.6, 59.9, 34.6)), class =
"data.frame", row.names = c(NA,
-10L))
最后,我们可以试试我们的左连接。现在我们可以看到附加的 Korn
的实际值:
# Keep every row of df1 and attach the matching df2 columns; with no `by`,
# the column names shared by both data frames act as the join keys.
joined_df <- left_join(df1, df2)
Soll Datum Plot Behandlung Entfernung DGUnkraut prec Korn
1 552 2021-04-08 1 a 2 0.1 2.04 NA
2 552 2021-05-17 1 a 2 4.0 13.38 NA
3 552 2021-07-07 1 a 2 12.2 121.46 64.0
4 552 2021-04-08 1 b 2 0.7 2.04 NA
5 552 2021-05-17 1 b 2 0.0 13.38 NA
6 552 2021-07-07 1 b 2 0.0 121.46 65.6
7 552 2021-04-08 3 a 2 0.2 2.04 NA
8 552 2021-05-17 3 a 2 0.0 13.38 NA
9 552 2021-07-07 3 a 2 0.0 121.46 68.9
10 552 2021-04-08 3 b 2 0.1 2.04 NA
正如 @IanNoriega 和 @r2evans 所提到的,df2 的 Soll 列中有一个前导空格。您可以先使用 trimws 将其删除。
# Strip the surrounding whitespace from the join key column of df2.
df2[["Soll"]] <- trimws(df2[["Soll"]])
现在连接就可以正常工作了。
# Left join with the five key columns spelled out explicitly.
dplyr::left_join(
  df1,
  df2,
  by = c("Soll", "Datum", "Plot", "Behandlung", "Entfernung")
)
输出
Soll Datum Plot Behandlung Entfernung DGUnkraut prec Korn
1 552 2021-04-08 1 a 2 0.1 2.04 NA
2 552 2021-05-17 1 a 2 4.0 13.38 NA
3 552 2021-07-07 1 a 2 12.2 121.46 64.0
4 552 2021-04-08 1 b 2 0.7 2.04 NA
5 552 2021-05-17 1 b 2 0.0 13.38 NA
6 552 2021-07-07 1 b 2 0.0 121.46 65.6
7 552 2021-04-08 3 a 2 0.2 2.04 NA
8 552 2021-05-17 3 a 2 0.0 13.38 NA
9 552 2021-07-07 3 a 2 0.0 121.46 68.9
10 552 2021-04-08 3 b 2 0.1 2.04 NA
您也可以在一个管道中完成所有操作。
# Trim the key on the fly inside the join call, so df2 itself is not
# reassigned.
df1 %>%
  dplyr::left_join(
    dplyr::mutate(df2, Soll = trimws(Soll)),
    by = c("Soll", "Datum", "Plot", "Behandlung", "Entfernung")
  )
请看这里。要让您的示例正常运行,您可以:
library(dplyr)
library(stringr)
# Column names are case-sensitive: the key column in df2 is `Soll`, so that
# is the name to trim (a lowercase `soll` does not exist in df2 as defined
# above).
# NOTE(review): the lowercase headers in the printed result suggest the
# column names were cleaned beforehand in the original answer — confirm.
df2 <- df2 %>%
  as_tibble() %>%
  mutate(Soll = str_trim(Soll))
left_join(df1, df2)
soll datum plot behandlung entfernung dg_unkraut prec korn
1 552 2021-04-08 1 a 2 0.1 2.04 NA
2 552 2021-05-17 1 a 2 4.0 13.38 NA
3 552 2021-07-07 1 a 2 12.2 121.46 64.0
4 552 2021-04-08 1 b 2 0.7 2.04 NA
5 552 2021-05-17 1 b 2 0.0 13.38 NA
6 552 2021-07-07 1 b 2 0.0 121.46 65.6
7 552 2021-04-08 3 a 2 0.2 2.04 NA
8 552 2021-05-17 3 a 2 0.0 13.38 NA
9 552 2021-07-07 3 a 2 0.0 121.46 68.9
10 552 2021-04-08 3 b 2 0.1 2.04 NA
这个问题快把我逼疯了,我敢打赌答案很简单,但我就是想不明白。我有两个想要连接的数据框 df1 和 df2。但是,当我执行 left_join 时,Korn 列中的值并没有按 Datum、Soll、Plot、Behandlung 和 Entfernung 连接上,而是得到 NA,即使上述所有列在两个数据框中都存在。我漏掉了什么?
# df1: left-hand table of the join (10 rows, dput output).
# Soll, Plot, Behandlung and Entfernung are factors, Datum is a Date,
# DGUnkraut and prec are numeric. The Soll labels have no leading
# whitespace (e.g. "552").
df1 <- structure(list(Soll = structure(c(8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L), .Label = c("1189", "119", "1192", "1202", "149",
"172", "2484", "552"), class = "factor"), Datum = structure(c(18725,
18764, 18815, 18725, 18764, 18815, 18725, 18764, 18815, 18725
), class = "Date"), Plot = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
3L, 3L, 3L, 3L), .Label = c("1", "2", "3", "4", "5", "6", "7",
"8"), class = "factor"), Behandlung = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 2L), .Label = c("a", "b"), class = "factor"),
Entfernung = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("2", "5"), class = "factor"), DGUnkraut = c(0.1,
4, 12.2, 0.7, 0, 0, 0.2, 0, 0, 0.1), prec = c(2.04, 13.38,
121.46, 2.04, 13.38, 121.46, 2.04, 13.38, 121.46, 2.04)), class = "data.frame", row.names = c(NA,
-10L))
# df2: right-hand table of the join (10 rows, dput output); carries Korn.
# Note that every Soll label starts with a space (e.g. " 552"), so the
# strings differ from the labels in df1 (e.g. "552").
df2 <-structure(list(Soll = structure(c(7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L), .Label = c(" 1189", " 119", " 1202", " 149", " 172",
" 2484", " 552"), class = "factor"), Datum = structure(c(18815,
18815, 18815, 18815, 18815, 18815, 18815, 18815, 18815, 18815
), class = "Date"), Plot = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L), .Label = c("1", "2", "3", "4", "5", "6", "7",
"8"), class = "factor"), Behandlung = structure(c(2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("a", "b"), class = "factor"),
Entfernung = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L), .Label = c("2", "5"), class = "factor"), Korn = c(65.6,
64, 66.4, 57.4, 72.6, 68.9, 68.1, 73.6, 59.9, 34.6)), class = "data.frame", row.names = c(NA,
-10L))
我的代码:
library(tidyverse)
# Natural left join: no `by` is supplied, so dplyr joins on every column
# name that df1 and df2 have in common.
df3 <- left_join(df1, df2)
感谢任何帮助!非常感谢!干杯!
在您的第二个数据框中,Soll 列的值在数字(例如“1189”)之前有一个空格,而第一个数据框中没有。一旦两者保持一致,您的左连接就可以正常工作了。
要重新生成第二个数据框,请参见下文:
# Regenerated df2: same columns and values as the df2 in the question, but
# the Soll labels are written without the leading space (e.g. "552").
df2 <-structure(list(Soll = structure(c(7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L), .Label = c("1189", "119", "1202", "149", "172",
"2484", "552"), class = "factor"), Datum = structure(c(18815,
18815, 18815, 18815, 18815, 18815, 18815, 18815, 18815, 18815
), class = "Date"), Plot = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L), .Label = c("1", "2", "3", "4", "5", "6", "7",
"8"), class = "factor"), Behandlung = structure(c(2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("a", "b"), class = "factor"),
Entfernung = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L), .Label = c("2", "5"), class = "factor"), Korn = c(65.6,
64, 66.4, 57.4, 72.6, 68.9, 68.1, 73.6, 59.9, 34.6)), class =
"data.frame", row.names = c(NA,
-10L))
最后,我们可以试试我们的左连接。现在我们可以看到附加的 Korn
的实际值:
# Keep every row of df1 and attach the matching df2 columns; with no `by`,
# the column names shared by both data frames act as the join keys.
joined_df <- left_join(df1, df2)
Soll Datum Plot Behandlung Entfernung DGUnkraut prec Korn
1 552 2021-04-08 1 a 2 0.1 2.04 NA
2 552 2021-05-17 1 a 2 4.0 13.38 NA
3 552 2021-07-07 1 a 2 12.2 121.46 64.0
4 552 2021-04-08 1 b 2 0.7 2.04 NA
5 552 2021-05-17 1 b 2 0.0 13.38 NA
6 552 2021-07-07 1 b 2 0.0 121.46 65.6
7 552 2021-04-08 3 a 2 0.2 2.04 NA
8 552 2021-05-17 3 a 2 0.0 13.38 NA
9 552 2021-07-07 3 a 2 0.0 121.46 68.9
10 552 2021-04-08 3 b 2 0.1 2.04 NA
正如 @IanNoriega 和 @r2evans 所提到的,df2 的 Soll 列中有一个前导空格。您可以先使用 trimws 将其删除。
# Strip the surrounding whitespace from the join key column of df2.
df2[["Soll"]] <- trimws(df2[["Soll"]])
现在连接就可以正常工作了。
# Left join with the five key columns spelled out explicitly.
dplyr::left_join(
  df1,
  df2,
  by = c("Soll", "Datum", "Plot", "Behandlung", "Entfernung")
)
输出
Soll Datum Plot Behandlung Entfernung DGUnkraut prec Korn
1 552 2021-04-08 1 a 2 0.1 2.04 NA
2 552 2021-05-17 1 a 2 4.0 13.38 NA
3 552 2021-07-07 1 a 2 12.2 121.46 64.0
4 552 2021-04-08 1 b 2 0.7 2.04 NA
5 552 2021-05-17 1 b 2 0.0 13.38 NA
6 552 2021-07-07 1 b 2 0.0 121.46 65.6
7 552 2021-04-08 3 a 2 0.2 2.04 NA
8 552 2021-05-17 3 a 2 0.0 13.38 NA
9 552 2021-07-07 3 a 2 0.0 121.46 68.9
10 552 2021-04-08 3 b 2 0.1 2.04 NA
您也可以在一个管道中完成所有操作。
# Trim the key on the fly inside the join call, so df2 itself is not
# reassigned.
df1 %>%
  dplyr::left_join(
    dplyr::mutate(df2, Soll = trimws(Soll)),
    by = c("Soll", "Datum", "Plot", "Behandlung", "Entfernung")
  )
请看这里。要让您的示例正常运行,您可以:
library(dplyr)
library(stringr)
# Column names are case-sensitive: the key column in df2 is `Soll`, so that
# is the name to trim (a lowercase `soll` does not exist in df2 as defined
# above).
# NOTE(review): the lowercase headers in the printed result suggest the
# column names were cleaned beforehand in the original answer — confirm.
df2 <- df2 %>%
  as_tibble() %>%
  mutate(Soll = str_trim(Soll))
left_join(df1, df2)
soll datum plot behandlung entfernung dg_unkraut prec korn
1 552 2021-04-08 1 a 2 0.1 2.04 NA
2 552 2021-05-17 1 a 2 4.0 13.38 NA
3 552 2021-07-07 1 a 2 12.2 121.46 64.0
4 552 2021-04-08 1 b 2 0.7 2.04 NA
5 552 2021-05-17 1 b 2 0.0 13.38 NA
6 552 2021-07-07 1 b 2 0.0 121.46 65.6
7 552 2021-04-08 3 a 2 0.2 2.04 NA
8 552 2021-05-17 3 a 2 0.0 13.38 NA
9 552 2021-07-07 3 a 2 0.0 121.46 68.9
10 552 2021-04-08 3 b 2 0.1 2.04 NA