通过合并替换 NA 值的问题
Issue with replacing NA values by merging
我正在尝试根据下面的 borrower_country
和 year
合并两个数据集。第一个数据集是包含多个国家/地区的更大集合的一部分,我试图用集合 gdp_infl
中的值填充第一组 test
的 NA 值。由于某种原因,它不会合并,GDP_per_capita_USD
、GDP_per_capita_growth
和 inflation
列保持为 NA。
还有一个额外的细节,这是一个更大的合并的一部分,我将多个国家的 gdp_infl
数据与我的主要数据集结合在一起。所有其他国家都没有问题,但出于某种原因,日本却没有。我会把我试过的代码插入到底部。
structure(list(borrower_country = c("Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan"), year = c(1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 2000L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2001L,
2001L, 2001L, 2001L, 2001L), GDP_per_capita_USD = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), GDP_per_capita_growth = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), inflation = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
)), row.names = c(NA, 50L), class = "data.frame")
structure(list(borrower_country = c("Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan"), year = 1982:2020, GDP_per_capita_growth = c(2.6, 2.81,
3.83, 4.58, 2.78, 4.23, 6.34, 4.44, 4.55, 3.01, 0.47, -0.84,
0.71, 2.5, 2.87, 0.84, -1.4, -0.43, 2.61, 0.16, -0.11, 1.31,
2.17, 1.65, 1.36, 1.54, -1.14, -5.4, 4.17, 0.07, 1.66, 2.15,
0.51, 1.33, 0.64, 2.34, 0.53, 0.48, NA), GDP_per_capita_USD = c(9575.61,
10421.21, 10978.92, 11576.69, 17113.26, 20748.99, 25059.01, 24822.78,
25371.46, 28915.01, 31414.98, 35681.96, 39200.49, 43428.94, 38436.93,
35021.72, 31902.77, 36026.56, 38532.04, 33846.47, 32289.35, 34808.39,
37688.72, 37217.65, 35433.99, 35275.23, 39339.3, 40855.18, 44507.68,
48168, 48603.48, 40454.45, 38109.41, 34524.47, 38761.82, 38386.51,
39159.42, 40113.06, NA), inflation = c(2.74, 1.9, 2.26, 2.03,
0.6, 0.13, 0.68, 2.27, 3.08, 3.25, 1.76, 1.24, 0.7, -0.13, 0.14,
1.75, 0.66, -0.34, -0.68, -0.74, -0.92, -0.26, -0.01, -0.28,
0.25, 0.06, 1.38, -1.35, -0.72, -0.27, -0.05, 0.35, 2.76, 0.79,
-0.12, 0.47, 0.98, 0.48, -0.02)), row.names = c(NA, -39L), class = "data.frame")
密码
test <- df
test$GDP_per_capita_growth[test$borrower_country == "Japan"] <- NA
test$GDP_per_capita_USD[test$borrower_country == "Japan"] <- NA
test$inflation[test$borrower_country == "Japan"] <- NA
test <- test %>%
filter(borrower_country == "Japan")%>%
select(borrower_country,year,GDP_per_capita_USD,GDP_per_capita_growth,inflation)
temp <- merge(test,gdp_infl, by = c("borrower_country", "year","GDP_per_capita_USD","GDP_per_capita_growth","inflation"), all.x = TRUE)
我似乎无法解决这个问题,我们将不胜感激。
您可以尝试 rqdatatable
包中的 natural_join
natural_join(df1, df2, by = c("borrower_country", "year"))
borrower_country year GDP_per_capita_growth GDP_per_capita_USD inflation
1: Japan 1999 -0.43 36026.56 -0.34
2: Japan 1999 -0.43 36026.56 -0.34
3: Japan 1999 -0.43 36026.56 -0.34
4: Japan 1999 -0.43 36026.56 -0.34
5: Japan 1999 -0.43 36026.56 -0.34
6: Japan 1999 -0.43 36026.56 -0.34
7: Japan 1999 -0.43 36026.56 -0.34
8: Japan 1999 -0.43 36026.56 -0.34
9: Japan 1999 -0.43 36026.56 -0.34
我正在尝试根据下面的 borrower_country
和 year
合并两个数据集。第一个数据集是包含多个国家/地区的更大集合的一部分,我试图用集合 gdp_infl
中的值填充第一组 test
的 NA 值。由于某种原因,它不会合并,GDP_per_capita_USD
、GDP_per_capita_growth
和 inflation
列保持为 NA。
还有一个额外的细节,这是一个更大的合并的一部分,我将多个国家的 gdp_infl
数据与我的主要数据集结合在一起。所有其他国家都没有问题,但出于某种原因,日本却没有。我会把我试过的代码插入到底部。
structure(list(borrower_country = c("Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan"), year = c(1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 2000L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2001L,
2001L, 2001L, 2001L, 2001L), GDP_per_capita_USD = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), GDP_per_capita_growth = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), inflation = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
)), row.names = c(NA, 50L), class = "data.frame")
structure(list(borrower_country = c("Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan", "Japan", "Japan", "Japan", "Japan", "Japan", "Japan",
"Japan"), year = 1982:2020, GDP_per_capita_growth = c(2.6, 2.81,
3.83, 4.58, 2.78, 4.23, 6.34, 4.44, 4.55, 3.01, 0.47, -0.84,
0.71, 2.5, 2.87, 0.84, -1.4, -0.43, 2.61, 0.16, -0.11, 1.31,
2.17, 1.65, 1.36, 1.54, -1.14, -5.4, 4.17, 0.07, 1.66, 2.15,
0.51, 1.33, 0.64, 2.34, 0.53, 0.48, NA), GDP_per_capita_USD = c(9575.61,
10421.21, 10978.92, 11576.69, 17113.26, 20748.99, 25059.01, 24822.78,
25371.46, 28915.01, 31414.98, 35681.96, 39200.49, 43428.94, 38436.93,
35021.72, 31902.77, 36026.56, 38532.04, 33846.47, 32289.35, 34808.39,
37688.72, 37217.65, 35433.99, 35275.23, 39339.3, 40855.18, 44507.68,
48168, 48603.48, 40454.45, 38109.41, 34524.47, 38761.82, 38386.51,
39159.42, 40113.06, NA), inflation = c(2.74, 1.9, 2.26, 2.03,
0.6, 0.13, 0.68, 2.27, 3.08, 3.25, 1.76, 1.24, 0.7, -0.13, 0.14,
1.75, 0.66, -0.34, -0.68, -0.74, -0.92, -0.26, -0.01, -0.28,
0.25, 0.06, 1.38, -1.35, -0.72, -0.27, -0.05, 0.35, 2.76, 0.79,
-0.12, 0.47, 0.98, 0.48, -0.02)), row.names = c(NA, -39L), class = "data.frame")
密码
test <- df
test$GDP_per_capita_growth[test$borrower_country == "Japan"] <- NA
test$GDP_per_capita_USD[test$borrower_country == "Japan"] <- NA
test$inflation[test$borrower_country == "Japan"] <- NA
test <- test %>%
filter(borrower_country == "Japan")%>%
select(borrower_country,year,GDP_per_capita_USD,GDP_per_capita_growth,inflation)
temp <- merge(test,gdp_infl, by = c("borrower_country", "year","GDP_per_capita_USD","GDP_per_capita_growth","inflation"), all.x = TRUE)
我似乎无法解决这个问题,我们将不胜感激。
您可以尝试 rqdatatable
包中的 natural_join
natural_join(df1, df2, by = c("borrower_country", "year"))
borrower_country year GDP_per_capita_growth GDP_per_capita_USD inflation
1: Japan 1999 -0.43 36026.56 -0.34
2: Japan 1999 -0.43 36026.56 -0.34
3: Japan 1999 -0.43 36026.56 -0.34
4: Japan 1999 -0.43 36026.56 -0.34
5: Japan 1999 -0.43 36026.56 -0.34
6: Japan 1999 -0.43 36026.56 -0.34
7: Japan 1999 -0.43 36026.56 -0.34
8: Japan 1999 -0.43 36026.56 -0.34
9: Japan 1999 -0.43 36026.56 -0.34