合并数据框并按组对行做除法
Merge data frames and divide rows by group
我想将 df1 的值除以 df2 的值。在下面这个可重现的示例中,我已经能够对这些值求和。那么除法该怎么做呢?提前致谢!
# Reproducible example: two per-country tables whose values are summed by
# country. dplyr supplies %>%, bind_rows(), across() and na_if().
library(dplyr)

df1 <- data.frame(
  country = c("a", "b", "c"),
  year1 = c(1, 2, 3),
  year2 = c(1, 2, 3)
)
df2 <- data.frame(
  country = c("a", "b", "d"),
  year1 = c(1, 2, NA),
  year2 = c(1, 2, 3)
)

df3 <- bind_rows(df1, df2) %>%
  # Treat missing values as 0 before summing.
  mutate(across(where(is.numeric), ~ tidyr::replace_na(.x, 0))) %>%
  group_by(country) %>%
  summarise(across(everything(), ~ sum(.x, na.rm = TRUE))) %>%
  # Turn totals of exactly 0 back into NA. na_if() is applied column by
  # column because it is documented for vectors; handing it the whole data
  # frame fails on recent dplyr releases.
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))
预期结果是:
# A tibble: 4 x 3
country year1 year2
<chr> <dbl> <dbl>
1 a 1 1
2 b 1 1
3 c NA NA
4 d NA NA
由于有些组有 2 行、有些组只有 1 行,可以在 summarise/across 中使用 if/else 条件:如果组内有两个元素,就用 first 元素除以 last 元素,否则返回 NA。
library(dplyr) # version 1.0.4
library(tidyr)
# For each country, divide the df1 value (first row of the group) by the
# df2 value (last row of the group). A country present in only one table
# has a single row, so there is nothing to divide by and the result is NA.
# NOTE: NAs are replaced by 0 first, so a two-row group with a missing
# value produces 0, Inf or NaN rather than NA.
bind_rows(df1, df2) %>%
  mutate(across(where(is.numeric), ~ replace_na(.x, 0))) %>%
  group_by(country) %>%
  summarise(across(
    everything(),
    ~ if (n() == 2) first(.x) / last(.x) else NA_real_
  ))
输出:
# A tibble: 4 x 3
# country year1 year2
#* <chr> <dbl> <dbl>
#1 a 1 1
#2 b 1 1
#3 c NA NA
#4 d NA NA
下面是一个使用 merge + split.default 的 base R 方案:
# Full outer join on country: columns shared by both tables come back as
# <name>.x (from df1) and <name>.y (from df2).
df <- merge(df1, df2, by = "country", all = TRUE)
cbind(
  df[1],
  list2DF(lapply(
    # Group the value columns by their original name (merge suffix removed).
    # The dot must be written "\\." inside an R string literal.
    split.default(df[-1], sub("\\.(x|y)$", "", names(df)[-1])),
    # Each group holds the .x column followed by the .y column: df1 / df2.
    function(pair) pair[[1]] / pair[[2]]
  ))
)
结果如下:
country year1 year2
1 a 1 1
2 b 1 1
3 c NA NA
4 d NA NA
我想将 df1 的值除以 df2 的值。在下面这个可重现的示例中,我已经能够对这些值求和。那么除法该怎么做呢?提前致谢!
# Reproducible example: two per-country tables whose values are summed by
# country. dplyr supplies %>%, bind_rows(), across() and na_if().
library(dplyr)

df1 <- data.frame(
  country = c("a", "b", "c"),
  year1 = c(1, 2, 3),
  year2 = c(1, 2, 3)
)
df2 <- data.frame(
  country = c("a", "b", "d"),
  year1 = c(1, 2, NA),
  year2 = c(1, 2, 3)
)

df3 <- bind_rows(df1, df2) %>%
  # Treat missing values as 0 before summing.
  mutate(across(where(is.numeric), ~ tidyr::replace_na(.x, 0))) %>%
  group_by(country) %>%
  summarise(across(everything(), ~ sum(.x, na.rm = TRUE))) %>%
  # Turn totals of exactly 0 back into NA. na_if() is applied column by
  # column because it is documented for vectors; handing it the whole data
  # frame fails on recent dplyr releases.
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))
预期结果是:
# A tibble: 4 x 3
country year1 year2
<chr> <dbl> <dbl>
1 a 1 1
2 b 1 1
3 c NA NA
4 d NA NA
由于有些组有 2 行、有些组只有 1 行,可以在 summarise/across 中使用 if/else 条件:如果组内有两个元素,就用 first 元素除以 last 元素,否则返回 NA。
library(dplyr) # version 1.0.4
library(tidyr)
# For each country, divide the df1 value (first row of the group) by the
# df2 value (last row of the group). A country present in only one table
# has a single row, so there is nothing to divide by and the result is NA.
# NOTE: NAs are replaced by 0 first, so a two-row group with a missing
# value produces 0, Inf or NaN rather than NA.
bind_rows(df1, df2) %>%
  mutate(across(where(is.numeric), ~ replace_na(.x, 0))) %>%
  group_by(country) %>%
  summarise(across(
    everything(),
    ~ if (n() == 2) first(.x) / last(.x) else NA_real_
  ))
输出:
# A tibble: 4 x 3
# country year1 year2
#* <chr> <dbl> <dbl>
#1 a 1 1
#2 b 1 1
#3 c NA NA
#4 d NA NA
下面是一个使用 merge + split.default 的 base R 方案:
# Full outer join on country: columns shared by both tables come back as
# <name>.x (from df1) and <name>.y (from df2).
df <- merge(df1, df2, by = "country", all = TRUE)
cbind(
  df[1],
  list2DF(lapply(
    # Group the value columns by their original name (merge suffix removed).
    # The dot must be written "\\." inside an R string literal.
    split.default(df[-1], sub("\\.(x|y)$", "", names(df)[-1])),
    # Each group holds the .x column followed by the .y column: df1 / df2.
    function(pair) pair[[1]] / pair[[2]]
  ))
)
这给出了
country year1 year2
1 a 1 1
2 b 1 1
3 c NA NA
4 d NA NA