使用 dplyr 和 tidyr 将数据转为宽格式并对特定列求和
dplyr tidyr to widen and sum specific columns
我在使用 dplyr 和 tidyr 时遇到了困难:我想取一个如下形式的数据框 (df):
# Sample data: one row per event occurrence. Rows for event "a" fill only
# a_property and rows for event "b" fill only b_property; the other column
# is NA.
myDf <- data.frame(
  id = rep(c(1, 2), times = c(4, 2)),
  event = rep(c("a", "b"), times = 3),
  a_property = c(1, NA, 2, NA, 3, NA),
  b_property = c(NA, 2, NA, 3, NA, 4)
)
> myDf
id event a_property b_property
1 a 1 NA
1 b NA 2
1 a 2 NA
1 b NA 3
2 a 3 NA
2 b NA 4
并转换为所需的格式:
id count_event_a count_event_b sum_property_a sum_property_b
1 2 2 3 5
2 1 1 5 4
# Collapse myDf to one row per id. The counts are the number of non-missing
# values in each property column; the sums skip NA via na.rm = TRUE.
myDf %>%
  group_by(id) %>%
  summarise(
    count_event_a = sum(!is.na(a_property)),
    count_event_b = sum(!is.na(b_property)),
    sum_property_a = sum(a_property, na.rm = TRUE),
    sum_property_b = sum(b_property, na.rm = TRUE)
  ) %>%
  ungroup()
您的示例中有笔误(id 为 2 的 sum_property_a 应为 3,而不是 5)。正确的结果应该是:
# A tibble: 2 × 5
id count_event_a count_event_b sum_property_a sum_property_b
<dbl> <int> <int> <dbl> <dbl>
1 1 2 2 3 5
2 2 1 1 3 4
更一般一点:
# More general variant: handle every property column at once instead of
# naming each one inside summarise(). Uses pivot_longer()/pivot_wider(),
# which replace the superseded gather()/spread().
# Result columns: id, count_a, count_b, sum_a, sum_b.
myDf %>%
  # Stack all columns except id/event into (property, value) pairs and drop
  # the NA placeholders in the same step.
  pivot_longer(
    cols = -c(id, event),
    names_to = "property",
    values_to = "value",
    values_drop_na = TRUE
  ) %>%
  group_by(id, event) %>%
  summarise(count = n(), sum = sum(value)) %>%
  # summarise() only removes the last grouping level (event); drop the
  # remaining id grouping so the result is a plain tibble.
  ungroup() %>%
  # Spread each statistic across events. Pivoting count and sum separately
  # keeps count as an integer instead of coercing it to double.
  pivot_wider(names_from = event, values_from = c(count, sum))
我在使用 dplyr 和 tidyr 时遇到了困难:我想取一个如下形式的数据框 (df):
# Sample data: one row per event occurrence. Rows for event "a" fill only
# a_property and rows for event "b" fill only b_property; the other column
# is NA.
myDf <- data.frame(
  id = rep(c(1, 2), times = c(4, 2)),
  event = rep(c("a", "b"), times = 3),
  a_property = c(1, NA, 2, NA, 3, NA),
  b_property = c(NA, 2, NA, 3, NA, 4)
)
> myDf
id event a_property b_property
1 a 1 NA
1 b NA 2
1 a 2 NA
1 b NA 3
2 a 3 NA
2 b NA 4
并转换为所需的格式:
id count_event_a count_event_b sum_property_a sum_property_b
1 2 2 3 5
2 1 1 5 4
# Collapse myDf to one row per id. The counts are the number of non-missing
# values in each property column; the sums skip NA via na.rm = TRUE.
myDf %>%
  group_by(id) %>%
  summarise(
    count_event_a = sum(!is.na(a_property)),
    count_event_b = sum(!is.na(b_property)),
    sum_property_a = sum(a_property, na.rm = TRUE),
    sum_property_b = sum(b_property, na.rm = TRUE)
  ) %>%
  ungroup()
您的示例中有笔误(id 为 2 的 sum_property_a 应为 3,而不是 5)。正确的结果应该是:
# A tibble: 2 × 5
id count_event_a count_event_b sum_property_a sum_property_b
<dbl> <int> <int> <dbl> <dbl>
1 1 2 2 3 5
2 2 1 1 3 4
更一般一点:
# More general variant: handle every property column at once instead of
# naming each one inside summarise(). Uses pivot_longer()/pivot_wider(),
# which replace the superseded gather()/spread().
# Result columns: id, count_a, count_b, sum_a, sum_b.
myDf %>%
  # Stack all columns except id/event into (property, value) pairs and drop
  # the NA placeholders in the same step.
  pivot_longer(
    cols = -c(id, event),
    names_to = "property",
    values_to = "value",
    values_drop_na = TRUE
  ) %>%
  group_by(id, event) %>%
  summarise(count = n(), sum = sum(value)) %>%
  # summarise() only removes the last grouping level (event); drop the
  # remaining id grouping so the result is a plain tibble.
  ungroup() %>%
  # Spread each statistic across events. Pivoting count and sum separately
  # keeps count as an integer instead of coercing it to double.
  pivot_wider(names_from = event, values_from = c(count, sum))