在 R 中使用 dplyr 进行 rbind 与分组(group by)
R rbind and group by using dplyr
我有以下数据
library(dplyr)
# Example data: one row per year / type / type_group combination, carrying
# four numeric analysis columns.
df1 <- tibble(
  year = rep(c("2001", "2002"), each = 4),
  type = rep(c("Animals", "People"), each = 2, times = 2),
  type_group = rep(c("Dogs", "Cats", "John", "Jane"), times = 2),
  analysis1 = c(32.7, 67.5, 34.6, 56.5, 56.7, 78.5, 98.9, 87.3),
  analysis2 = c(23.7, 89.4, 45.8, 98.6, 45.7, 45.7, 23.6, 23.6),
  analysis3 = c(45.7, 45.7, 23.6, 23.6, 14.4, 45.4, 98.0, 12.2),
  analysis4 = c(14.4, 45.4, 98.0, 12.2, 34.6, 44.3, 23.8, 16.3)
)
我正在使用 rbind 通过一些新的计算来创建新行,这些计算见下面的代码。
我想知道是否有更简洁、更快捷的方法来完成这件事,我相信一定有。我的真实数据包含大约 30 个年份和大约 60 个变量,如果照搬这里示例的写法,为真实数据编写脚本将花费很长时间:
# Build the 2001 subset and append two hand-computed "diff" rows with rbind().
# NOTE(review): the df1$analysisN[df1$type_group == ...] lookups below read
# from the full df1, not from the filtered year, so each subtraction returns
# one value per year present in df1 (two here) instead of a single number.
# NOTE(review): c() mixes character labels with numeric differences, so every
# element of the appended vector is coerced to character; presumably the
# analysis columns of the result become character as well -- verify.
df1 %>%
filter(year =="2001") %>%
# "People diff" row: John minus Jane for each analysis column.
rbind(c("2001", "People diff","John and Jane",
df1$analysis1[df1$type_group == 'John'] - df1$analysis1[df1$type_group == 'Jane'],
df1$analysis2[df1$type_group == 'John'] - df1$analysis2[df1$type_group == 'Jane'],
df1$analysis3[df1$type_group == 'John'] - df1$analysis3[df1$type_group == 'Jane'],
df1$analysis4[df1$type_group == 'John'] - df1$analysis4[df1$type_group == 'Jane'])) %>%
# "Animals diff" row: Cats minus Dogs for each analysis column.
rbind(c("2001","Animals diff","Dogs and cats",
df1$analysis1[df1$type_group == 'Cats'] - df1$analysis1[df1$type_group == 'Dogs'],
df1$analysis2[df1$type_group == 'Cats'] - df1$analysis2[df1$type_group == 'Dogs'],
df1$analysis3[df1$type_group == 'Cats'] - df1$analysis3[df1$type_group == 'Dogs'],
df1$analysis4[df1$type_group == 'Cats'] - df1$analysis4[df1$type_group == 'Dogs'])) -> data_2001
# Same construction for 2002; only the year filter and the year label differ,
# the difference expressions are identical to the 2001 ones.
df1 %>%
filter(year =="2002") %>%
rbind(c("2002", "People diff","John and Jane",
df1$analysis1[df1$type_group == 'John'] - df1$analysis1[df1$type_group == 'Jane'],
df1$analysis2[df1$type_group == 'John'] - df1$analysis2[df1$type_group == 'Jane'],
df1$analysis3[df1$type_group == 'John'] - df1$analysis3[df1$type_group == 'Jane'],
df1$analysis4[df1$type_group == 'John'] - df1$analysis4[df1$type_group == 'Jane'])) %>%
rbind(c("2002","Animals diff","Dogs and cats",
df1$analysis1[df1$type_group == 'Cats'] - df1$analysis1[df1$type_group == 'Dogs'],
df1$analysis2[df1$type_group == 'Cats'] - df1$analysis2[df1$type_group == 'Dogs'],
df1$analysis3[df1$type_group == 'Cats'] - df1$analysis3[df1$type_group == 'Dogs'],
df1$analysis4[df1$type_group == 'Cats'] - df1$analysis4[df1$type_group == 'Dogs'])) -> data_2002
# Stack the two per-year tables into the final result.
rbind(data_2001, data_2002) -> final_data
感谢任何帮助!谢谢
首先,我认为你的分析是不正确的,除非你是有意这样做的。在你的 rbind 调用中,df1$analysis1[df1$type_group == 'John'] 会同时取出两个年份的数据,但你只把结果绑定到某一个年份(例如 2001)的数据上。
一种快速简便的方法是使用 tidyr 包中的 gather 和 spread,例如:
library(tidyr)
# Compute, for every year x type pair, the difference between its two
# type_group rows across all analysis columns in a single pass.
# diff() returns second-minus-first in row order, so with df1 as defined in
# the question this is Cats - Dogs for Animals and Jane - John for People;
# reorder the rows beforehand if the opposite sign is wanted.
df1 %>%
  # Long format: one row per (year, type, type_group, analysis) value.
  gather(analysis, value, -year, -type, -type_group) %>%
  group_by(year, type, analysis) %>%
  summarise(value = diff(value)) %>%
  # Back to wide format: one column per analysis variable.
  spread(analysis, value) %>%
  # Drop the grouping left over from group_by() so that later verbs are not
  # silently applied per group.
  ungroup()
我有以下数据
library(dplyr)
# Example data: one row per year / type / type_group combination, carrying
# four numeric analysis columns.
df1 <- tibble(
  year = rep(c("2001", "2002"), each = 4),
  type = rep(c("Animals", "People"), each = 2, times = 2),
  type_group = rep(c("Dogs", "Cats", "John", "Jane"), times = 2),
  analysis1 = c(32.7, 67.5, 34.6, 56.5, 56.7, 78.5, 98.9, 87.3),
  analysis2 = c(23.7, 89.4, 45.8, 98.6, 45.7, 45.7, 23.6, 23.6),
  analysis3 = c(45.7, 45.7, 23.6, 23.6, 14.4, 45.4, 98.0, 12.2),
  analysis4 = c(14.4, 45.4, 98.0, 12.2, 34.6, 44.3, 23.8, 16.3)
)
我正在使用 rbind 通过一些新的计算来创建新行,这些计算见下面的代码。
我想知道是否有更简洁、更快捷的方法来完成这件事,我相信一定有。我的真实数据包含大约 30 个年份和大约 60 个变量,如果照搬这里示例的写法,为真实数据编写脚本将花费很长时间:
# Build the 2001 subset and append two hand-computed "diff" rows with rbind().
# NOTE(review): the df1$analysisN[df1$type_group == ...] lookups below read
# from the full df1, not from the filtered year, so each subtraction returns
# one value per year present in df1 (two here) instead of a single number.
# NOTE(review): c() mixes character labels with numeric differences, so every
# element of the appended vector is coerced to character; presumably the
# analysis columns of the result become character as well -- verify.
df1 %>%
filter(year =="2001") %>%
# "People diff" row: John minus Jane for each analysis column.
rbind(c("2001", "People diff","John and Jane",
df1$analysis1[df1$type_group == 'John'] - df1$analysis1[df1$type_group == 'Jane'],
df1$analysis2[df1$type_group == 'John'] - df1$analysis2[df1$type_group == 'Jane'],
df1$analysis3[df1$type_group == 'John'] - df1$analysis3[df1$type_group == 'Jane'],
df1$analysis4[df1$type_group == 'John'] - df1$analysis4[df1$type_group == 'Jane'])) %>%
# "Animals diff" row: Cats minus Dogs for each analysis column.
rbind(c("2001","Animals diff","Dogs and cats",
df1$analysis1[df1$type_group == 'Cats'] - df1$analysis1[df1$type_group == 'Dogs'],
df1$analysis2[df1$type_group == 'Cats'] - df1$analysis2[df1$type_group == 'Dogs'],
df1$analysis3[df1$type_group == 'Cats'] - df1$analysis3[df1$type_group == 'Dogs'],
df1$analysis4[df1$type_group == 'Cats'] - df1$analysis4[df1$type_group == 'Dogs'])) -> data_2001
# Same construction for 2002; only the year filter and the year label differ,
# the difference expressions are identical to the 2001 ones.
df1 %>%
filter(year =="2002") %>%
rbind(c("2002", "People diff","John and Jane",
df1$analysis1[df1$type_group == 'John'] - df1$analysis1[df1$type_group == 'Jane'],
df1$analysis2[df1$type_group == 'John'] - df1$analysis2[df1$type_group == 'Jane'],
df1$analysis3[df1$type_group == 'John'] - df1$analysis3[df1$type_group == 'Jane'],
df1$analysis4[df1$type_group == 'John'] - df1$analysis4[df1$type_group == 'Jane'])) %>%
rbind(c("2002","Animals diff","Dogs and cats",
df1$analysis1[df1$type_group == 'Cats'] - df1$analysis1[df1$type_group == 'Dogs'],
df1$analysis2[df1$type_group == 'Cats'] - df1$analysis2[df1$type_group == 'Dogs'],
df1$analysis3[df1$type_group == 'Cats'] - df1$analysis3[df1$type_group == 'Dogs'],
df1$analysis4[df1$type_group == 'Cats'] - df1$analysis4[df1$type_group == 'Dogs'])) -> data_2002
# Stack the two per-year tables into the final result.
rbind(data_2001, data_2002) -> final_data
感谢任何帮助!谢谢
首先,我认为你的分析是不正确的,除非你是有意这样做的。在你的 rbind 调用中,df1$analysis1[df1$type_group == 'John'] 会同时取出两个年份的数据,但你只把结果绑定到某一个年份(例如 2001)的数据上。
一种快速简便的方法是使用 tidyr 包中的 gather 和 spread,例如:
library(tidyr)
# Compute, for every year x type pair, the difference between its two
# type_group rows across all analysis columns in a single pass.
# diff() returns second-minus-first in row order, so with df1 as defined in
# the question this is Cats - Dogs for Animals and Jane - John for People;
# reorder the rows beforehand if the opposite sign is wanted.
df1 %>%
  # Long format: one row per (year, type, type_group, analysis) value.
  gather(analysis, value, -year, -type, -type_group) %>%
  group_by(year, type, analysis) %>%
  summarise(value = diff(value)) %>%
  # Back to wide format: one column per analysis variable.
  spread(analysis, value) %>%
  # Drop the grouping left over from group_by() so that later verbs are not
  # silently applied per group.
  ungroup()