将 Group by 和 Slope 与 dplyr 一起使用以获取新列
Using Group by and Slope with dplyr to get new column
我正在寻找一种更直接的方法,使用 `dplyr` 从我的数据中得到一个名为 `slope` 的列。数据集按 `season` 和 `stat` 类型分组。我当前的代码是:
library(tidyverse); library(broom)
# Example data: 3 players x 3 seasons x 4 stat categories = 36 rows.
# Rows are ordered by player, then season, then stat, so the three key
# columns can be generated with rep() instead of being spelled out.
full_table_raw <- data.frame(
  playerID = rep(c("abreujo02", "arenano01", "blackch02"), each = 12L),
  season = rep(rep(2014:2016, each = 4L), times = 3L),
  stat = rep(c("HR", "R", "RBI", "SB"), times = 9L),
  points = c(
    3, 2, 3, 2, 2, 1, 2, 1, 1, 1, 2, 1,
    1, 1, 1, 1, 3, 3, 3, 2, 3, 3, 3, 2,
    2, 3, 2, 3, 1, 2, 1, 3, 2, 2, 1, 3
  ),
  ranks = c(
    1, 2, 1, 2, 2, 3, 2, 3, 3, 3, 2, 3,
    3, 3, 3, 3, 1, 1, 1, 2, 1, 1, 1, 2,
    2, 1, 2, 1, 3, 2, 3, 1, 2, 2, 3, 1
  ),
  value = c(
    36, 80, 107, 3, 30, 88, 101, 0, 25, 67, 100, 0,
    18, 58, 61, 2, 42, 97, 130, 2, 41, 116, 133, 2,
    19, 82, 72, 28, 17, 93, 58, 43, 29, 111, 82, 17
  ),
  # Keep playerID/stat as character vectors on R < 4.0 as well.
  stringsAsFactors = FALSE
)
# Per season/stat group: regress value on points, tidy the coefficient
# table, and keep only the `points` estimate under the name `slope`.
sgp_table <- full_table_raw %>%
  group_by(season, stat) %>%
  do(tidy(lm(value ~ points, data = .))) %>%
  filter(term == "points") %>%
  select(season, stat, slope = estimate)
我正在寻找一种更简洁的方法,根据我当前的数据创建 `slope` 列。
下面是一个使用 `nest`/`unnest` 的方案:
library(tidyverse)
library(broom)
# Fit value ~ points once per season/stat group and keep only the slope.
full_table_raw %>%
  group_by(season, stat) %>%
  nest() %>%
  # Drop the grouping so the result is a plain 12 x 3 tibble even on tidyr
  # versions where nest() on a grouped data frame keeps the groups.
  ungroup() %>%
  mutate(modelout = map(data, ~ lm(value ~ points, data = .x) %>%
    tidy() %>%
    filter(term == "points") %>%
    select(slope = estimate))) %>%
  select(-data) %>%
  # Name the list-column explicitly; a bare unnest() warns on tidyr >= 1.0.
  unnest(modelout)
# A tibble: 12 x 3
# season stat slope
# <int> <chr> <dbl>
# 1 2014 HR 9.
# 2 2014 R 12
# 3 2014 RBI 23.
# 4 2014 SB 13.0
# 5 2015 HR 12.5
# 6 2015 R 4.50
# 7 2015 RBI 36
# 8 2015 SB 21.5
# 9 2016 HR 8.00
#10 2016 R 24.5
#11 2016 RBI 25.5
#12 2016 SB 8.5
不确定您是否觉得这比您现有的代码更简洁,但使用 `nest` 之后就不再需要 `group_by`:
# Nest every column except season/stat into `data`, then fit one model per
# nested row. map_dbl() returns a plain numeric `slope` column, whereas
# map() would leave a list column holding a single number per cell.
# NOTE(review): tidyr >= 1.0 spells this nest(data = -c(season, stat)) and
# warns on the unnamed form; switch once the minimum tidyr version is known.
sgp_table <- full_table_raw %>%
  nest(-season, -stat) %>%
  mutate(
    slope = map_dbl(data, ~ coef(lm(value ~ points, data = .x))[["points"]])
  ) %>%
  select(-data)
> sgp_table
season stat slope
1 2014 HR 9
2 2014 R 12
3 2014 RBI 23
4 2014 SB 13
5 2015 HR 12.5
6 2015 R 4.5
7 2015 RBI 36
8 2015 SB 21.5
9 2016 HR 8
10 2016 R 24.5
11 2016 RBI 25.5
12 2016 SB 8.5
我正在寻找一种更直接的方法,使用 `dplyr` 从我的数据中得到一个名为 `slope` 的列。数据集按 `season` 和 `stat` 类型分组。我当前的代码是:
library(tidyverse); library(broom)
# Example data: 3 players x 3 seasons x 4 stat categories = 36 rows.
# Rows are ordered by player, then season, then stat, so the three key
# columns can be generated with rep() instead of being spelled out.
full_table_raw <- data.frame(
  playerID = rep(c("abreujo02", "arenano01", "blackch02"), each = 12L),
  season = rep(rep(2014:2016, each = 4L), times = 3L),
  stat = rep(c("HR", "R", "RBI", "SB"), times = 9L),
  points = c(
    3, 2, 3, 2, 2, 1, 2, 1, 1, 1, 2, 1,
    1, 1, 1, 1, 3, 3, 3, 2, 3, 3, 3, 2,
    2, 3, 2, 3, 1, 2, 1, 3, 2, 2, 1, 3
  ),
  ranks = c(
    1, 2, 1, 2, 2, 3, 2, 3, 3, 3, 2, 3,
    3, 3, 3, 3, 1, 1, 1, 2, 1, 1, 1, 2,
    2, 1, 2, 1, 3, 2, 3, 1, 2, 2, 3, 1
  ),
  value = c(
    36, 80, 107, 3, 30, 88, 101, 0, 25, 67, 100, 0,
    18, 58, 61, 2, 42, 97, 130, 2, 41, 116, 133, 2,
    19, 82, 72, 28, 17, 93, 58, 43, 29, 111, 82, 17
  ),
  # Keep playerID/stat as character vectors on R < 4.0 as well.
  stringsAsFactors = FALSE
)
# Per season/stat group: regress value on points, tidy the coefficient
# table, and keep only the `points` estimate under the name `slope`.
sgp_table <- full_table_raw %>%
  group_by(season, stat) %>%
  do(tidy(lm(value ~ points, data = .))) %>%
  filter(term == "points") %>%
  select(season, stat, slope = estimate)
我正在寻找一种更简洁的方法,根据我当前的数据创建 `slope` 列。
下面是一个使用 `nest`/`unnest` 的方案:
library(tidyverse)
library(broom)
# Fit value ~ points once per season/stat group and keep only the slope.
full_table_raw %>%
  group_by(season, stat) %>%
  nest() %>%
  # Drop the grouping so the result is a plain 12 x 3 tibble even on tidyr
  # versions where nest() on a grouped data frame keeps the groups.
  ungroup() %>%
  mutate(modelout = map(data, ~ lm(value ~ points, data = .x) %>%
    tidy() %>%
    filter(term == "points") %>%
    select(slope = estimate))) %>%
  select(-data) %>%
  # Name the list-column explicitly; a bare unnest() warns on tidyr >= 1.0.
  unnest(modelout)
# A tibble: 12 x 3
# season stat slope
# <int> <chr> <dbl>
# 1 2014 HR 9.
# 2 2014 R 12
# 3 2014 RBI 23.
# 4 2014 SB 13.0
# 5 2015 HR 12.5
# 6 2015 R 4.50
# 7 2015 RBI 36
# 8 2015 SB 21.5
# 9 2016 HR 8.00
#10 2016 R 24.5
#11 2016 RBI 25.5
#12 2016 SB 8.5
不确定您是否觉得这比您现有的代码更简洁,但使用 `nest` 之后就不再需要 `group_by`:
# Nest every column except season/stat into `data`, then fit one model per
# nested row. map_dbl() returns a plain numeric `slope` column, whereas
# map() would leave a list column holding a single number per cell.
# NOTE(review): tidyr >= 1.0 spells this nest(data = -c(season, stat)) and
# warns on the unnamed form; switch once the minimum tidyr version is known.
sgp_table <- full_table_raw %>%
  nest(-season, -stat) %>%
  mutate(
    slope = map_dbl(data, ~ coef(lm(value ~ points, data = .x))[["points"]])
  ) %>%
  select(-data)
> sgp_table
season stat slope
1 2014 HR 9
2 2014 R 12
3 2014 RBI 23
4 2014 SB 13
5 2015 HR 12.5
6 2015 R 4.5
7 2015 RBI 36
8 2015 SB 21.5
9 2016 HR 8
10 2016 R 24.5
11 2016 RBI 25.5
12 2016 SB 8.5