如何在 summarize(n = n()) 中使用不同的 filter() 条件?
How to summarize(n=n()) with different filter()?
我想输出如下表格(原帖此处为一张图片)。
我尝试了几种方法,但每次只能得到其中一列(“to_ORD”或“to_MDW”)及其对应的计数。怎样编写代码,才能同时在同一张表中得到这两列?
这是我的代码:
library(tidyverse)
library(dplyr)
# Install nycflights13 only when it is missing, instead of downloading and
# reinstalling it on every run of the script.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library(nycflights13)
# Number of flights to ORD for each carrier that has at least one such flight.
flights_1 <- flights %>%
  filter(dest == "ORD") %>%
  group_by(carrier) %>%
  summarise(to_ORD = n())
flights_1

# The same count for MDW, produced as a second, separate table.
flights_2 <- flights %>%
  filter(dest == "MDW") %>%
  group_by(carrier) %>%
  summarise(to_MDW = n())
flights_2
我也试过:(明显不对)
# Attempt that does not give the wanted table: rows for both airports are
# kept, so n() counts ORD and MDW flights together in the single `to_ORD`
# column.
flights_1 <- flights %>%
  filter(dest %in% c("ORD", "MDW")) %>%
  group_by(carrier) %>%
  summarise(to_ORD = n())
按 carrier 分组,分别统计 dest 等于 'ORD' 和 'MDW' 的航班数,并只保留至少有一个计数大于 0 的行。
library(dplyr)
# One row per carrier. Comparing `dest` with an airport code yields a logical
# vector, and sum() counts its TRUE values, i.e. the flights to that airport.
# The final filter drops carriers that fly to neither airport.
flights %>%
  group_by(carrier) %>%
  summarise(
    to_ORD = sum(dest == "ORD"),
    to_MDW = sum(dest == "MDW")
  ) %>%
  filter(to_ORD > 0 | to_MDW > 0)
# carrier to_ORD to_MDW
# <chr> <int> <int>
#1 9E 1056 0
#2 AA 6059 0
#3 B6 905 0
#4 EV 2 0
#5 MQ 2276 0
#6 OO 1 0
#7 UA 6984 0
#8 WN 0 4113
思路相同,但拆成更多步骤以说明推理过程:先去掉不需要的数据,再区分芝加哥地区的两个机场。
library(dplyr)
library(nycflights13)
# Keep only the flights to the two Chicago-area airports, count them per
# carrier and destination, then put each count in the column of its airport.
# count(carrier, dest) returns an ungrouped tibble, so select() can really
# drop `dest`; on a tibble still grouped by `dest`, dplyr would add the column
# back ("Adding missing grouping variables: `dest`").
# if_else() with 0L keeps the counts as integers.
flights %>%
  filter(dest %in% c("ORD", "MDW")) %>%
  count(carrier, dest) %>%
  mutate(
    to_ORD = if_else(dest == "ORD", n, 0L),
    to_MDW = if_else(dest == "MDW", n, 0L)
  ) %>%
  select(-dest, -n)
#> # A tibble: 8 x 3
#> carrier to_ORD to_MDW
#> <chr> <int> <int>
#> 1 9E 1056 0
#> 2 AA 6059 0
#> 3 B6 905 0
#> 4 EV 2 0
#> 5 MQ 2276 0
#> 6 OO 1 0
#> 7 UA 6984 0
#> 8 WN 0 4113
我们可以使用pivot_wider
library(dplyr)
library(nycflights13)
library(tidyr)
# Keep the ORD/MDW flights and the two columns needed, then let pivot_wider()
# create one column per destination. values_fn = length counts the rows that
# fall into each carrier/destination cell, and values_fill = 0 is used for
# combinations with no rows.
flights %>%
  filter(dest %in% c("ORD", "MDW")) %>%
  select(carrier, dest) %>%
  pivot_wider(
    names_from = dest,
    values_from = dest,
    values_fn = length,
    values_fill = 0
  )
输出:
# A tibble: 8 x 3
# carrier ORD MDW
# <chr> <int> <int>
#1 UA 6984 0
#2 AA 6059 0
#3 MQ 2276 0
#4 B6 905 0
#5 WN 0 4113
#6 9E 1056 0
#7 OO 1 0
#8 EV 2 0
或者使用 base R,将 table 与 subset 结合使用:
# subset() keeps the rows whose dest is ORD or MDW and only the carrier and
# dest columns; table() then cross-tabulates those two columns.
table(subset(flights, dest %in% c("ORD", "MDW"), select = c(carrier, dest)))
我想输出如下表格(原帖此处为一张图片)。我尝试了几种方法,但每次只能得到其中一列(“to_ORD”或“to_MDW”)及其对应的计数。怎样编写代码,才能同时在同一张表中得到这两列?
这是我的代码:
library(tidyverse)
library(dplyr)
# Install nycflights13 only when it is missing, instead of downloading and
# reinstalling it on every run of the script.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library(nycflights13)
# Number of flights to ORD for each carrier that has at least one such flight.
flights_1 <- flights %>%
  filter(dest == "ORD") %>%
  group_by(carrier) %>%
  summarise(to_ORD = n())
flights_1

# The same count for MDW, produced as a second, separate table.
flights_2 <- flights %>%
  filter(dest == "MDW") %>%
  group_by(carrier) %>%
  summarise(to_MDW = n())
flights_2
我也试过:(明显不对)
# Attempt that does not give the wanted table: rows for both airports are
# kept, so n() counts ORD and MDW flights together in the single `to_ORD`
# column.
flights_1 <- flights %>%
  filter(dest %in% c("ORD", "MDW")) %>%
  group_by(carrier) %>%
  summarise(to_ORD = n())
按 carrier 分组,分别统计 dest 等于 'ORD' 和 'MDW' 的航班数,并只保留至少有一个计数大于 0 的行。
library(dplyr)
# One row per carrier. Comparing `dest` with an airport code yields a logical
# vector, and sum() counts its TRUE values, i.e. the flights to that airport.
# The final filter drops carriers that fly to neither airport.
flights %>%
  group_by(carrier) %>%
  summarise(
    to_ORD = sum(dest == "ORD"),
    to_MDW = sum(dest == "MDW")
  ) %>%
  filter(to_ORD > 0 | to_MDW > 0)
# carrier to_ORD to_MDW
# <chr> <int> <int>
#1 9E 1056 0
#2 AA 6059 0
#3 B6 905 0
#4 EV 2 0
#5 MQ 2276 0
#6 OO 1 0
#7 UA 6984 0
#8 WN 0 4113
思路相同,但拆成更多步骤以说明推理过程:先去掉不需要的数据,再区分芝加哥地区的两个机场。
library(dplyr)
library(nycflights13)
# Keep only the flights to the two Chicago-area airports, count them per
# carrier and destination, then put each count in the column of its airport.
# count(carrier, dest) returns an ungrouped tibble, so select() can really
# drop `dest`; on a tibble still grouped by `dest`, dplyr would add the column
# back ("Adding missing grouping variables: `dest`").
# if_else() with 0L keeps the counts as integers.
flights %>%
  filter(dest %in% c("ORD", "MDW")) %>%
  count(carrier, dest) %>%
  mutate(
    to_ORD = if_else(dest == "ORD", n, 0L),
    to_MDW = if_else(dest == "MDW", n, 0L)
  ) %>%
  select(-dest, -n)
#> # A tibble: 8 x 3
#> carrier to_ORD to_MDW
#> <chr> <int> <int>
#> 1 9E 1056 0
#> 2 AA 6059 0
#> 3 B6 905 0
#> 4 EV 2 0
#> 5 MQ 2276 0
#> 6 OO 1 0
#> 7 UA 6984 0
#> 8 WN 0 4113
我们可以使用pivot_wider
library(dplyr)
library(nycflights13)
library(tidyr)
# Keep the ORD/MDW flights and the two columns needed, then let pivot_wider()
# create one column per destination. values_fn = length counts the rows that
# fall into each carrier/destination cell, and values_fill = 0 is used for
# combinations with no rows.
flights %>%
  filter(dest %in% c("ORD", "MDW")) %>%
  select(carrier, dest) %>%
  pivot_wider(
    names_from = dest,
    values_from = dest,
    values_fn = length,
    values_fill = 0
  )
输出:
# A tibble: 8 x 3
# carrier ORD MDW
# <chr> <int> <int>
#1 UA 6984 0
#2 AA 6059 0
#3 MQ 2276 0
#4 B6 905 0
#5 WN 0 4113
#6 9E 1056 0
#7 OO 1 0
#8 EV 2 0
或者使用 base R,将 table 与 subset 结合使用:
# subset() keeps the rows whose dest is ORD or MDW and only the carrier and
# dest columns; table() then cross-tabulates those two columns.
table(subset(flights, dest %in% c("ORD", "MDW"), select = c(carrier, dest)))