多个条件下的子集
Subsetting under multiple conditions
我想要得到在 Season 为 Winter1 和 Winter2 时都出现过的 Transmitter 代码的数量。答案应该是 6(有 6 个不同的代码同时出现在 Winter1 和 Winter2 中)。但是下面的命令返回 0:
# Returns 0: both comparisons are evaluated row by row, and no single row can
# have Season equal to "Winter1" and "Winter2" at once, so the mask is all FALSE.
length(unique(Dispersion[(Dispersion$Season == "Winter1") & (Dispersion$Season == "Winter2"),]$Transmitter))
什么命令适合我的问题?
structure(list(Transmitter = c("A69-1602-59814", "A69-1602-59814",
"A69-1602-59815", "A69-1602-59815", "A69-1602-59819", "A69-1602-59820",
"A69-1602-59821", "A69-1602-59822", "A69-1602-59823", "A69-1602-59824",
"A69-1602-59825", "A69-1602-59826", "A69-1602-59826", "A69-1602-59827",
"A69-1602-59828", "A69-1602-59828", "A69-1602-59830", "A69-1602-59831",
"A69-1602-59831", "A69-1602-59832", "A69-1602-59833", "A69-1602-59834",
"A69-1602-59835", "A69-1602-59835", "A69-1602-59836"), Batch.location = c("Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer"), Location.Dispersion = c("Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer"), Season = c("Winter1", "Winter2",
"Winter1", "Winter2", "Winter1", "Winter1", "Winter1", "Winter1",
"Winter1", "Winter1", "Winter1", "Winter1", "Winter2", "Winter1",
"Winter1", "Winter2", "Winter1", "Winter1", "Winter2", "Winter1",
"Winter1", "Winter1", "Winter1", "Winter2", "Winter1"), Freq = c(1961L,
2075L, 310L, 1L, 2880L, 305L, 366L, 834L, 19L, 2580L, 564L, 997L,
3475L, 6447L, 988L, 2991L, 355L, 3147L, 6155L, 903L, 484L, 321L,
76L, 1921L, 3329L)), row.names = c(NA, -25L), groups = structure(list(
Transmitter = c("A69-1602-59814", "A69-1602-59815", "A69-1602-59819",
"A69-1602-59820", "A69-1602-59821", "A69-1602-59822", "A69-1602-59823",
"A69-1602-59824", "A69-1602-59825", "A69-1602-59826", "A69-1602-59827",
"A69-1602-59828", "A69-1602-59830", "A69-1602-59831", "A69-1602-59832",
"A69-1602-59833", "A69-1602-59834", "A69-1602-59835", "A69-1602-59836"
), Batch.location = c("Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer"), Location.Dispersion = c("Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer"
), .rows = structure(list(1:2, 3:4, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12:13, 14L, 15:16, 17L, 18:19, 20L, 21L, 22L, 23:24,
25L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -19L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
您需要按 Transmitter 分组(您的尝试中遗漏了这一步),并确保每组的 Season 中同时包含这两个值。
dplyr
library(dplyr)
# Keep every row belonging to a Transmitter that was recorded in both winters.
# Each condition is evaluated once per Transmitter group and recycled over the
# group's rows; filter() combines the two conditions with a logical AND.
out <- dat %>%
  group_by(Transmitter) %>%
  filter("Winter1" %in% Season, "Winter2" %in% Season) %>%
  ungroup()
out
# # A tibble: 12 x 5
# Transmitter Batch.location Location.Dispersion Season Freq
# <chr> <chr> <chr> <chr> <int>
# 1 A69-1602-59814 Lemmer Lemmer Winter1 1961
# 2 A69-1602-59814 Lemmer Lemmer Winter2 2075
# 3 A69-1602-59815 Lemmer Lemmer Winter1 310
# 4 A69-1602-59815 Lemmer Lemmer Winter2 1
# 5 A69-1602-59826 Lemmer Lemmer Winter1 997
# 6 A69-1602-59826 Lemmer Lemmer Winter2 3475
# 7 A69-1602-59828 Lemmer Lemmer Winter1 988
# 8 A69-1602-59828 Lemmer Lemmer Winter2 2991
# 9 A69-1602-59831 Lemmer Lemmer Winter1 3147
# 10 A69-1602-59831 Lemmer Lemmer Winter2 6155
# 11 A69-1602-59835 Lemmer Lemmer Winter1 76
# 12 A69-1602-59835 Lemmer Lemmer Winter2 1921
从这里您可以使用 n_distinct
或其他东西来计算您需要的唯一 Transmitter
值。
# Collapse the filtered rows to a single count of distinct Transmitter codes.
out %>%
  summarise(n = n_distinct(Transmitter))
# # A tibble: 1 x 1
# n
# <int>
# 1 6
或者只是
# Count distinct Transmitter codes: one per first (non-duplicated) occurrence.
sum(!duplicated(out$Transmitter))
# [1] 6
基础 R,选项 1
# Flag every row whose Transmitter was recorded in both "Winter1" and "Winter2".
# ave() writes FUN's logical result back into a copy of its first argument
# (a character vector here), so the flags come back as the strings
# "TRUE"/"FALSE"; hence the comparison against "TRUE".
ind <- ave(dat$Season, dat$Transmitter,
           FUN = function(z) all(c("Winter1", "Winter2") %in% z)) == "TRUE"
ind
# [1] TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE
# [21] FALSE FALSE TRUE TRUE FALSE
dat[ind, ]
# ...
# Index the column vector itself: when dat is a tibble, dat[ind, "Transmitter"]
# stays a one-column tibble and length() of that is its column count (1),
# not the number of distinct Transmitters.
length(unique(dat$Transmitter[ind]))
# [1] 6
之所以与字符串 "TRUE" 比较(== "TRUE"),是因为 ave 会把返回值强制转换为与其第一个参数(即 dat$Season)相同的 class。函数内部计算得到的是 logical,但随后被强制转换为字符串。(只需运行不带 == ... 的 ave(..) 即可看到实际效果。)
基础 R,选项 2
# TRUE when a Transmitter's Season values include both winters.
has_both_winters <- function(seasons) all(c("Winter1", "Winter2") %in% seasons)
# aggregate() yields one row per Transmitter with the logical flag stored in
# the Season column; summing the flags counts the qualifying Transmitters.
per_transmitter <- aggregate(Season ~ Transmitter, data = dat,
                             FUN = has_both_winters)
sum(per_transmitter$Season)
# [1] 6
按 Season 进行 split,然后使用 intersect 和 length。
# Split Transmitter by Season and intersect the per-season sets.
# Reduce() folds the two-argument intersect() over any number of seasons,
# whereas passing the whole list to intersect() in one call only works when
# there are exactly two. unique() keeps the single-season case a set as well.
with(dat, Reduce(intersect, lapply(split(Transmitter, Season), unique))) |>
  length()
# [1] 6
或者使用 table,并统计 rowSums 等于 2(即季节数)的行数。
# Cross-tabulate Transmitter by Season and count the Transmitters that occur
# in every season. Testing presence (x > 0) per season keeps a Transmitter with
# repeated rows in a single season from being counted, and summing the logical
# vector avoids subsetting the table, which collapses to a plain vector
# (so nrow() is NULL) when exactly one Transmitter qualifies.
with(dat, table(Transmitter, Season)) |>
  (\(x) sum(rowSums(x > 0) == ncol(x)))()
# [1] 6
(Dispersion$Season == "Winter1") & (Dispersion$Season == "Winter2") 查找的是同一行的 Season 同时等于 "Winter1" 和 "Winter2" 的行,而这样的行不可能存在,这就是它不起作用的原因。既然您在使用 dplyr,我会这样做:
# Keep the Transmitters recorded in both winters, then count the distinct ones.
# The two filter() conditions are evaluated per Transmitter group and combined
# with a logical AND.
Dispersion %>%
  group_by(Transmitter) %>%
  filter("Winter1" %in% Season, "Winter2" %in% Season) %>%
  ungroup() %>%
  summarise(n_trans = n_distinct(Transmitter))
# # A tibble: 1 × 1
# n_trans
# <int>
# 1 6
另一个 base R 解决方案:
# TRUE when one Transmitter's Season values cover every season in the data.
seen_in_every_season <- function(x) {
  all(unique(dat$Season) %in% x)
}
# by() applies the check to each Transmitter's seasons; sum() counts the TRUEs.
sum(by(dat$Season, dat$Transmitter, FUN = seen_in_every_season))
# [1] 6
我想要得到在 Season 为 Winter1 和 Winter2 时都出现过的 Transmitter 代码的数量。答案应该是 6(有 6 个不同的代码同时出现在 Winter1 和 Winter2 中)。但是下面的命令返回 0:
# Returns 0: both comparisons are evaluated row by row, and no single row can
# have Season equal to "Winter1" and "Winter2" at once, so the mask is all FALSE.
length(unique(Dispersion[(Dispersion$Season == "Winter1") & (Dispersion$Season == "Winter2"),]$Transmitter))
什么命令适合我的问题?
structure(list(Transmitter = c("A69-1602-59814", "A69-1602-59814",
"A69-1602-59815", "A69-1602-59815", "A69-1602-59819", "A69-1602-59820",
"A69-1602-59821", "A69-1602-59822", "A69-1602-59823", "A69-1602-59824",
"A69-1602-59825", "A69-1602-59826", "A69-1602-59826", "A69-1602-59827",
"A69-1602-59828", "A69-1602-59828", "A69-1602-59830", "A69-1602-59831",
"A69-1602-59831", "A69-1602-59832", "A69-1602-59833", "A69-1602-59834",
"A69-1602-59835", "A69-1602-59835", "A69-1602-59836"), Batch.location = c("Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer"), Location.Dispersion = c("Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer"), Season = c("Winter1", "Winter2",
"Winter1", "Winter2", "Winter1", "Winter1", "Winter1", "Winter1",
"Winter1", "Winter1", "Winter1", "Winter1", "Winter2", "Winter1",
"Winter1", "Winter2", "Winter1", "Winter1", "Winter2", "Winter1",
"Winter1", "Winter1", "Winter1", "Winter2", "Winter1"), Freq = c(1961L,
2075L, 310L, 1L, 2880L, 305L, 366L, 834L, 19L, 2580L, 564L, 997L,
3475L, 6447L, 988L, 2991L, 355L, 3147L, 6155L, 903L, 484L, 321L,
76L, 1921L, 3329L)), row.names = c(NA, -25L), groups = structure(list(
Transmitter = c("A69-1602-59814", "A69-1602-59815", "A69-1602-59819",
"A69-1602-59820", "A69-1602-59821", "A69-1602-59822", "A69-1602-59823",
"A69-1602-59824", "A69-1602-59825", "A69-1602-59826", "A69-1602-59827",
"A69-1602-59828", "A69-1602-59830", "A69-1602-59831", "A69-1602-59832",
"A69-1602-59833", "A69-1602-59834", "A69-1602-59835", "A69-1602-59836"
), Batch.location = c("Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer"), Location.Dispersion = c("Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer",
"Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer", "Lemmer"
), .rows = structure(list(1:2, 3:4, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12:13, 14L, 15:16, 17L, 18:19, 20L, 21L, 22L, 23:24,
25L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -19L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
您需要按 Transmitter 分组(您的尝试中遗漏了这一步),并确保每组的 Season 中同时包含这两个值。
dplyr
library(dplyr)
# Keep every row belonging to a Transmitter that was recorded in both winters.
# Each condition is evaluated once per Transmitter group and recycled over the
# group's rows; filter() combines the two conditions with a logical AND.
out <- dat %>%
  group_by(Transmitter) %>%
  filter("Winter1" %in% Season, "Winter2" %in% Season) %>%
  ungroup()
out
# # A tibble: 12 x 5
# Transmitter Batch.location Location.Dispersion Season Freq
# <chr> <chr> <chr> <chr> <int>
# 1 A69-1602-59814 Lemmer Lemmer Winter1 1961
# 2 A69-1602-59814 Lemmer Lemmer Winter2 2075
# 3 A69-1602-59815 Lemmer Lemmer Winter1 310
# 4 A69-1602-59815 Lemmer Lemmer Winter2 1
# 5 A69-1602-59826 Lemmer Lemmer Winter1 997
# 6 A69-1602-59826 Lemmer Lemmer Winter2 3475
# 7 A69-1602-59828 Lemmer Lemmer Winter1 988
# 8 A69-1602-59828 Lemmer Lemmer Winter2 2991
# 9 A69-1602-59831 Lemmer Lemmer Winter1 3147
# 10 A69-1602-59831 Lemmer Lemmer Winter2 6155
# 11 A69-1602-59835 Lemmer Lemmer Winter1 76
# 12 A69-1602-59835 Lemmer Lemmer Winter2 1921
从这里您可以使用 n_distinct
或其他东西来计算您需要的唯一 Transmitter
值。
# Collapse the filtered rows to a single count of distinct Transmitter codes.
out %>%
  summarise(n = n_distinct(Transmitter))
# # A tibble: 1 x 1
# n
# <int>
# 1 6
或者只是
# Count distinct Transmitter codes: one per first (non-duplicated) occurrence.
sum(!duplicated(out$Transmitter))
# [1] 6
基础 R,选项 1
# Flag every row whose Transmitter was recorded in both "Winter1" and "Winter2".
# ave() writes FUN's logical result back into a copy of its first argument
# (a character vector here), so the flags come back as the strings
# "TRUE"/"FALSE"; hence the comparison against "TRUE".
ind <- ave(dat$Season, dat$Transmitter,
           FUN = function(z) all(c("Winter1", "Winter2") %in% z)) == "TRUE"
ind
# [1] TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE
# [21] FALSE FALSE TRUE TRUE FALSE
dat[ind, ]
# ...
# Index the column vector itself: when dat is a tibble, dat[ind, "Transmitter"]
# stays a one-column tibble and length() of that is its column count (1),
# not the number of distinct Transmitters.
length(unique(dat$Transmitter[ind]))
# [1] 6
之所以与字符串 "TRUE" 比较(== "TRUE"),是因为 ave 会把返回值强制转换为与其第一个参数(即 dat$Season)相同的 class。函数内部计算得到的是 logical,但随后被强制转换为字符串。(只需运行不带 == ... 的 ave(..) 即可看到实际效果。)
基础 R,选项 2
# TRUE when a Transmitter's Season values include both winters.
has_both_winters <- function(seasons) all(c("Winter1", "Winter2") %in% seasons)
# aggregate() yields one row per Transmitter with the logical flag stored in
# the Season column; summing the flags counts the qualifying Transmitters.
per_transmitter <- aggregate(Season ~ Transmitter, data = dat,
                             FUN = has_both_winters)
sum(per_transmitter$Season)
# [1] 6
按 Season 进行 split,然后使用 intersect 和 length。
# Split Transmitter by Season and intersect the per-season sets.
# Reduce() folds the two-argument intersect() over any number of seasons,
# whereas passing the whole list to intersect() in one call only works when
# there are exactly two. unique() keeps the single-season case a set as well.
with(dat, Reduce(intersect, lapply(split(Transmitter, Season), unique))) |>
  length()
# [1] 6
或者使用 table,并统计 rowSums 等于 2(即季节数)的行数。
# Cross-tabulate Transmitter by Season and count the Transmitters that occur
# in every season. Testing presence (x > 0) per season keeps a Transmitter with
# repeated rows in a single season from being counted, and summing the logical
# vector avoids subsetting the table, which collapses to a plain vector
# (so nrow() is NULL) when exactly one Transmitter qualifies.
with(dat, table(Transmitter, Season)) |>
  (\(x) sum(rowSums(x > 0) == ncol(x)))()
# [1] 6
(Dispersion$Season == "Winter1") & (Dispersion$Season == "Winter2") 查找的是同一行的 Season 同时等于 "Winter1" 和 "Winter2" 的行,而这样的行不可能存在,这就是它不起作用的原因。既然您在使用 dplyr,我会这样做:
# Keep the Transmitters recorded in both winters, then count the distinct ones.
# The two filter() conditions are evaluated per Transmitter group and combined
# with a logical AND.
Dispersion %>%
  group_by(Transmitter) %>%
  filter("Winter1" %in% Season, "Winter2" %in% Season) %>%
  ungroup() %>%
  summarise(n_trans = n_distinct(Transmitter))
# # A tibble: 1 × 1
# n_trans
# <int>
# 1 6
另一个 base R 解决方案:
# TRUE when one Transmitter's Season values cover every season in the data.
seen_in_every_season <- function(x) {
  all(unique(dat$Season) %in% x)
}
# by() applies the check to each Transmitter's seasons; sum() counts the TRUEs.
sum(by(dat$Season, dat$Transmitter, FUN = seen_in_every_season))
# [1] 6