仅使用 dplyr 跨 3 列应用条件时如何填充是和否?
How to populate Yes and No when applying conditions across 3 columns with dplyr only?
我正在尝试创建一个新列,比如测试,其中包含基于 3 列的多个条件。我正在尝试仅使用 tidyverse 来实现这一目标。这是我的条件:
- 如果我在一列中有
Yes
,而不管其他类别(No/Unknown/NA
)在相同的列中:previous_cabg, previous_pci, previous_ami
然后在测试变量中分配 Yes
- 如果我在同一 ID 的所有列中都有
No
,则为测试变量分配 NO
- 如果我在一列中有
NO
并且在其他列中有相同 ID 的 NA/Unknown
然后在测试变量中分配 No
- 如果我在同一 ID 的所有列中都有
Yes
,则在测试变量中分配 Yes
- if I have
`Yes` in one column and
`NA`/`Unknown` for the same id in the other columns, then assign
`Yes` in the test variable
这是我拥有的数据集类型:
structure(list(id = c(112139L, 43919L, 92430L, 87137L, 95417L,
66955L, 16293L, 61396L, 25379L, 79229L, 27107L, 63243L, 50627L,
17968L, 83015L, 96549L, 7332L, 4873L, 98131L, 93506L, 52894L,
59327L, 85003L, 96623L, 82999L, 65769L, 67063L, 21744L, 62961L,
2229L, 103673L, 9367L, 60215L, 74044L, 58422L, 57530L, 100399L,
46483L, 108690L, 62017L, 46467L, 79562L, 4800L, 119158L, 103222L,
32908L, 14491L, 30293L, 52558L, 122304L, 42281L, 1553L, 111771L,
23087L, 30147L, 37842L, 51552L, 20148L, 28L, 7477L), previous_cabg = structure(c(1L,
1L, 1L, NA, 1L, NA, NA, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, NA, 1L, 1L, NA, 1L, NA, 1L, 1L, 1L, 1L, 1L, NA, 1L, 1L, 3L,
1L, 1L, NA, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, NA, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Unknown", "Yes"), class = "factor"), previous_pci = structure(c(1L,
1L, 2L, NA, 1L, NA, NA, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
2L, NA, 2L, 1L, NA, 2L, NA, 1L, 2L, 1L, 1L, 1L, NA, 2L, 1L, 1L,
2L, 2L, NA, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, NA, 1L, 1L, 2L, 1L, 1L), .Label = c("No",
"Yes", "Unknown"), class = "factor"), previous_ami = structure(c(2L,
2L, 1L, 2L, 2L, NA, 2L, 1L, 2L, 2L, NA, 1L, 2L, 2L, 2L, 2L, 2L,
1L, NA, 1L, 2L, NA, 1L, NA, 2L, 1L, 2L, 2L, 2L, NA, 1L, 1L, 1L,
2L, 1L, NA, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 1L, 2L), .Label = c("Yes",
"No", "Unknown"), class = "factor")), row.names = c(NA, -60L), problems = structure(list(
row = c(34136L, 121773L, 121779L), col = c("1.01 Hospital identifier",
"1.01 Hospital identifier", "1.01 Hospital identifier"),
expected = c("value in level set", "value in level set",
"value in level set"), actual = c("CMH", "CMH", "CMH"), file = c("'../../data/changed/minap_2020_2021_second.csv'",
"'../../data/changed/minap_2020_2021_second.csv'", "'../../data/changed/minap_2020_2021_second.csv'"
)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame"
)), class = c("tbl_df", "tbl", "data.frame"))
这是它的样子,但只有前 10 行,如果你仔细看,我在 3 列中有不同的匹配组
# A tibble: 60 x 4
id previous_cabg previous_pci previous_ami
<int> <fct> <fct> <fct>
1 112139 No No No
2 43919 No No No
3 92430 No Yes Yes
4 87137 NA NA No
5 95417 No No No
6 66955 NA NA NA
7 16293 NA NA No
8 61396 No Yes Yes
9 25379 No Yes No
10 79229 No No No
我希望仅使用 tidyverse 或混合使用 tidyverse 和 r base 来解决这个问题。
这是我试过的方法,但感觉不太明智。我认为这是不明智的,因为这段代码将成为自动化过程的一部分,如果我得到其他类别,而不是 Yes and No
,比如 Unknown
,因为它随后出现在下一个数据集提取中,那么我希望该代码将避免我上面给出的条件中的所有其他情况。
# Asker's attempt: build 'first_attack' with one case_when() arm per
# combination of values in previous_cabg / previous_pci / previous_ami.
# NOTE(review): this is a fragment - no data frame is passed to mutate();
# presumably it is the tail of a pipe that is not shown.
# NOTE(review): every arm joins its three comparisons with `|`, so an arm is
# TRUE as soon as ONE comparison holds. case_when() uses the first arm that
# matches, so the very first arm already assigns 'Yes' to any row where
# previous_cabg is 'No'. Requiring all three conditions would need `&`.
# NOTE(review): several arms are exact repeats of earlier ones and can never
# change the result.
dplyr::mutate(first_attack =
dplyr::case_when(previous_cabg == 'No' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'Yes' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Yes' | previous_ami == 'No' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'Yes' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
# NOTE(review): the next arm has no trailing comma although more arms follow,
# so the call does not parse as written.
previous_cabg == 'Yes' | previous_pci == 'Yes' | previous_ami == 'No' ~ 'Yes'
# Arms intended to cover rows that contain the 'Unknown' category.
previous_cabg == 'Unknown' | previous_pci == 'Yes' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Yes' | previous_ami == 'No' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Unknown' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Yes' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Yes' | previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'No' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'Yes' | previous_ami == 'No' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Unknown' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Yes' | previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Yes'| previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Yes'| previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'No' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'Yes'| previous_ami == 'No' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'Yes'| previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Yes'| previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Yes'| previous_ami == 'Yes' ~ 'Yes',
# The only arm that yields 'No'; it also uses `|`, not `&`.
previous_cabg == 'No' | previous_pci == 'No' | previous_ami == 'No' ~ 'No',
previous_cabg == 'Yes' | previous_pci == 'Yes' | previous_ami == 'Yes' ~'Yes'
))
所以综上所述,您的条件是:
- 对于每一行,如果任何一列是'Yes',输出'Yes'
- 对于每一行,如果所有列都是 NA,则输出 NA
- 对于每一行,如果所有列都是'Unknown',输出'Unknown'
- 否则输出'No'
如果是这种情况,你可以这样做:
# Build a per-row summary column 'test' from all non-id columns of 'dat':
#   "Yes"     if any column is "Yes",
#   NA        if every column is NA,
#   "Unknown" if every column is "Unknown",
#   "No"      otherwise.
# Convert your data structure into a plain data.frame.
dat <- as.data.frame(dat)
# Set the id column aside so that only the history columns are compared.
id <- dat$id
dat <- subset(dat, select = -c(id))
# Compare labels on a character matrix: as.matrix() turns factor columns into
# their labels, so columns whose levels are ordered differently still match.
values <- as.matrix(dat)
# TRUE for rows with a "Yes" in any column; NA cells never count as a match.
has_yes <- rowSums(values == "Yes", na.rm = TRUE) > 0
output <- ifelse(has_yes, "Yes", "No")
# TRUE for rows where every column is NA.
isNA <- rowSums(!is.na(values)) == 0
output[isNA] <- NA
# TRUE for rows where every column is "Unknown". Counting matches keeps this
# mask strictly TRUE/FALSE even for rows that contain NA.
isUK <- rowSums(values == "Unknown", na.rm = TRUE) == ncol(values)
output[isUK] <- "Unknown"
# Restore the id column and append the result as a new column.
dat$id <- id
dat$test <- output
这些操作是 rowwise()
,所以它们不是很有效,但是 tidyverse
中的这个解决方案应该干净地实现你想要的。
让我们用名称 dataset
来称呼您的示例数据集。然后是下面的工作流程
library(tidyverse)
# ...
# Code to generate your 'dataset'.
# ...
# Define custom logic across a single row.
# Summarise the values found across one row into a single label.
#
# values_across_row: the values of the history columns for one row (anything
#   that supports `== "<label>"`, e.g. a character or factor vector).
# Returns the first label, in the priority order "Yes" > "No" > "Unknown",
# that occurs at least once; NA_character_ when none of them occurs (for
# instance when every value is NA).
get_first_attack <- function(values_across_row) {
  priority <- c("Yes", "No", "Unknown")
  for (label in priority) {
    # isTRUE() turns the NA that any() yields for all-missing input into FALSE.
    found <- isTRUE(any(values_across_row == label))
    if (found) {
      return(label)
    }
  }
  # Nothing matched: all values are missing.
  NA_character_
}
# Add a 'first_attack' column to 'dataset' and print the result.
dataset %>%
  # Examine row by row.
  dplyr::rowwise() %>%
  # Collapse the three history columns of the current row into one vector with
  # 'c_across()', the row-wise helper (calling 'across()' without functions is
  # deprecated since dplyr 1.1.0), then apply the logic in 'get_first_attack()'.
  dplyr::mutate(
    first_attack = get_first_attack(
      dplyr::c_across(previous_cabg:previous_ami)
    )
  ) %>%
  # Exit row-wise approach, to restore efficiency.
  dplyr::ungroup() %>%
  # Factor 'first_attack' exactly like its neighboring column.
  dplyr::mutate(
    first_attack = factor(first_attack, levels = levels(previous_ami))
  )
应该给你这些结果
# A tibble: 60 x 5
id previous_cabg previous_pci previous_ami first_attack
<int> <fct> <fct> <fct> <fct>
1 112139 No No No No
2 43919 No No No No
3 92430 No Yes Yes Yes
4 87137 NA NA No No
5 95417 No No No No
6 66955 NA NA NA NA
7 16293 NA NA No No
8 61396 No Yes Yes Yes
9 25379 No Yes No Yes
10 79229 No No No No
# ... with 50 more rows
其中 first_attack
列被恰当地定义为具有三个级别的 factor
:"Yes"
、"No"
和 "Unknown"
。
我正在尝试创建一个新列,比如测试,其中包含基于 3 列的多个条件。我正在尝试仅使用 tidyverse 来实现这一目标。这是我的条件:
- 如果我在一列中有
Yes
,而不管其他类别(No/Unknown/NA
)在相同的列中:previous_cabg, previous_pci, previous_ami
然后在测试变量中分配Yes
- 如果我在同一 ID 的所有列中都有
No
,则为测试变量分配 NO
- 如果我在一列中有
NO
并且在其他列中有相同 ID 的NA/Unknown
然后在测试变量中分配No
- 如果我在同一 ID 的所有列中都有
Yes
,则在测试变量中分配Yes
- if I have
`Yes` in one column and
`NA`/`Unknown` for the same id in the other columns, then assign
`Yes` in the test variable
这是我拥有的数据集类型:
structure(list(id = c(112139L, 43919L, 92430L, 87137L, 95417L,
66955L, 16293L, 61396L, 25379L, 79229L, 27107L, 63243L, 50627L,
17968L, 83015L, 96549L, 7332L, 4873L, 98131L, 93506L, 52894L,
59327L, 85003L, 96623L, 82999L, 65769L, 67063L, 21744L, 62961L,
2229L, 103673L, 9367L, 60215L, 74044L, 58422L, 57530L, 100399L,
46483L, 108690L, 62017L, 46467L, 79562L, 4800L, 119158L, 103222L,
32908L, 14491L, 30293L, 52558L, 122304L, 42281L, 1553L, 111771L,
23087L, 30147L, 37842L, 51552L, 20148L, 28L, 7477L), previous_cabg = structure(c(1L,
1L, 1L, NA, 1L, NA, NA, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, NA, 1L, 1L, NA, 1L, NA, 1L, 1L, 1L, 1L, 1L, NA, 1L, 1L, 3L,
1L, 1L, NA, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, NA, 1L, 1L, 1L, 1L, 1L), .Label = c("No",
"Unknown", "Yes"), class = "factor"), previous_pci = structure(c(1L,
1L, 2L, NA, 1L, NA, NA, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
2L, NA, 2L, 1L, NA, 2L, NA, 1L, 2L, 1L, 1L, 1L, NA, 2L, 1L, 1L,
2L, 2L, NA, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, NA, 1L, 1L, 2L, 1L, 1L), .Label = c("No",
"Yes", "Unknown"), class = "factor"), previous_ami = structure(c(2L,
2L, 1L, 2L, 2L, NA, 2L, 1L, 2L, 2L, NA, 1L, 2L, 2L, 2L, 2L, 2L,
1L, NA, 1L, 2L, NA, 1L, NA, 2L, 1L, 2L, 2L, 2L, NA, 1L, 1L, 1L,
2L, 1L, NA, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 1L, 2L), .Label = c("Yes",
"No", "Unknown"), class = "factor")), row.names = c(NA, -60L), problems = structure(list(
row = c(34136L, 121773L, 121779L), col = c("1.01 Hospital identifier",
"1.01 Hospital identifier", "1.01 Hospital identifier"),
expected = c("value in level set", "value in level set",
"value in level set"), actual = c("CMH", "CMH", "CMH"), file = c("'../../data/changed/minap_2020_2021_second.csv'",
"'../../data/changed/minap_2020_2021_second.csv'", "'../../data/changed/minap_2020_2021_second.csv'"
)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame"
)), class = c("tbl_df", "tbl", "data.frame"))
这是它的样子,但只有前 10 行,如果你仔细看,我在 3 列中有不同的匹配组
# A tibble: 60 x 4
id previous_cabg previous_pci previous_ami
<int> <fct> <fct> <fct>
1 112139 No No No
2 43919 No No No
3 92430 No Yes Yes
4 87137 NA NA No
5 95417 No No No
6 66955 NA NA NA
7 16293 NA NA No
8 61396 No Yes Yes
9 25379 No Yes No
10 79229 No No No
我希望仅使用 tidyverse 或混合使用 tidyverse 和 r base 来解决这个问题。
这是我试过的方法,但感觉不太明智。我认为这是不明智的,因为这段代码将成为自动化过程的一部分,如果我得到其他类别,而不是 Yes and No
,比如 Unknown
,因为它随后出现在下一个数据集提取中,那么我希望该代码将避免我上面给出的条件中的所有其他情况。
# Asker's attempt: build 'first_attack' with one case_when() arm per
# combination of values in previous_cabg / previous_pci / previous_ami.
# NOTE(review): this is a fragment - no data frame is passed to mutate();
# presumably it is the tail of a pipe that is not shown.
# NOTE(review): every arm joins its three comparisons with `|`, so an arm is
# TRUE as soon as ONE comparison holds. case_when() uses the first arm that
# matches, so the very first arm already assigns 'Yes' to any row where
# previous_cabg is 'No'. Requiring all three conditions would need `&`.
# NOTE(review): several arms are exact repeats of earlier ones and can never
# change the result.
dplyr::mutate(first_attack =
dplyr::case_when(previous_cabg == 'No' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'Yes' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Yes' | previous_ami == 'No' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'Yes' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
# NOTE(review): the next arm has no trailing comma although more arms follow,
# so the call does not parse as written.
previous_cabg == 'Yes' | previous_pci == 'Yes' | previous_ami == 'No' ~ 'Yes'
# Arms intended to cover rows that contain the 'Unknown' category.
previous_cabg == 'Unknown' | previous_pci == 'Yes' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Yes' | previous_ami == 'No' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Unknown' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Yes' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Yes' | previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'No' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'Yes' | previous_ami == 'No' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Unknown' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Yes' | previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Yes'| previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Yes'| previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'No' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'Yes'| previous_ami == 'No' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'No' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'No' | previous_pci == 'Yes'| previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Yes'| previous_ami == 'Unknown' ~ 'Yes',
previous_cabg == 'Yes' | previous_pci == 'Unknown' | previous_ami == 'Yes' ~ 'Yes',
previous_cabg == 'Unknown' | previous_pci == 'Yes'| previous_ami == 'Yes' ~ 'Yes',
# The only arm that yields 'No'; it also uses `|`, not `&`.
previous_cabg == 'No' | previous_pci == 'No' | previous_ami == 'No' ~ 'No',
previous_cabg == 'Yes' | previous_pci == 'Yes' | previous_ami == 'Yes' ~'Yes'
))
所以综上所述,您的条件是:
- 对于每一行,如果任何一列是'Yes',输出'Yes'
- 对于每一行,如果所有列都是 NA,则输出 NA
- 对于每一行,如果所有列都是'Unknown',输出'Unknown'
- 否则输出'No'
如果是这种情况,你可以这样做:
# Build a per-row summary column 'test' from all non-id columns of 'dat':
#   "Yes"     if any column is "Yes",
#   NA        if every column is NA,
#   "Unknown" if every column is "Unknown",
#   "No"      otherwise.
# Convert your data structure into a plain data.frame.
dat <- as.data.frame(dat)
# Set the id column aside so that only the history columns are compared.
id <- dat$id
dat <- subset(dat, select = -c(id))
# Compare labels on a character matrix: as.matrix() turns factor columns into
# their labels, so columns whose levels are ordered differently still match.
values <- as.matrix(dat)
# TRUE for rows with a "Yes" in any column; NA cells never count as a match.
has_yes <- rowSums(values == "Yes", na.rm = TRUE) > 0
output <- ifelse(has_yes, "Yes", "No")
# TRUE for rows where every column is NA.
isNA <- rowSums(!is.na(values)) == 0
output[isNA] <- NA
# TRUE for rows where every column is "Unknown". Counting matches keeps this
# mask strictly TRUE/FALSE even for rows that contain NA.
isUK <- rowSums(values == "Unknown", na.rm = TRUE) == ncol(values)
output[isUK] <- "Unknown"
# Restore the id column and append the result as a new column.
dat$id <- id
dat$test <- output
这些操作是 rowwise()
,所以它们不是很有效,但是 tidyverse
中的这个解决方案应该干净地实现你想要的。
让我们用名称 dataset
来称呼您的示例数据集。然后是下面的工作流程
library(tidyverse)
# ...
# Code to generate your 'dataset'.
# ...
# Define custom logic across a single row.
# Summarise the values found across one row into a single label.
#
# values_across_row: the values of the history columns for one row (anything
#   that supports `== "<label>"`, e.g. a character or factor vector).
# Returns the first label, in the priority order "Yes" > "No" > "Unknown",
# that occurs at least once; NA_character_ when none of them occurs (for
# instance when every value is NA).
get_first_attack <- function(values_across_row) {
  priority <- c("Yes", "No", "Unknown")
  for (label in priority) {
    # isTRUE() turns the NA that any() yields for all-missing input into FALSE.
    found <- isTRUE(any(values_across_row == label))
    if (found) {
      return(label)
    }
  }
  # Nothing matched: all values are missing.
  NA_character_
}
# Add a 'first_attack' column to 'dataset' and print the result.
dataset %>%
  # Examine row by row.
  dplyr::rowwise() %>%
  # Collapse the three history columns of the current row into one vector with
  # 'c_across()', the row-wise helper (calling 'across()' without functions is
  # deprecated since dplyr 1.1.0), then apply the logic in 'get_first_attack()'.
  dplyr::mutate(
    first_attack = get_first_attack(
      dplyr::c_across(previous_cabg:previous_ami)
    )
  ) %>%
  # Exit row-wise approach, to restore efficiency.
  dplyr::ungroup() %>%
  # Factor 'first_attack' exactly like its neighboring column.
  dplyr::mutate(
    first_attack = factor(first_attack, levels = levels(previous_ami))
  )
应该给你这些结果
# A tibble: 60 x 5
id previous_cabg previous_pci previous_ami first_attack
<int> <fct> <fct> <fct> <fct>
1 112139 No No No No
2 43919 No No No No
3 92430 No Yes Yes Yes
4 87137 NA NA No No
5 95417 No No No No
6 66955 NA NA NA NA
7 16293 NA NA No No
8 61396 No Yes Yes Yes
9 25379 No Yes No Yes
10 79229 No No No No
# ... with 50 more rows
其中 first_attack
列被恰当地定义为具有三个级别的 factor
:"Yes"
、"No"
和 "Unknown"
。