r 将二进制响应折叠为单一响应
r collapse binary response to single response
这是我的数据集
State Response
AL NO
AR NO
AK NO
AZ NO
CA YES
CA NO
CO YES
CO NO
FL NO
GA NO
有些州同时有 YES 和 NO 两种响应,例如 CA、CO。我如何合并每个州的 YES/NO 值,使同时含有 YES 和 NO 的州只保留 YES?这是我的预期输出
State Response
AL NO
AR NO
AK NO
AZ NO
CA YES
CO YES
FL NO
GA NO
非常感谢任何建议。谢谢
我们可以先按 'State' 以及由 'Response' 构造的逻辑向量对行进行 arrange(使 "YES" 排在前面),然后 filter 出每个 State 的第一行
library(dplyr)
# Sort by State and, within a State, put "YES" rows first: the logical
# `Response != "YES"` is FALSE for "YES" and FALSE sorts before TRUE.
# Keeping the first row per State then yields "YES" wherever one exists.
df1 %>%
  arrange(State, Response != "YES") %>%
  distinct(State, .keep_all = TRUE)
-输出
State Response
1 AK NO
2 AL NO
3 AR NO
4 AZ NO
5 CA YES
6 CO YES
7 FL NO
8 GA NO
数据
# Example data: CA and CO each appear twice, once with "YES" and once with "NO".
df1 <- data.frame(
  State = c("AL", "AR", "AK", "AZ", "CA", "CA", "CO", "CO", "FL", "GA"),
  Response = c("NO", "NO", "NO", "NO", "YES", "NO", "YES", "NO", "NO", "NO"),
  stringsAsFactors = FALSE
)
我建议使用 dplyr 中的 distinct()(.keep_all = TRUE 会保留所有列,但删除重复的行)。注意:distinct() 只保留每个 State 最先出现的那一行。例如:
State <- c("AL", "AR", "AK", "AZ", "CA", "CA", "CO", "CO", "FL", "GA")
Response <- c("NO", "NO", "NO", "NO", "YES", "NO", "YES", "NO", "NO", "NO")
# Build the tibble with explicit column names instead of relying on the
# auto-generated `...1` / `...2` names. Every row of a State is set to "YES"
# when any of its rows is "YES", and only then are duplicate States dropped.
# distinct() keeps the first row per State, so without the mutate() step the
# result would be correct only if "YES" happened to be listed before "NO".
data <- tibble(State = State, Response = Response) %>%
  group_by(State) %>%
  mutate(Response = if (any(Response == "YES")) "YES" else "NO") %>%
  ungroup() %>%
  distinct(State, .keep_all = TRUE)
data
输出:
> data
# A tibble: 8 × 2
State Response
<chr> <chr>
1 AL NO
2 AR NO
3 AK NO
4 AZ NO
5 CA YES
6 CO YES
7 FL NO
8 GA NO
另一种解决方案:
library(dplyr)
# Collapse to one row per State. `wt = Response == "YES"` makes `n` the number
# of "YES" rows in each State (count() sums the weights), so the outcome depends
# on the responses themselves rather than on how many rows a State has.
df %>%
  count(State, wt = Response == "YES") %>%
  mutate(Response = ifelse(n > 0, "YES", "NO")) %>%
  select(-n)
已编辑:
library(dplyr)
# Variant that goes through NA: `n` is the number of "YES" rows per State
# (count() sums the logical weight), States with at least one "YES" are
# labelled "YES", the rest are first NA and then filled with "NO".
df %>%
  count(State, wt = Response == "YES") %>%
  mutate(Response = ifelse(n > 0, "YES", NA_character_)) %>%
  replace(is.na(.), "NO") %>%
  select(-n)
已编辑 2 - 最终版:
library(dplyr)
# add_count() keeps every row and adds `n`, here the number of "YES" rows in
# the row's State. Rows of a State with at least one "YES" are all set to
# "YES"; other rows keep their Response. Dropping `n` and de-duplicating then
# leaves one row per State/Response combination.
df %>%
  add_count(State, wt = Response == "YES") %>%
  mutate(Response = ifelse(n > 0, "YES", Response)) %>%
  select(-n) %>%
  distinct()
如果某个州(State)中有任意(any)一个值为“YES”,就可以返回“YES”,否则返回“NO”。
library(dplyr)
# One row per State: "YES" if any of that State's responses is "YES", else "NO".
summarise(
  group_by(df, State),
  Response = if (any(Response == "YES")) "YES" else "NO"
)
# State Response
# <chr> <chr>
#1 AK NO
#2 AL NO
#3 AR NO
#4 AZ NO
#5 CA YES
#6 CO YES
#7 FL NO
#8 GA NO
因为按字符串比较时 "YES" > "NO",在这种情况下您也可以使用 max。
# "YES" compares greater than "NO" as a string, so the per-State maximum is
# "YES" whenever the State has at least one "YES" row.
df %>%
  group_by(State) %>%
  summarise(Response = max(Response))
或者使用 base R:
# Base R equivalent: take the maximum Response within each State.
aggregate(Response ~ State, data = df, FUN = max)
这是我的数据集
State Response
AL NO
AR NO
AK NO
AZ NO
CA YES
CA NO
CO YES
CO NO
FL NO
GA NO
有些州同时有 YES 和 NO 两种响应,例如 CA、CO。我如何合并每个州的 YES/NO 值,使同时含有 YES 和 NO 的州只保留 YES?这是我的预期输出
State Response
AL NO
AR NO
AK NO
AZ NO
CA YES
CO YES
FL NO
GA NO
非常感谢任何建议。谢谢
我们可以先按 'State' 以及由 'Response' 构造的逻辑向量对行进行 arrange(使 "YES" 排在前面),然后 filter 出每个 State 的第一行
library(dplyr)
# Sort by State and, within a State, put "YES" rows first: the logical
# `Response != "YES"` is FALSE for "YES" and FALSE sorts before TRUE.
# Keeping the first row per State then yields "YES" wherever one exists.
df1 %>%
  arrange(State, Response != "YES") %>%
  distinct(State, .keep_all = TRUE)
-输出
State Response
1 AK NO
2 AL NO
3 AR NO
4 AZ NO
5 CA YES
6 CO YES
7 FL NO
8 GA NO
数据
# Example data: CA and CO each appear twice, once with "YES" and once with "NO".
df1 <- data.frame(
  State = c("AL", "AR", "AK", "AZ", "CA", "CA", "CO", "CO", "FL", "GA"),
  Response = c("NO", "NO", "NO", "NO", "YES", "NO", "YES", "NO", "NO", "NO"),
  stringsAsFactors = FALSE
)
我建议使用 dplyr 中的 distinct()(.keep_all = TRUE 会保留所有列,但删除重复的行)。注意:distinct() 只保留每个 State 最先出现的那一行。例如:
State <- c("AL", "AR", "AK", "AZ", "CA", "CA", "CO", "CO", "FL", "GA")
Response <- c("NO", "NO", "NO", "NO", "YES", "NO", "YES", "NO", "NO", "NO")
# Build the tibble with explicit column names instead of relying on the
# auto-generated `...1` / `...2` names. Every row of a State is set to "YES"
# when any of its rows is "YES", and only then are duplicate States dropped.
# distinct() keeps the first row per State, so without the mutate() step the
# result would be correct only if "YES" happened to be listed before "NO".
data <- tibble(State = State, Response = Response) %>%
  group_by(State) %>%
  mutate(Response = if (any(Response == "YES")) "YES" else "NO") %>%
  ungroup() %>%
  distinct(State, .keep_all = TRUE)
data
输出:
> data
# A tibble: 8 × 2
State Response
<chr> <chr>
1 AL NO
2 AR NO
3 AK NO
4 AZ NO
5 CA YES
6 CO YES
7 FL NO
8 GA NO
另一种解决方案:
library(dplyr)
# Collapse to one row per State. `wt = Response == "YES"` makes `n` the number
# of "YES" rows in each State (count() sums the weights), so the outcome depends
# on the responses themselves rather than on how many rows a State has.
df %>%
  count(State, wt = Response == "YES") %>%
  mutate(Response = ifelse(n > 0, "YES", "NO")) %>%
  select(-n)
已编辑:
library(dplyr)
# Variant that goes through NA: `n` is the number of "YES" rows per State
# (count() sums the logical weight), States with at least one "YES" are
# labelled "YES", the rest are first NA and then filled with "NO".
df %>%
  count(State, wt = Response == "YES") %>%
  mutate(Response = ifelse(n > 0, "YES", NA_character_)) %>%
  replace(is.na(.), "NO") %>%
  select(-n)
已编辑 2 - 最终版:
library(dplyr)
# add_count() keeps every row and adds `n`, here the number of "YES" rows in
# the row's State. Rows of a State with at least one "YES" are all set to
# "YES"; other rows keep their Response. Dropping `n` and de-duplicating then
# leaves one row per State/Response combination.
df %>%
  add_count(State, wt = Response == "YES") %>%
  mutate(Response = ifelse(n > 0, "YES", Response)) %>%
  select(-n) %>%
  distinct()
如果某个州(State)中有任意(any)一个值为“YES”,就可以返回“YES”,否则返回“NO”。
library(dplyr)
# One row per State: "YES" if any of that State's responses is "YES", else "NO".
summarise(
  group_by(df, State),
  Response = if (any(Response == "YES")) "YES" else "NO"
)
# State Response
# <chr> <chr>
#1 AK NO
#2 AL NO
#3 AR NO
#4 AZ NO
#5 CA YES
#6 CO YES
#7 FL NO
#8 GA NO
因为按字符串比较时 "YES" > "NO",在这种情况下您也可以使用 max。
# "YES" compares greater than "NO" as a string, so the per-State maximum is
# "YES" whenever the State has at least one "YES" row.
df %>%
  group_by(State) %>%
  summarise(Response = max(Response))
或者使用 base R:
# Base R equivalent: take the maximum Response within each State.
aggregate(Response ~ State, data = df, FUN = max)