如果列采用特定值,则分配新变量

Assigning new variable if a column takes specific values

我正在尝试生成一个新变量，根据组标识符识别家庭中的“单亲”（single parents）。如果某个组中有 'Child'，但没有同时包含 'Head' 和 'Spouse'，我希望该组内所有行的变量取值为 1，否则取值为 0。我尝试使用 dplyr，但没能得出解决方案。

# Toy household data: one row per person, `group` is the household id.
group <- c(1, 1, 1, 2, 2, 3, 3)
relation <- c("Head", "Spouse", "Child", "Head", "Spouse", "Head", "Child")

# cbind() first builds a character matrix, so `group` is stored as character
# in the resulting data frame.
my_data <- as.data.frame(cbind(group, relation))

# Asker's attempt (does not give the desired result).
# The condition is evaluated row by row: a single row can never be both
# "Head" and "Spouse", so the negated term is always TRUE and every "Child"
# row gets 1. case_when() has no fallback branch here, so all other rows
# become NA instead of 0, and nothing is decided at the household level.
my_data %>%
  group_by(group) %>%
  mutate(single_parent = case_when(relation %in% "Child" & !(relation %in% "Head" & relation %in% "Spouse")~1))

# Desired result: only household 3 (a Head with a Child, no Spouse) is flagged.
my_data$single_parent <- c(0, 0, 0, 0, 0, 1, 1)

感谢您的帮助。

我们可以这样做：

library(dplyr)
# Flag every row of a household that contains a "Child" but does not contain
# both a "Head" and a "Spouse". The test yields one value per group, which
# mutate() recycles to all rows of that group.
my_data <- my_data %>%
  group_by(group) %>%
  mutate(
    single_parent = as.integer(
      "Child" %in% relation && !all(c("Head", "Spouse") %in% relation)
    )
  ) %>%
  ungroup()

输出

my_data
# A tibble: 7 × 3
  group relation single_parent
  <dbl> <chr>            <int>
1     1 Head                 0
2     1 Spouse               0
3     1 Child                0
4     2 Head                 0
5     2 Spouse               0
6     3 Head                 1
7     3 Child                1

数据

# Build the data frame directly so that `group` keeps its numeric type.
my_data <- data.frame(group = group, relation = relation)

这是另一个 tidyverse 选项:

library(tidyverse)

# Flag every member of a single-parent household: the household contains at
# least one "Child" but does not contain both a "Head" and a "Spouse".
# The decision is based on the roles present in the group rather than on the
# group size, so households with several children are classified correctly.
# %in% never returns NA, so missing `relation` values cannot leak into the flag.
my_data %>%
  group_by(group) %>%
  mutate(
    single_parent = as.numeric(
      "Child" %in% relation && !all(c("Head", "Spouse") %in% relation)
    )
  ) %>%
  ungroup()

或者，另一种选择是结合使用 base R 的 table 和 tidyverse：

# Count how many members of each household hold each role. Fixing the factor
# levels guarantees that all three columns exist even if a role never occurs,
# and lets the columns be addressed by name instead of by position.
role_counts <- table(
  my_data$group,
  factor(my_data$relation, levels = c("Child", "Head", "Spouse"))
)

# Build one lookup row per household. The keys are taken from `my_data$group`
# via match() on the table's row names, so they are in the same order as the
# table rows and keep the original column type for the join.
data.frame(
  group = my_data$group[match(rownames(role_counts), my_data$group)],
  # A household is single-parent when it has at least one child and does not
  # have both a head and a spouse.
  single_parent = as.integer(
    role_counts[, "Child"] > 0 &
      !(role_counts[, "Head"] > 0 & role_counts[, "Spouse"] > 0)
  )
) %>%
  left_join(my_data, ., by = "group")

输出

  group relation single_parent
  <chr> <chr>            <dbl>
1 1     Head                 0
2 1     Spouse               0
3 1     Child                0
4 2     Head                 0
5 2     Spouse               0
6 3     Head                 1
7 3     Child                1