尝试在 RStudio 中使用 case_when(),根据另一列中字符串的第一个单词添加一列

Trying to add a column in RStudio using case_when() based on the first word of a character string in another column

简单介绍一下背景:我正在进行一个食物选择实验,其中包括牛排、披萨、汉堡等不同类型的食物。我想找出每个试次使用了哪种食物,以便用 GLM 检验食物类型是否对食物选择有影响。

问题是,这些食物类型在我的数据中拼写不一致:首字母有的大写、有的小写(我知道,这是新手错误)。此外,我需要从包含多个单词的 “crossCheckExperiment” 列中匹配每种食物的 2 种可能写法(例如 'Steak' 或 'steak')。

这是我的数据集


    # Reproducible sample data: a 20-row data.frame in structure() form.
    # The names of `nreps` look like numbers, so each one is backtick-quoted;
    # a bare `53047 = 144L` inside c() is a syntax error in R.
    structure(list(session_id = c(53047, 53047, 53047, 53047, 53047,
    53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047,
    53047, 53047, 53047, 53047, 53047, 53047), project_id = c(495,
    495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495,
    495, 495, 495, 495, 495, 495), exp_name = c("Vegan label and food choice",
    "Vegan label and food choice", "Vegan label and food choice",
    "Vegan label and food choice", "Vegan label and food choice",
    "Vegan label and food choice", "Vegan label and food choice",
    "Vegan label and food choice", "Vegan label and food choice",
    "Vegan label and food choice", "Vegan label and food choice",
    "Vegan label and food choice", "Vegan label and food choice",
    "Vegan label and food choice", "Vegan label and food choice",
    "Vegan label and food choice", "Vegan label and food choice",
    "Vegan label and food choice", "Vegan label and food choice",
    "Vegan label and food choice"), exp_id = c(569, 569, 569, 569,
    569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569,
    569, 569, 569), user_id = c(46946, 46946, 46946, 46946, 46946,
    46946, 46946, 46946, 46946, 46946, 46946, 46946, 46946, 46946,
    46946, 46946, 46946, 46946, 46946, 46946), user_sex = c("male",
    "male", "male", "male", "male", "male", "male", "male", "male",
    "male", "male", "male", "male", "male", "male", "male", "male",
    "male", "male", "male"), user_status = c("guest", "guest", "guest",
    "guest", "guest", "guest", "guest", "guest", "guest", "guest",
    "guest", "guest", "guest", "guest", "guest", "guest", "guest",
    "guest", "guest", "guest"), user_age = c(21, 21, 21, 21, 21,
    21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21),
    trial_name = c("Steak_V_L_03_NV_NL_04", "Steak_V_L_01_NV_NL_02",
    "Chicken_V_NL_02_NV_L_01", "Sausage_V_L_01_NV_NL_02", "Curry_V_NL_06_NV_L_05",
    "Steak_NV_L_04_NV_NL_03", "Curry_V_NL_02_NV_L_01", "Pizza_V_NL_04_NV_L_03",
    "Pizza_V_L_05_NV_NL_06", "Steak_NV_L_02_NV_NL_01", "Burger_V_NL_04_NV_L_03",
    "Curry_NV_L_06_NV_NL_05", "Sausage_V_L_06_V_NL_05", "Steak_V_L_05_NV_NL_06",
    "Curry_V_L_01_NV_NL_02", "Burger_V_L_02_V_NL_01", "Steak_V_NL_02_NV_L_01",
    "Sausage_V_L_03_NV_NL_04", "Pizza_NV_L_06_NV_NL_05", "Pizza_NV_L_02_NV_NL_01"
    ), trial_n = c(29, 25, 50, 1, 46, 32, 38, 22, 69, 28, 14,
    48, 67, 33, 37, 11, 26, 5, 72, 20), order = c(1, 2, 3, 4,
    5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
    ), dv = c(0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0,
    0, 0, 1, 1), rt = c(2054, 2417, 2007, 3003, 2391, 3238, 3316,
    2371, 4109, 3052, 3274, 3658, 2824, 1888, 4198, 5815, 2651,
    6591, 5766, 3682), side = c(1, 2, 2, 1, 1, 1, 1, 2, 2, 1,
    2, 1, 2, 1, 1, 2, 2, 1, 2, 2), dt = structure(c(1607441861,
    1607441863, 1607441865, 1607441868, 1607441871, 1607441874,
    1607441877, 1607441880, 1607441884, 1607441887, 1607441890,
    1607441894, 1607441896, 1607441898, 1607441903, 1607441909,
    1607441911, 1607441918, 1607441924, 1607441927), class = c("POSIXct",
    "POSIXt"), tzone = "UTC"), nreps = c(`53047` = 144L, `53047` = 144L,
    `53047` = 144L, `53047` = 144L, `53047` = 144L, `53047` = 144L,
    `53047` = 144L, `53047` = 144L, `53047` = 144L, `53047` = 144L,
    `53047` = 144L, `53047` = 144L, `53047` = 144L, `53047` = 144L,
    `53047` = 144L, `53047` = 144L, `53047` = 144L, `53047` = 144L,
    `53047` = 144L, `53047` = 144L), subjBelief_left = c("vegan",
    "vegan", "vegan", "vegan", "vegan", "no vegan", "vegan",
    "vegan", "vegan", "no vegan", "vegan", "no vegan", "vegan",
    "vegan", "vegan", "vegan", "vegan", "vegan", "no vegan",
    "no vegan"), subjBelief_right = c("no vegan", "no vegan",
    "no vegan", "vegan", "no vegan", "no vegan", "no vegan",
    "vegan", "no vegan", "no vegan", "no vegan", "no vegan",
    "vegan", "no vegan", "vegan", "vegan", "no vegan", "no vegan",
    "no vegan", "no vegan"), X1 = c("Vegan_steak_nolabel_3",
    "Vegan_steak_nolabel_1", "Vegan_chicken_nolabel_2", "Vegan_sausage_nolabel_1",
    "Vegan_curry_nolabel_6", "Nonvegan_steak_nolabel_4", "Vegan_curry_nolabel_2",
    "Vegan_pizza_nolabel_4", "Vegan_pizza_nolabel_5", "Nonvegan_steak_nolabel_2",
    "Vegan_burger_nolabel_4", "Nonvegan_curry_nolabel_6", "Vegan_sausage_nolabel_6",
    "Vegan_steak_nolabel_5", "Vegan_curry_nolabel_1", "Vegan_burger_nolabel_2",
    "Vegan_steak_nolabel_2", "Vegan_sausage_nolabel_3", "Nonvegan_pizza_nolabel_6",
    "Nonvegan_pizza_nolabel_2"), X2 = c("Nonvegan_steak_nolabel_4",
    "Nonvegan_steak_nolabel_2", "Nonvegan_chicken_nolabel_1",
    "Nonvegan_sausage_nolabel_2", "Nonvegan_curry_nolabel_5",
    "Nonvegan_steak_nolabel_3", "Nonvegan_curry_nolabel_1", "Nonvegan_pizza_nolabel_3",
    "Nonvegan_pizza_nolabel_6", "Nonvegan_steak_nolabel_1", "Nonvegan_burger_nolabel_3",
    "Nonvegan_curry_nolabel_5", "Vegan_sausage_nolabel_5", "Nonvegan_steak_nolabel_6",
    "Nonvegan_curry_nolabel_2", "Vegan_burger_nolabel_1", "Nonvegan_steak_nolabel_1",
    "Nonvegan_sausage_nolabel_4", "Nonvegan_pizza_nolabel_5",
    "Nonvegan_pizza_nolabel_1"), crossCheckExperiment = c("Steak_V_L_03_NV_NL_04",
    "Steak_V_L_01_NV_NL_02", "Chicken_V_NL_02_NV_L_01", "Sausage_V_L_01_NV_NL_02",
    "Curry_V_NL_06_NV_L_05", "Steak_NV_L_04_NV_NL_03", "Curry_V_NL_02_NV_L_01",
    "Pizza_V_NL_04_NV_L_03", "Pizza_V_L_05_NV_NL_06", "Steak_NV_L_02_NV_NL_01",
    "Burger_V_NL_04_NV_L_03", "Curry_NV_L_06_NV_NL_05", "Sausage_V_L_06_V_NL_05",
    "Steak_V_L_05_NV_NL_06", "Curry_V_L_01_NV_NL_02", "Burger_V_L_02_V_NL_01",
    "Steak_V_NL_02_NV_L_01", "Sausage_V_L_03_NV_NL_04", "Pizza_NV_L_06_NV_NL_05",
    "Pizza_NV_L_02_NV_NL_01"), checkSubjId = c(53047, 53047,
    53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047,
    53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047
    ), subjectVeganQuantification = c(5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), left_food = c("V", "V",
    "V", "V", "V", "NV", "V", "V", "V", "NV", "V", "NV", "V",
    "V", "V", "V", "V", "V", "NV", "NV"), left_label = c("L",
    "L", "NL", "L", "NL", "L", "NL", "NL", "L", "L", "NL", "L",
    "L", "L", "L", "L", "NL", "L", "L", "L"), right_food = c("NV",
    "NV", "NV", "NV", "NV", "NV", "NV", "NV", "NV", "NV", "NV",
    "NV", "V", "NV", "NV", "V", "NV", "NV", "NV", "NV"), right_label = c("NL",
    "NL", "L", "NL", "L", "NL", "L", "L", "NL", "NL", "L", "NL",
    "NL", "NL", "NL", "NL", "L", "NL", "NL", "NL"), dv_inv = c(1,
    1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0),
    appear_selected = c("no vegan", "no vegan", "no vegan", "vegan",
    "no vegan", "no vegan", "vegan", "vegan", "vegan", "no vegan",
    "no vegan", "no vegan", "vegan", "no vegan", "vegan", "vegan",
    "no vegan", "no vegan", "no vegan", "no vegan"), label_selected = c("NL",
    "NL", "L", "L", "L", "NL", "NL", "NL", "L", "NL", "L", "NL",
    "L", "NL", "NL", "NL", "L", "NL", "L", "L"), counter = c(1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
    dv_recoded = c(0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0,
    1, 1, 0, 0, 0, 0), cong = c(1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
    0, 0, 0, 1, 0, 0, 0, 1, 0, 0), veg_cong = c(0, 0, 0, 1, 0,
    0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0), incong = c(0,
    0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0),
    noveg_cong = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
    0, 0, 0, 0, 1, 1), control_cong = c(1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), congVar = c("cong",
    "cong", "incong", "veg_cong", "incong", "noveg_cong", "incong",
    "veg_cong", "cong", "noveg_cong", "incong", "noveg_cong",
    "veg_cong", "cong", "veg_cong", "veg_cong", "incong", "cong",
    "noveg_cong", "noveg_cong"), cong2 = c(TRUE, TRUE, FALSE,
    FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
    FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE), veg_cong2 = c(FALSE,
    FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
    FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE,
    FALSE), incong2 = c(FALSE, FALSE, TRUE, FALSE, TRUE, FALSE,
    TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
    FALSE, TRUE, FALSE, FALSE, FALSE), noveg_cong2 = c(FALSE,
    FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE,
    FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
    TRUE), fac = structure(c(1L, 1L, 3L, 2L, 3L, 4L, 3L, 2L,
    1L, 4L, 3L, 4L, 2L, 1L, 2L, 2L, 3L, 1L, 4L, 4L), .Label = c("cong",
    "veg_cong", "incong", "noveg_cong"), class = "factor"), trialType = c(NA,
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    NA, NA, NA, NA)), row.names = c(NA, 20L), class = "data.frame")

这是我目前拥有的代码

  # Asker's attempt, kept exactly as posted: running it raises the error quoted
  # below.
  # - mutate() receives only the `Group =` argument; no data frame is passed.
  # - Every case_when() argument begins with `if (my_data_filt) ...`, so R tries
  #   to use my_data_filt as an if() condition.
  # - Each intended `condition ~ value` pair is split by a comma, which leaves
  #   `'steak' ~ "steak"` etc. as separate arguments.
  my_data_filt <- mutate(Group =
                      case_when(if (my_data_filt) crossCheckExperiment == 'Steak', 'steak' ~ "steak", 
                                if (my_data_filt) crossCheckExperiment == 'Burger', 'burger' ~ "burger",
                                if (my_data_filt) crossCheckExperiment == 'Chicken', 'chicken' ~ "chicken",
                                if (my_data_filt) crossCheckExperiment == 'Pizza', 'pizza' ~ "pizza",
                                if (my_data_filt) crossCheckExperiment == 'Sausage', 'sausage' ~ "sausage",
                                # NOTE(review): this branch yields "sausage" for 'curry';
                                # presumably "curry" was intended.
                                if (my_data_filt) crossCheckExperiment == 'Curry', 'curry' ~ "sausage")
)
  

但是,我一直收到此错误

Error in if (my_data_filt) crossCheckExperiment == "Steak" : 
  argument is not interpretable as logical


您的代码中存在多个问题。首先,您没有把数据框作为 mutate() 的第一个参数传入。其次,case_when() 的用法不正确:每个分支都应写成“条件 ~ 结果”的形式,不需要 if()。第三,== 只适用于与单个值比较;如果您希望匹配多个值中的任意一个,请使用 %in% 运算符。

# Add a Group column by matching each whole value of crossCheckExperiment
# against both capitalisations of a food name. `%in%` tests membership in a set
# of values, which `==` cannot do.
# Only exact matches count: a value such as "Steak_V_L_03_NV_NL_04" matches no
# branch, and case_when() returns NA for it.
# mutate() returns the modified data frame; assign the result to keep it.
mutate(my_data_filt,
       Group = case_when(
         crossCheckExperiment %in% c("Steak", "steak") ~ "steak",
         crossCheckExperiment %in% c("Burger", "burger") ~ "burger",
         crossCheckExperiment %in% c("Chicken", "chicken") ~ "chicken",
         crossCheckExperiment %in% c("Pizza", "pizza") ~ "pizza",
         crossCheckExperiment %in% c("Sausage", "sausage") ~ "sausage",
         crossCheckExperiment %in% c("Curry", "curry") ~ "curry"
       ))

如果变量中包含感兴趣的单词以及其他内容,stringr 中的 str_detect() 函数会很有帮助:

# Add a Group column from the food name at the start of crossCheckExperiment
# (values look like "Steak_V_L_03_NV_NL_04").
# Each pattern is anchored with `^` so only the first word is tested, and
# ignore_case = TRUE accepts any capitalisation ("Steak", "steak", "STEAK").
# Rows that match no branch get NA from case_when().
# mutate() returns the modified data frame; assign the result to keep it.
mutate(my_data_filt,
       Group = case_when(
         str_detect(crossCheckExperiment,
                    regex("^steak", ignore_case = TRUE)) ~ "steak",
         str_detect(crossCheckExperiment,
                    regex("^burger", ignore_case = TRUE)) ~ "burger",
         str_detect(crossCheckExperiment,
                    regex("^chicken", ignore_case = TRUE)) ~ "chicken",
         str_detect(crossCheckExperiment,
                    regex("^pizza", ignore_case = TRUE)) ~ "pizza",
         str_detect(crossCheckExperiment,
                    regex("^sausage", ignore_case = TRUE)) ~ "sausage",
         str_detect(crossCheckExperiment,
                    regex("^curry", ignore_case = TRUE)) ~ "curry"
       ))