如何计算每个 id 的重复事件
How to count repeated events per id
我有一个从 REDCap 中提取的数据框,我已经在 RStudio 中导入了 CSV。
数据中先列出唯一的参与者 ID,然后是事件名称。我需要为每个重复事件标出它是第几次出现,例如对于 id 1010002,acute_event_infect_arm_4 是第一个感染事件,_4b 是第二个,_4c 是第三个,依此类推。
我需要对随访(followup)、排斥(reject)和 CMV/EBV 事件执行此操作
这是带有 3 个 id 的数据帧的小快照(实际 df 有 1000 个 id)
# Sample of the REDCap export: three participants, 38 rows in total.
# `id` repeats once per event row; `redcap_event_name` holds the event label.
structure(
  list(
    id = rep(c(1010002, 1010006, 1010008), times = c(16L, 11L, 11L)),
    redcap_event_name = c(
      # id 1010002 (16 events)
      "pre_transplant_arm_4", "transplant_arm_4", "transplant_2_arm_4",
      "end_of_followup_fo_arm_4", "last_encounter_arm_4",
      "acute_event_reject_arm_4", "acute_event_reject_arm_4b",
      "acute_event_infect_arm_4", "acute_event_infect_arm_4b",
      "acute_event_infect_arm_4c", "acute_event_infect_arm_4d",
      "acute_event_infect_arm_4e", "acute_event_infect_arm_4f",
      "acute_event_infect_arm_4g",
      "acute_event_cmvebv_arm_4", "acute_event_cmvebv_arm_4b",
      # id 1010006 (11 events)
      "pre_transplant_arm_4", "transplant_arm_4",
      "1_month_followup_arm_4", "2_year_followup_arm_4",
      "last_encounter_arm_4",
      "acute_event_reject_arm_4", "acute_event_reject_arm_4b",
      "acute_event_infect_arm_4", "acute_event_infect_arm_4b",
      "acute_event_infect_arm_4c",
      "acute_event_cmvebv_arm_4",
      # id 1010008 (11 events)
      "pre_transplant_arm_4", "transplant_arm_4",
      "3_month_followup_arm_4", "6_month_followup_arm_4",
      "1_year_followup_arm_4", "2_year_followup_arm_4",
      "3_year_followup_arm_4",
      "last_encounter_arm_4",
      "acute_event_reject_arm_4",
      "acute_event_infect_arm_4",
      "acute_event_cmvebv_arm_4"
    )
  ),
  row.names = c(NA, -38L),
  class = c("tbl_df", "tbl", "data.frame")
)
这是我需要在 redcap_repeat 列中添加的内容
@akrun 请看下面的一些例子(缺失的值以红色粗体标出)
这是一个选项
library(tidyverse)
# Number repeated events within each participant (`id`).
#
# Pass 1: build a grouping key by stripping either a trailing lowercase
# letter (the "b", "c", ... repeat suffix) or a leading "<digits>_" prefix
# from the event name. A group is numbered 1..n only when it has more than
# one row and at least one name carries such a suffix or leading digit;
# every other row gets "".
#
# Pass 2: regroup with the whole "<digits>_(month|year)_" prefix removed so
# month and year follow-ups share one key, then renumber the rows that were
# already numbered in pass 1 and leave the rest untouched.
#
# Backslashes are doubled ("\\d") because R string literals consume one
# level of escaping before the regex engine sees the pattern.
df1 %>%
  group_by(id, grp1 = str_remove(redcap_event_name, "[a-z]$|^\\d+_")) %>%
  mutate(
    # Scalar per-group decision, hence `if` with `&&` rather than `&`.
    redcap_repeat = if (
      any(str_detect(redcap_event_name, "[a-z]$|^[0-9]")) && n() > 1
    ) {
      as.character(row_number())
    } else {
      ""
    }
  ) %>%
  ungroup() %>%
  group_by(
    id,
    grp1 = str_remove(redcap_event_name, "^\\d+_(month|year)_")
  ) %>%
  mutate(
    redcap_repeat = case_when(
      redcap_repeat != "" & n() > 1 ~ as.character(row_number()),
      TRUE ~ redcap_repeat
    )
  ) %>%
  ungroup() %>%
  # Drop the helper key so only the original columns plus redcap_repeat remain.
  select(-grp1) %>%
  as.data.frame()
-输出
# id redcap_event_name redcap_repeat
#1 1010002 pre_transplant_arm_4
#2 1010002 transplant_arm_4
#3 1010002 transplant_2_arm_4
#4 1010002 end_of_followup_fo_arm_4
#5 1010002 last_encounter_arm_4
#6 1010002 acute_event_reject_arm_4 1
#7 1010002 acute_event_reject_arm_4b 2
#8 1010002 acute_event_infect_arm_4 1
#9 1010002 acute_event_infect_arm_4b 2
#10 1010002 acute_event_infect_arm_4c 3
#11 1010002 acute_event_infect_arm_4d 4
#12 1010002 acute_event_infect_arm_4e 5
#13 1010002 acute_event_infect_arm_4f 6
#14 1010002 acute_event_infect_arm_4g 7
#15 1010002 acute_event_cmvebv_arm_4 1
#16 1010002 acute_event_cmvebv_arm_4b 2
#17 1010006 pre_transplant_arm_4
#18 1010006 transplant_arm_4
#19 1010006 1_month_followup_arm_4
#20 1010006 2_year_followup_arm_4
#21 1010006 last_encounter_arm_4
#22 1010006 acute_event_reject_arm_4 1
#23 1010006 acute_event_reject_arm_4b 2
#24 1010006 acute_event_infect_arm_4 1
#25 1010006 acute_event_infect_arm_4b 2
#26 1010006 acute_event_infect_arm_4c 3
#27 1010006 acute_event_cmvebv_arm_4
#28 1010008 pre_transplant_arm_4
#29 1010008 transplant_arm_4
#30 1010008 3_month_followup_arm_4 1
#31 1010008 6_month_followup_arm_4 2
#32 1010008 1_year_followup_arm_4 3
#33 1010008 2_year_followup_arm_4 4
#34 1010008 3_year_followup_arm_4 5
#35 1010008 last_encounter_arm_4
#36 1010008 acute_event_reject_arm_4
#37 1010008 acute_event_infect_arm_4
#38 1010008 acute_event_cmvebv_arm_4
我有一个从 REDCap 中提取的数据框,我已经在 RStudio 中导入了 CSV。 数据中先列出唯一的参与者 ID,然后是事件名称。我需要为每个重复事件标出它是第几次出现,例如对于 id 1010002,acute_event_infect_arm_4 是第一个感染事件,_4b 是第二个,_4c 是第三个,依此类推。
我需要对随访(followup)、排斥(reject)和 CMV/EBV 事件执行此操作
这是带有 3 个 id 的数据帧的小快照(实际 df 有 1000 个 id)
# Sample of the REDCap export: three participants, 38 rows in total.
# `id` repeats once per event row; `redcap_event_name` holds the event label.
structure(
  list(
    id = rep(c(1010002, 1010006, 1010008), times = c(16L, 11L, 11L)),
    redcap_event_name = c(
      # id 1010002 (16 events)
      "pre_transplant_arm_4", "transplant_arm_4", "transplant_2_arm_4",
      "end_of_followup_fo_arm_4", "last_encounter_arm_4",
      "acute_event_reject_arm_4", "acute_event_reject_arm_4b",
      "acute_event_infect_arm_4", "acute_event_infect_arm_4b",
      "acute_event_infect_arm_4c", "acute_event_infect_arm_4d",
      "acute_event_infect_arm_4e", "acute_event_infect_arm_4f",
      "acute_event_infect_arm_4g",
      "acute_event_cmvebv_arm_4", "acute_event_cmvebv_arm_4b",
      # id 1010006 (11 events)
      "pre_transplant_arm_4", "transplant_arm_4",
      "1_month_followup_arm_4", "2_year_followup_arm_4",
      "last_encounter_arm_4",
      "acute_event_reject_arm_4", "acute_event_reject_arm_4b",
      "acute_event_infect_arm_4", "acute_event_infect_arm_4b",
      "acute_event_infect_arm_4c",
      "acute_event_cmvebv_arm_4",
      # id 1010008 (11 events)
      "pre_transplant_arm_4", "transplant_arm_4",
      "3_month_followup_arm_4", "6_month_followup_arm_4",
      "1_year_followup_arm_4", "2_year_followup_arm_4",
      "3_year_followup_arm_4",
      "last_encounter_arm_4",
      "acute_event_reject_arm_4",
      "acute_event_infect_arm_4",
      "acute_event_cmvebv_arm_4"
    )
  ),
  row.names = c(NA, -38L),
  class = c("tbl_df", "tbl", "data.frame")
)
这是我需要在 redcap_repeat 列中添加的内容
@akrun 请看下面的一些例子(缺失的值以红色粗体标出)
这是一个选项
library(tidyverse)
# Number repeated events within each participant (`id`).
#
# Pass 1: build a grouping key by stripping either a trailing lowercase
# letter (the "b", "c", ... repeat suffix) or a leading "<digits>_" prefix
# from the event name. A group is numbered 1..n only when it has more than
# one row and at least one name carries such a suffix or leading digit;
# every other row gets "".
#
# Pass 2: regroup with the whole "<digits>_(month|year)_" prefix removed so
# month and year follow-ups share one key, then renumber the rows that were
# already numbered in pass 1 and leave the rest untouched.
#
# Backslashes are doubled ("\\d") because R string literals consume one
# level of escaping before the regex engine sees the pattern.
df1 %>%
  group_by(id, grp1 = str_remove(redcap_event_name, "[a-z]$|^\\d+_")) %>%
  mutate(
    # Scalar per-group decision, hence `if` with `&&` rather than `&`.
    redcap_repeat = if (
      any(str_detect(redcap_event_name, "[a-z]$|^[0-9]")) && n() > 1
    ) {
      as.character(row_number())
    } else {
      ""
    }
  ) %>%
  ungroup() %>%
  group_by(
    id,
    grp1 = str_remove(redcap_event_name, "^\\d+_(month|year)_")
  ) %>%
  mutate(
    redcap_repeat = case_when(
      redcap_repeat != "" & n() > 1 ~ as.character(row_number()),
      TRUE ~ redcap_repeat
    )
  ) %>%
  ungroup() %>%
  # Drop the helper key so only the original columns plus redcap_repeat remain.
  select(-grp1) %>%
  as.data.frame()
-输出
# id redcap_event_name redcap_repeat
#1 1010002 pre_transplant_arm_4
#2 1010002 transplant_arm_4
#3 1010002 transplant_2_arm_4
#4 1010002 end_of_followup_fo_arm_4
#5 1010002 last_encounter_arm_4
#6 1010002 acute_event_reject_arm_4 1
#7 1010002 acute_event_reject_arm_4b 2
#8 1010002 acute_event_infect_arm_4 1
#9 1010002 acute_event_infect_arm_4b 2
#10 1010002 acute_event_infect_arm_4c 3
#11 1010002 acute_event_infect_arm_4d 4
#12 1010002 acute_event_infect_arm_4e 5
#13 1010002 acute_event_infect_arm_4f 6
#14 1010002 acute_event_infect_arm_4g 7
#15 1010002 acute_event_cmvebv_arm_4 1
#16 1010002 acute_event_cmvebv_arm_4b 2
#17 1010006 pre_transplant_arm_4
#18 1010006 transplant_arm_4
#19 1010006 1_month_followup_arm_4
#20 1010006 2_year_followup_arm_4
#21 1010006 last_encounter_arm_4
#22 1010006 acute_event_reject_arm_4 1
#23 1010006 acute_event_reject_arm_4b 2
#24 1010006 acute_event_infect_arm_4 1
#25 1010006 acute_event_infect_arm_4b 2
#26 1010006 acute_event_infect_arm_4c 3
#27 1010006 acute_event_cmvebv_arm_4
#28 1010008 pre_transplant_arm_4
#29 1010008 transplant_arm_4
#30 1010008 3_month_followup_arm_4 1
#31 1010008 6_month_followup_arm_4 2
#32 1010008 1_year_followup_arm_4 3
#33 1010008 2_year_followup_arm_4 4
#34 1010008 3_year_followup_arm_4 5
#35 1010008 last_encounter_arm_4
#36 1010008 acute_event_reject_arm_4
#37 1010008 acute_event_infect_arm_4
#38 1010008 acute_event_cmvebv_arm_4