R:检测频率
R: Detect frequency
我必须在 R 中创建一个函数(或循环)来检测超频繁。
检测超频的要求是180天内来3次,如果满足这个要求,这个人就会超频,不仅是以后,而且在他没有达到超频要求的过去访问中还有。
pacient <- c(10,10,10,10,10,11,11,12,12,12,13, 13, 15, 14); pacient
date <- as.Date(c("01/01/2018","02/05/2018", "04/06/2018", "10/11/2018", "05/12/2018", "02/01/2018", "06/08/2018", "01/01/2018", "03/01/2018", "06/03/2018", "05/08/2018", "05/08/2019", "05/07/2019", "08/07/2017"), format = "%d/%m/%Y"); date
DF <- data.frame(pacient, date); DF
count_visit <- function(x){
DF <- data.table(DF)
DTord<-DF[with(DF , order(DF $ date)), ]; DTord
DTord[,num_visit := order(date), by = pacient];DTord
DTordID <- DTord[with(DTord, order(DTord$pacient)), ]; DTordID
DTordID[,max_visit := max(num_visit), by = pacient];DTordID
framedatos <- as.data.frame(DTordID)
return(framedatos)}
REUP_visit <- count_visit(DF); head(REUP_visit)
pacient date num_visit max_visit
10 01/01/2018 1 5
10 02/05/2018 2 5
10 04/06/2018 3 5
10 10/11/2018 4 5
10 05/12/2018 5 5
11 02/01/2018 1 2
11 06/08/2018 2 2
12 01/01/2018 1 3
12 03/01/2018 2 3
12 06/03/2018 3 3
13 05/08/2018 1 2
13 05/08/2019 2 2
14 08/07/2017 1 1
15 05/07/2019 1 1
到目前为止,我只设法创建了一个函数,告诉我每位患者的就诊次数和患者的最大就诊次数(这是我需要的其他东西):
pacient date num_visit max_visit days_visit <180 future_hyperf past_hyperf
10 01/01/2018 1 5 0 1 no yes
10 02/05/2018 2 5 121 2 no yes
10 04/06/2018 3 5 33 3 yes yes
10 10/11/2018 4 5 159 4 yes yes
10 05/12/2018 5 5 25 5 yes yes
11 02/01/2018 1 2 0 1 no no
11 06/08/2018 2 2 216 1 no no
12 01/01/2018 1 3 0 1 no yes
12 03/01/2018 2 3 2 2 no yes
12 06/03/2018 3 3 62 3 yes yes
13 05/08/2018 1 2 0 1 no no
13 05/08/2019 2 2 365 1 no no
14 08/07/2017 1 1 0 1 no no
15 05/07/2019 1 1 0 1 no no
我需要的输出具有:"day_visit"、“<180”、"future_hyperf" 和 "past_hyperf"。
变量"day_visit"的objective是将患者第一次到急诊室就诊的时间编号为0,然后统计两次就诊之间的天数。
DF <- DF %>%
group_by(pacient) %>%
arrange(date) %>%
mutate(days_visit= date - lag(date, default = first(date)))
变量“<180”将是这样的变量,第一次访问时数字为 1,第二次访问时为 2(如果与上次访问相比 <180 天),3(如果与上次访问相比 <180 天)之前的访问)等等。例如,如果患者达到2,第三次就诊未满足<180天,则需要重新输入1(将重新开始循环)。
变量"future_hyperf"表示是或否。它被标记为一旦患者在变量 <180 中达到 3 就创造了未来,如果访问晚于 180 天并且不遵守则无关紧要。一旦达到标准,就是永远。
变量"past_hyperf"将所有在变量"future_hyperf"中有if的患者本身也转换为过去
谢谢!
解决方案
DF3 <- DF %>%
arrange(pacient, date) %>%
group_by(pacient) %>%
mutate(days_visit = as.integer(date - lag(date, default = first(date))) ,
less_180 = days_visit < 180) %>%
mutate(counter = rowid(pacient, cumsum(date - shift(date, fill=first(date)) > 180)),
future_hyperf = case_when(counter >= 3 ~ "yes",
TRUE ~ "no"),
past_hyperf = case_when(max(counter, na.rm = T) >= 3 ~ "yes",
TRUE ~ "no"))
DF3 <- DF3[with(DF3,order(pacient,date)),]
这是我的做法。解释在注释中。
library(tidyverse)
DF %>%
group_by(pacient) %>% # group the data by "pacient"
mutate(lag_date = lag(date, n = 2)) %>% # create the variable of lag dates by 2 visits
mutate(date_diff = as.integer(date - lag_date)) %>% # Calculate the difference in dates
mutate(date_diff = case_when(is.na(date_diff) ~ 9999L, # replace NAs with 999 (cummin does not allow na.rm)
TRUE ~ date_diff)) %>% #
mutate(min_period = cummin(date_diff)) %>% # calculate the cumulative minimum of the differencce
mutate(future_hyperf = min_period < 180) %>% # check the cumulative min is less than 180
mutate(past_hyperf = min(min_period) < 180) %>%
ungroup()
## # A tibble: 14 x 7
## pacient date lag_date date_diff min_period future_hyperf past_hyperf
## <dbl> <date> <date> <int> <int> <lgl> <lgl>
## 1 10 2018-01-01 NA 9999 9999 FALSE TRUE
## 2 10 2018-05-02 NA 9999 9999 FALSE TRUE
## 3 10 2018-06-04 2018-01-01 154 154 TRUE TRUE
## 4 10 2018-11-10 2018-05-02 192 154 TRUE TRUE
## 5 10 2018-12-05 2018-06-04 184 154 TRUE TRUE
## 6 11 2018-01-02 NA 9999 9999 FALSE FALSE
## 7 11 2018-08-06 NA 9999 9999 FALSE FALSE
## 8 12 2018-01-01 NA 9999 9999 FALSE TRUE
## 9 12 2018-01-03 NA 9999 9999 FALSE TRUE
## 10 12 2018-03-06 2018-01-01 64 64 TRUE TRUE
## 11 13 2018-08-05 NA 9999 9999 FALSE FALSE
## 12 13 2019-08-05 NA 9999 9999 FALSE FALSE
## 13 15 2019-07-05 NA 9999 9999 FALSE FALSE
## 14 14 2017-07-08 NA 9999 9999 FALSE FALSE
试试这个:
pacient <- c(10, 10, 10, 10, 10, 11, 11, 12, 12, 12, 13, 13, 15, 14)
pacient
date <-
as.Date(
c(
"01/01/2018",
"02/05/2018",
"04/06/2018",
"10/11/2018",
"05/12/2018",
"02/01/2018",
"06/08/2018",
"01/01/2018",
"03/01/2018",
"06/03/2018",
"05/08/2018",
"05/08/2019",
"05/07/2019",
"08/07/2017"
),
format = "%d/%m/%Y"
)
date
DF <- data.frame(pacient, date)
DF
#packages
library(dplyr)
library(lubridate)
#time zone
lct <- Sys.getlocale("LC_TIME")
Sys.setlocale("LC_TIME", "C")
DF <- DF %>%
group_by(pacient) %>%
mutate(num_visit = cumsum(pacient) / pacient) %>% # number of visits
mutate(max_visit = max(num_visit)) %>% # max visit
mutate(days_visit = as.Date(date, "%d/%m/%Y") - lag(as.Date(date, "%d/%m/%Y"))) %>% # days between visits
mutate(minus_180_days = case_when(days_visit < 180 &
!is.na(days_visit) ~ num_visit,
TRUE ~ 1)) %>% # is days between visits < 180
mutate(future_hyperf = case_when(minus_180_days > 3 ~ "yes",
TRUE ~ "no")) %>% # future hyperf
mutate(past_hyperf = case_when(max(minus_180_days, na.rm = T) >= 3 ~ "yes",
TRUE ~ "no")) # past hyperf
希望对您有所帮助
我必须在 R 中创建一个函数(或循环)来检测超频繁。 检测超频的要求是180天内来3次,如果满足这个要求,这个人就会超频,不仅是以后,而且在他没有达到超频要求的过去访问中还有。
pacient <- c(10,10,10,10,10,11,11,12,12,12,13, 13, 15, 14); pacient
date <- as.Date(c("01/01/2018","02/05/2018", "04/06/2018", "10/11/2018", "05/12/2018", "02/01/2018", "06/08/2018", "01/01/2018", "03/01/2018", "06/03/2018", "05/08/2018", "05/08/2019", "05/07/2019", "08/07/2017"), format = "%d/%m/%Y"); date
DF <- data.frame(pacient, date); DF
count_visit <- function(x){
DF <- data.table(DF)
DTord<-DF[with(DF , order(DF $ date)), ]; DTord
DTord[,num_visit := order(date), by = pacient];DTord
DTordID <- DTord[with(DTord, order(DTord$pacient)), ]; DTordID
DTordID[,max_visit := max(num_visit), by = pacient];DTordID
framedatos <- as.data.frame(DTordID)
return(framedatos)}
REUP_visit <- count_visit(DF); head(REUP_visit)
pacient date num_visit max_visit
10 01/01/2018 1 5
10 02/05/2018 2 5
10 04/06/2018 3 5
10 10/11/2018 4 5
10 05/12/2018 5 5
11 02/01/2018 1 2
11 06/08/2018 2 2
12 01/01/2018 1 3
12 03/01/2018 2 3
12 06/03/2018 3 3
13 05/08/2018 1 2
13 05/08/2019 2 2
14 08/07/2017 1 1
15 05/07/2019 1 1
到目前为止,我只设法创建了一个函数,告诉我每位患者的就诊次数和患者的最大就诊次数(这是我需要的其他东西):
pacient date num_visit max_visit days_visit <180 future_hyperf past_hyperf
10 01/01/2018 1 5 0 1 no yes
10 02/05/2018 2 5 121 2 no yes
10 04/06/2018 3 5 33 3 yes yes
10 10/11/2018 4 5 159 4 yes yes
10 05/12/2018 5 5 25 5 yes yes
11 02/01/2018 1 2 0 1 no no
11 06/08/2018 2 2 216 1 no no
12 01/01/2018 1 3 0 1 no yes
12 03/01/2018 2 3 2 2 no yes
12 06/03/2018 3 3 62 3 yes yes
13 05/08/2018 1 2 0 1 no no
13 05/08/2019 2 2 365 1 no no
14 08/07/2017 1 1 0 1 no no
15 05/07/2019 1 1 0 1 no no
我需要的输出具有:"day_visit"、“<180”、"future_hyperf" 和 "past_hyperf"。
变量"day_visit"的objective是将患者第一次到急诊室就诊的时间编号为0,然后统计两次就诊之间的天数。
DF <- DF %>%
group_by(pacient) %>%
arrange(date) %>%
mutate(days_visit= date - lag(date, default = first(date)))
变量“<180”将是这样的变量,第一次访问时数字为 1,第二次访问时为 2(如果与上次访问相比 <180 天),3(如果与上次访问相比 <180 天)之前的访问)等等。例如,如果患者达到2,第三次就诊未满足<180天,则需要重新输入1(将重新开始循环)。
变量"future_hyperf"表示是或否。它被标记为一旦患者在变量 <180 中达到 3 就创造了未来,如果访问晚于 180 天并且不遵守则无关紧要。一旦达到标准,就是永远。
变量"past_hyperf"将所有在变量"future_hyperf"中有if的患者本身也转换为过去
谢谢!
解决方案
DF3 <- DF %>%
arrange(pacient, date) %>%
group_by(pacient) %>%
mutate(days_visit = as.integer(date - lag(date, default = first(date))) ,
less_180 = days_visit < 180) %>%
mutate(counter = rowid(pacient, cumsum(date - shift(date, fill=first(date)) > 180)),
future_hyperf = case_when(counter >= 3 ~ "yes",
TRUE ~ "no"),
past_hyperf = case_when(max(counter, na.rm = T) >= 3 ~ "yes",
TRUE ~ "no"))
DF3 <- DF3[with(DF3,order(pacient,date)),]
这是我的做法。解释在注释中。
library(tidyverse)
DF %>%
group_by(pacient) %>% # group the data by "pacient"
mutate(lag_date = lag(date, n = 2)) %>% # create the variable of lag dates by 2 visits
mutate(date_diff = as.integer(date - lag_date)) %>% # Calculate the difference in dates
mutate(date_diff = case_when(is.na(date_diff) ~ 9999L, # replace NAs with 999 (cummin does not allow na.rm)
TRUE ~ date_diff)) %>% #
mutate(min_period = cummin(date_diff)) %>% # calculate the cumulative minimum of the differencce
mutate(future_hyperf = min_period < 180) %>% # check the cumulative min is less than 180
mutate(past_hyperf = min(min_period) < 180) %>%
ungroup()
## # A tibble: 14 x 7
## pacient date lag_date date_diff min_period future_hyperf past_hyperf
## <dbl> <date> <date> <int> <int> <lgl> <lgl>
## 1 10 2018-01-01 NA 9999 9999 FALSE TRUE
## 2 10 2018-05-02 NA 9999 9999 FALSE TRUE
## 3 10 2018-06-04 2018-01-01 154 154 TRUE TRUE
## 4 10 2018-11-10 2018-05-02 192 154 TRUE TRUE
## 5 10 2018-12-05 2018-06-04 184 154 TRUE TRUE
## 6 11 2018-01-02 NA 9999 9999 FALSE FALSE
## 7 11 2018-08-06 NA 9999 9999 FALSE FALSE
## 8 12 2018-01-01 NA 9999 9999 FALSE TRUE
## 9 12 2018-01-03 NA 9999 9999 FALSE TRUE
## 10 12 2018-03-06 2018-01-01 64 64 TRUE TRUE
## 11 13 2018-08-05 NA 9999 9999 FALSE FALSE
## 12 13 2019-08-05 NA 9999 9999 FALSE FALSE
## 13 15 2019-07-05 NA 9999 9999 FALSE FALSE
## 14 14 2017-07-08 NA 9999 9999 FALSE FALSE
试试这个:
pacient <- c(10, 10, 10, 10, 10, 11, 11, 12, 12, 12, 13, 13, 15, 14)
pacient
date <-
as.Date(
c(
"01/01/2018",
"02/05/2018",
"04/06/2018",
"10/11/2018",
"05/12/2018",
"02/01/2018",
"06/08/2018",
"01/01/2018",
"03/01/2018",
"06/03/2018",
"05/08/2018",
"05/08/2019",
"05/07/2019",
"08/07/2017"
),
format = "%d/%m/%Y"
)
date
DF <- data.frame(pacient, date)
DF
#packages
library(dplyr)
library(lubridate)
#time zone
lct <- Sys.getlocale("LC_TIME")
Sys.setlocale("LC_TIME", "C")
DF <- DF %>%
group_by(pacient) %>%
mutate(num_visit = cumsum(pacient) / pacient) %>% # number of visits
mutate(max_visit = max(num_visit)) %>% # max visit
mutate(days_visit = as.Date(date, "%d/%m/%Y") - lag(as.Date(date, "%d/%m/%Y"))) %>% # days between visits
mutate(minus_180_days = case_when(days_visit < 180 &
!is.na(days_visit) ~ num_visit,
TRUE ~ 1)) %>% # is days between visits < 180
mutate(future_hyperf = case_when(minus_180_days > 3 ~ "yes",
TRUE ~ "no")) %>% # future hyperf
mutate(past_hyperf = case_when(max(minus_180_days, na.rm = T) >= 3 ~ "yes",
TRUE ~ "no")) # past hyperf
希望对您有所帮助