调整使用 data.table 函数的代码
Adjust code that uses data.table function
下面的代码使用 data.table
函数生成输出 table。但是,我想知道是否有可能以某种方式优化代码并仍然得到相同的结果?想法是减少代码以减少处理时间。
library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)
df1 <- structure(
list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
"2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-06-25","2021-06-25","2021-06-27","2021-07-07","2021-07-07","2021-07-09","2021-07-09","2021-07-09"),
Code = c("FDE","ABC","ABC","ABC","CDE","FGE","ABC","CDE"),
Week= c("Wednesday","Wednesday","Friday","Wednesday","Wednesday","Friday","Friday","Friday"),
DR1 = c(4,1,4,3,6,4,3,5),
DR01 = c(4,1,4,3,3,4,3,6), DR02= c(4,2,6,7,3,2,7,4),DR03= c(9,5,4,3,3,2,1,5),
DR04 = c(5,4,3,3,6,2,1,9),DR05 = c(5,4,5,3,6,2,1,9),
DR06 = c(2,4,3,3,5,6,7,8),DR07 = c(2,5,4,4,9,4,7,8),
DR08 = c(4,0,0,1,2,4,4,4),DR09 = c(2,5,4,4,9,4,7,8),DR010 = c(2,5,4,4,9,4,7,8),DR011 = c(4,7,3,2,2,7,7,7),
DR012 = c(4,4,2,3,0,4,4,5),DR013 = c(4,4,1,4,0,3,2,0),DR014 = c(0,3,1,2,0,2,NA,NA)),
class = "data.frame", row.names = c(NA, -8L))
selection = startsWith(names(df1), "DR0")
df1[selection][is.na(df1[selection])] = 0
dt1 <- as.data.table(df1)
cols <- grep("^DR0", colnames(dt1), value = TRUE)
medi_ana <-
dt1[, (paste0(cols, "_PV")) := DR1 - .SD, .SDcols = cols
][, lapply(.SD, median), by = .(Code, Week), .SDcols = paste0(cols, "_PV") ]
f1 <- function(nm, pat) grep(pat, nm, value = TRUE)
nm1 <- f1(names(df1), "^DR0\d+$")
nm2 <- f1(names(medi_ana), "_PV")
nm3 <- paste0("i.", nm2)
setDT(df1)[medi_ana, (nm2) := Map(`+`, mget(nm1), mget(nm3)), on = .(Code, Week)]
SPV1 <- df1[, c('date1', 'date2', 'Code', 'Week', nm2), with = FALSE]
dmda<-"2021-07-09"
code<-"CDE"
SPV2<-melt(SPV1[date2 == dmda & Code == code][,
lapply(.SD, sum, na.rm = TRUE), by = Code,
.SDcols = patterns("^DR0")],
id.var = "Code", variable.name = "name", value.name = "val")[,
name := readr::parse_number(as.character(name))][]
> SPV2
Code name val
1: CDE 1 5
2: CDE 2 5
3: CDE 3 5
4: CDE 4 5
5: CDE 5 5
6: CDE 6 5
7: CDE 7 5
8: CDE 8 5
9: CDE 9 5
10: CDE 10 5
11: CDE 11 5
12: CDE 12 5
13: CDE 13 5
14: CDE 14 5
result <- SPV2 %>%
group_by(Code) %>%
slice((as.Date(dmda) - min(as.Date(df1$date1) [
df1$Code == first(Code)])):max(name)+1) %>%
ungroup
> result
# A tibble: 3 x 3
Code name val
<chr> <dbl> <dbl>
1 CDE 12 5
2 CDE 13 5
3 CDE 14 5
dplyr
代码可以转换为data.table
如
SPV2[na.omit(SPV2[, .I[(as.Date(dmda) - min(as.Date(df1$date1) [
df1$Code == first(Code)])):max(name)+1], .(Code)]$V1)]
-输出
Code name val
<char> <num> <num>
1: CDE 12 5
2: CDE 13 5
3: CDE 14 5
下面的代码使用 data.table
函数生成输出 table。但是,我想知道是否有可能以某种方式优化代码并仍然得到相同的结果?想法是减少代码以减少处理时间。
library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)
df1 <- structure(
list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
"2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-06-25","2021-06-25","2021-06-27","2021-07-07","2021-07-07","2021-07-09","2021-07-09","2021-07-09"),
Code = c("FDE","ABC","ABC","ABC","CDE","FGE","ABC","CDE"),
Week= c("Wednesday","Wednesday","Friday","Wednesday","Wednesday","Friday","Friday","Friday"),
DR1 = c(4,1,4,3,6,4,3,5),
DR01 = c(4,1,4,3,3,4,3,6), DR02= c(4,2,6,7,3,2,7,4),DR03= c(9,5,4,3,3,2,1,5),
DR04 = c(5,4,3,3,6,2,1,9),DR05 = c(5,4,5,3,6,2,1,9),
DR06 = c(2,4,3,3,5,6,7,8),DR07 = c(2,5,4,4,9,4,7,8),
DR08 = c(4,0,0,1,2,4,4,4),DR09 = c(2,5,4,4,9,4,7,8),DR010 = c(2,5,4,4,9,4,7,8),DR011 = c(4,7,3,2,2,7,7,7),
DR012 = c(4,4,2,3,0,4,4,5),DR013 = c(4,4,1,4,0,3,2,0),DR014 = c(0,3,1,2,0,2,NA,NA)),
class = "data.frame", row.names = c(NA, -8L))
selection = startsWith(names(df1), "DR0")
df1[selection][is.na(df1[selection])] = 0
dt1 <- as.data.table(df1)
cols <- grep("^DR0", colnames(dt1), value = TRUE)
medi_ana <-
dt1[, (paste0(cols, "_PV")) := DR1 - .SD, .SDcols = cols
][, lapply(.SD, median), by = .(Code, Week), .SDcols = paste0(cols, "_PV") ]
f1 <- function(nm, pat) grep(pat, nm, value = TRUE)
nm1 <- f1(names(df1), "^DR0\d+$")
nm2 <- f1(names(medi_ana), "_PV")
nm3 <- paste0("i.", nm2)
setDT(df1)[medi_ana, (nm2) := Map(`+`, mget(nm1), mget(nm3)), on = .(Code, Week)]
SPV1 <- df1[, c('date1', 'date2', 'Code', 'Week', nm2), with = FALSE]
dmda<-"2021-07-09"
code<-"CDE"
SPV2<-melt(SPV1[date2 == dmda & Code == code][,
lapply(.SD, sum, na.rm = TRUE), by = Code,
.SDcols = patterns("^DR0")],
id.var = "Code", variable.name = "name", value.name = "val")[,
name := readr::parse_number(as.character(name))][]
> SPV2
Code name val
1: CDE 1 5
2: CDE 2 5
3: CDE 3 5
4: CDE 4 5
5: CDE 5 5
6: CDE 6 5
7: CDE 7 5
8: CDE 8 5
9: CDE 9 5
10: CDE 10 5
11: CDE 11 5
12: CDE 12 5
13: CDE 13 5
14: CDE 14 5
result <- SPV2 %>%
group_by(Code) %>%
slice((as.Date(dmda) - min(as.Date(df1$date1) [
df1$Code == first(Code)])):max(name)+1) %>%
ungroup
> result
# A tibble: 3 x 3
Code name val
<chr> <dbl> <dbl>
1 CDE 12 5
2 CDE 13 5
3 CDE 14 5
dplyr
代码可以转换为data.table
如
SPV2[na.omit(SPV2[, .I[(as.Date(dmda) - min(as.Date(df1$date1) [
df1$Code == first(Code)])):max(name)+1], .(Code)]$V1)]
-输出
Code name val
<char> <num> <num>
1: CDE 12 5
2: CDE 13 5
3: CDE 14 5