使用 data.table 而不是 pivot_longer
Use data.table instead of pivot_longer
我需要一些帮助来调整我生成的名为 adjusted
的输出。我的想法是以某种方式进行优化以更快地生成。请注意,我使用的是 pivot_longer
,这需要更长的时间。一种想法是继续使用 data.table
,就像我生成 SPV
时所做的那样。但是,在 adjusted
的这种情况下,我不知道该怎么做。你能帮帮我吗?
我想生成与问题中相同的输出 table。
library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)
df1 <- structure(
list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
"2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-06-25","2021-06-25","2021-06-27","2021-07-07","2021-07-07","2021-07-09","2021-07-09","2021-07-09"),
Code = c("FDE","ABC","ABC","ABC","CDE","FGE","ABC","CDE"),
Week= c("Wednesday","Wednesday","Friday","Wednesday","Wednesday","Friday","Friday","Friday"),
DR1 = c(4,1,4,3,3,4,3,5),
DR01 = c(4,1,4,3,3,4,3,6), DR02= c(4,2,6,7,3,2,7,4),DR03= c(9,5,4,3,3,2,1,5),
DR04 = c(5,4,3,3,6,2,1,9),DR05 = c(5,4,5,3,6,2,1,9),
DR06 = c(2,4,3,3,5,6,7,8),DR07 = c(2,5,4,4,9,4,7,8),
DR08 = c(0,0,0,1,2,0,0,0),DR09 = c(0,0,0,0,0,0,0,0),DR010 = c(0,0,0,0,0,0,0,0),DR011 = c(4,0,0,0,0,0,0,0),
DR012 = c(0,0,0,3,0,0,0,5),DR013 = c(0,0,1,0,0,0,2,0),DR014 = c(0,0,0,0,0,2,0,0)),
class = "data.frame", row.names = c(NA, -8L))
selection = startsWith(names(df1), "DRM")
df1[selection][is.na(df1[selection])] = 0
dt1 <- as.data.table(df1)
cols <- grep("^DR0", colnames(dt1), value = TRUE)
medi_ana <-
dt1[, (paste0(cols, "_PV")) := DR1 - .SD, .SDcols = cols
][, lapply(.SD, median), by = .(Code, Week), .SDcols = paste0(cols, "_PV") ]
f1 <- function(nm, pat) grep(pat, nm, value = TRUE)
nm1 <- f1(names(df1), "^DR0\d+$")
nm2 <- f1(names(medi_ana), "_PV")
nm3 <- paste0("i.", nm2)
setDT(df1)[medi_ana, (nm2) := Map(`+`, mget(nm1), mget(nm3)), on = .(Code, Week)]
SPV <- df1[, c('date1', 'date2', 'Code', 'Week', nm2), with = FALSE]
dmda<-"2021-07-09"
code<-"CDE"
adjusted<-SPV %>%
filter(date2==dmda,Code == code) %>%
group_by(Code) %>%
summarize(across(starts_with("DR0"), sum),.groups = 'drop') %>%
pivot_longer(cols= -Code, names_pattern = "DR0(.+)", values_to = "val") %>%
mutate(name = readr::parse_number(name))
> adjusted
# A tibble: 14 x 3
Code name val
<chr> <dbl> <dbl>
1 CDE 1 5
2 CDE 2 5
3 CDE 3 5
4 CDE 4 5
5 CDE 5 5
6 CDE 6 5
7 CDE 7 5
8 CDE 8 5
9 CDE 9 5
10 CDE 10 5
11 CDE 11 5
12 CDE 12 5
13 CDE 13 5
14 CDE 14 5
使用data.table
,过滤和分组后使用melt
library(data.table)
melt(SPV[date2 == dmda & Code == code][,
lapply(.SD, sum, na.rm = TRUE), by = Code,
.SDcols = patterns("^DR0")],
id.var = "Code", variable.name = "name", value.name = "val")[,
name := readr::parse_number(as.character(name))][]
-输出
Code name val
<char> <num> <num>
1: CDE 1 5
2: CDE 2 5
3: CDE 3 5
4: CDE 4 5
5: CDE 5 5
6: CDE 6 5
7: CDE 7 5
8: CDE 8 5
9: CDE 9 5
10: CDE 10 5
11: CDE 11 5
12: CDE 12 5
13: CDE 13 5
14: CDE 14 5
或者使用R链(|>
)
SPV[date2 == dmda & Code == code] |>
{\(.) .[, lapply(.SD, sum, na.rm = TRUE), by = Code,
.SDcols = patterns("^DR0")]}() |>
melt(id.var = "Code", variable.name = "name", value.name = "val") |>
{\(.) .[, name := readr::parse_number(as.character(name))][]}()
我需要一些帮助来调整我生成的名为 adjusted
的输出。我的想法是以某种方式进行优化以更快地生成。请注意,我使用的是 pivot_longer
,这需要更长的时间。一种想法是继续使用 data.table
,就像我生成 SPV
时所做的那样。但是,在 adjusted
的这种情况下,我不知道该怎么做。你能帮帮我吗?
我想生成与问题中相同的输出 table。
library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)
df1 <- structure(
list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
"2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-06-25","2021-06-25","2021-06-27","2021-07-07","2021-07-07","2021-07-09","2021-07-09","2021-07-09"),
Code = c("FDE","ABC","ABC","ABC","CDE","FGE","ABC","CDE"),
Week= c("Wednesday","Wednesday","Friday","Wednesday","Wednesday","Friday","Friday","Friday"),
DR1 = c(4,1,4,3,3,4,3,5),
DR01 = c(4,1,4,3,3,4,3,6), DR02= c(4,2,6,7,3,2,7,4),DR03= c(9,5,4,3,3,2,1,5),
DR04 = c(5,4,3,3,6,2,1,9),DR05 = c(5,4,5,3,6,2,1,9),
DR06 = c(2,4,3,3,5,6,7,8),DR07 = c(2,5,4,4,9,4,7,8),
DR08 = c(0,0,0,1,2,0,0,0),DR09 = c(0,0,0,0,0,0,0,0),DR010 = c(0,0,0,0,0,0,0,0),DR011 = c(4,0,0,0,0,0,0,0),
DR012 = c(0,0,0,3,0,0,0,5),DR013 = c(0,0,1,0,0,0,2,0),DR014 = c(0,0,0,0,0,2,0,0)),
class = "data.frame", row.names = c(NA, -8L))
selection = startsWith(names(df1), "DRM")
df1[selection][is.na(df1[selection])] = 0
dt1 <- as.data.table(df1)
cols <- grep("^DR0", colnames(dt1), value = TRUE)
medi_ana <-
dt1[, (paste0(cols, "_PV")) := DR1 - .SD, .SDcols = cols
][, lapply(.SD, median), by = .(Code, Week), .SDcols = paste0(cols, "_PV") ]
f1 <- function(nm, pat) grep(pat, nm, value = TRUE)
nm1 <- f1(names(df1), "^DR0\d+$")
nm2 <- f1(names(medi_ana), "_PV")
nm3 <- paste0("i.", nm2)
setDT(df1)[medi_ana, (nm2) := Map(`+`, mget(nm1), mget(nm3)), on = .(Code, Week)]
SPV <- df1[, c('date1', 'date2', 'Code', 'Week', nm2), with = FALSE]
dmda<-"2021-07-09"
code<-"CDE"
adjusted<-SPV %>%
filter(date2==dmda,Code == code) %>%
group_by(Code) %>%
summarize(across(starts_with("DR0"), sum),.groups = 'drop') %>%
pivot_longer(cols= -Code, names_pattern = "DR0(.+)", values_to = "val") %>%
mutate(name = readr::parse_number(name))
> adjusted
# A tibble: 14 x 3
Code name val
<chr> <dbl> <dbl>
1 CDE 1 5
2 CDE 2 5
3 CDE 3 5
4 CDE 4 5
5 CDE 5 5
6 CDE 6 5
7 CDE 7 5
8 CDE 8 5
9 CDE 9 5
10 CDE 10 5
11 CDE 11 5
12 CDE 12 5
13 CDE 13 5
14 CDE 14 5
使用data.table
,过滤和分组后使用melt
library(data.table)
melt(SPV[date2 == dmda & Code == code][,
lapply(.SD, sum, na.rm = TRUE), by = Code,
.SDcols = patterns("^DR0")],
id.var = "Code", variable.name = "name", value.name = "val")[,
name := readr::parse_number(as.character(name))][]
-输出
Code name val
<char> <num> <num>
1: CDE 1 5
2: CDE 2 5
3: CDE 3 5
4: CDE 4 5
5: CDE 5 5
6: CDE 6 5
7: CDE 7 5
8: CDE 8 5
9: CDE 9 5
10: CDE 10 5
11: CDE 11 5
12: CDE 12 5
13: CDE 13 5
14: CDE 14 5
或者使用R链(|>
)
SPV[date2 == dmda & Code == code] |>
{\(.) .[, lapply(.SD, sum, na.rm = TRUE), by = Code,
.SDcols = patterns("^DR0")]}() |>
melt(id.var = "Code", variable.name = "name", value.name = "val") |>
{\(.) .[, name := readr::parse_number(as.character(name))][]}()