R 中死亡率指标的 7 天滚动平均值
7 day rolling average of mortality rate metric in R
我有一个医院患者的数据集,包括他们的入院日期和结果(死亡或出院)。 对于每一天,我想计算死亡率的 7 天滚动平均值。具体来说,我希望代码能够
a) 添加没有患者入院的缺失日期(到目前为止,我一直手动完成 --> 是在此处创建第二个数据集的最佳选择,left_join()?)
b) 对于每一天,确定在过去 7 天内入院的所有患者(因此过去 6 天加上手头的具体日期)
c) 根据结果计算死亡率(= 死亡患者数 / 所有患者数),只针对在这 7 天窗口内入院的患者
示例:如果我查看 4 月 8 日,我想知道这一天和之前六天收治的患者有哪些。然后我想让代码识别出该样本中有 x 个死亡患者和 y 个出院患者,并根据 x/(x+y) 计算死亡率。
之前我将患者分配到第 1 周、第 2 周、第 3 周等,并使用 dplyr 的 group_by() 和 summarize() 来计算每周的死亡率。现在,我必须做一个滚动平均数,但我不知道该怎么做。我使用 R 已经有一段时间了,但有时仍然觉得自己像个初学者 ://
这是一些数据:
structure(list(Summary = c("Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Dead", "Dead", "Dead", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Dead", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Dead", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Dead", "Discharged", "Discharged", "Dead", "Discharged",
"Discharged", "Discharged", "Dead", "Dead", "Discharged", "Discharged",
"Dead", "Discharged", "Dead", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Dead", "Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Dead", "Dead", "Discharged", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Dead", "Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Dead", "Discharged", "Dead", "Dead", "Dead", "Discharged", "Discharged",
"Discharged", "Dead", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Dead", "Discharged", "Dead", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Dead", "Discharged", "Discharged",
"Discharged", "Dead", "Discharged", "Dead", "Discharged", "Discharged",
"Dead", "Discharged", "Dead", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Dead", "Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Dead", "Dead",
"Discharged", "Dead", "Dead", "Discharged", "Discharged", "Dead",
"Dead", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Dead", "Discharged", "Dead", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Dead", "Discharged", "Dead", "Discharged", "Discharged", "Discharged",
"Dead", "Dead", "Dead", "Discharged", "Discharged", "Dead", "Dead",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Dead", "Dead", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Discharged", "Dead", "Discharged", "Dead", "Discharged", "Dead",
"Dead", "Discharged", "Discharged", "Discharged", "Dead", "Dead",
"Discharged", "Dead", "Dead", "Discharged", "Dead", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Discharged", "Dead",
"Dead", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Dead", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Dead", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Dead", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged"), Admission = structure(c(1578586420.224,
1580147094.528, 1580467041.28, 1580482376.704, 1581576565.76,
1582319350.784, 1582387115.008, 1582472442.88, 1582575203.328,
1582861464.576, 1583030023.168, 1583160439.808, 1583170532.352,
1583178658.816, 1583234757.632, 1583471735.808, 1583610278.912,
1583645930.496, 1583763633.152, 1583833887.744, 1583926817.792,
1583934813.184, 1583949231.104, 1584037966.848, 1584093803.52,
1584104289.28, 1584196039.68, 1584211506.176, 1584287003.648,
1584290280.448, 1584340218.88, 1584449139.712, 1584453989.376,
1584522540.032, 1584586503.168, 1584596071.424, 1584615994.368,
1584620581.888, 1584627266.56, 1584636703.744, 1584643388.416,
1584646140.928, 1584688477.184, 1584701846.528, 1584715215.872,
1584737104.896, 1584792941.568, 1584803165.184, 1584817976.32,
1584874075.136, 1584892949.504, 1584953111.552, 1584959403.008,
1584962417.664, 1584966480.896, 1584990336, 1584991253.504, 1585006195.712,
1585032147.968, 1585044599.808, 1585052595.2, 1585057707.008,
1585059804.16, 1585066488.832, 1585080775.68, 1585082217.472,
1585105286.144, 1585108562.944, 1585133990.912, 1585140151.296,
1585142903.808, 1585142903.808, 1585150505.984, 1585161122.816,
1585168331.776, 1585171084.288, 1585219843.072, 1585237406.72,
1585245533.184, 1585254315.008, 1585267553.28, 1585269781.504,
1585279742.976, 1585301107.712, 1585376736.256, 1585391285.248,
1585393120.256, 1585396134.912, 1585397445.632, 1585399542.784,
1585412387.84, 1585413436.416, 1585422349.312, 1585427592.192,
1585432966.144, 1585435456.512, 1585464292.352, 1585485394.944,
1585505711.104, 1585509512.192, 1585516065.792, 1585518162.944,
1585524061.184, 1585567183.872, 1585567314.944, 1585573999.616,
1585578456.064, 1585611355.136, 1585615811.584, 1585641763.84,
1585643860.992, 1585646482.432, 1585660507.136, 1585684886.528,
1585686983.68, 1585689605.12, 1585729844.224, 1585737446.4, 1585740461.056,
1585748718.592, 1585751995.392, 1585776374.784, 1585778603.008,
1585780962.304, 1585788302.336, 1585819759.616, 1585836667.904,
1585852920.832, 1585863144.448, 1585873761.28, 1585930122.24,
1585931301.888, 1585942836.224, 1585956336.64, 1585997231.104,
1586003522.56, 1586007061.504, 1586008503.296, 1586018989.056,
1586020561.92, 1586092913.664, 1586094486.528, 1586094879.744,
1586095928.32, 1586099598.336, 1586107200.512, 1586107855.872,
1586109821.952, 1586109821.952, 1586121225.216, 1586162643.968,
1586171032.576, 1586182042.624, 1586182304.768, 1586185188.352,
1586193052.672, 1586198819.84, 1586205897.728, 1586207732.736,
1586264486.912, 1586268812.288, 1586273399.808, 1586273924.096,
1586274579.456, 1586275234.816, 1586281264.128, 1586288342.016,
1586290308.096, 1586293715.968, 1586337100.8, 1586339460.096,
1586343392.256, 1586366854.144, 1586370393.088, 1586376422.4,
1586379043.84, 1586379043.84, 1586384024.576, 1586393986.048,
1586426229.76, 1586433438.72, 1586452050.944, 1586465944.576,
1586473808.896, 1586513523.712, 1586517980.16, 1586523091.968,
1586527417.344, 1586531742.72, 1586542359.552, 1586544718.848,
1586555073.536, 1586595443.712, 1586597671.936, 1586609337.344,
1586614973.44, 1586617594.88, 1586630702.08, 1586636993.536,
1586670941.184, 1586681558.016, 1586685883.392, 1586703709.184,
1586711966.72, 1586722976.768, 1586776978.432, 1586779993.088,
1586886423.552, 1586891797.504, 1586897302.528, 1586924696.576,
1586924958.72, 1586937672.704, 1586973062.144, 1586982892.544,
1586985251.84, 1586988397.568, 1587024180.224, 1587024573.44,
1587046855.68, 1587075167.232, 1587135198.208, 1587137164.288,
1587138081.792, 1587142538.24, 1587146994.688, 1587169276.928,
1587203879.936, 1587232060.416, 1587255129.088, 1587257619.456,
1587266925.568, 1587277149.184, 1587283309.568, 1587305722.88,
1587308475.392, 1587330757.632, 1587377419.264, 1587379385.344,
1587383448.576, 1587397080.064, 1587409925.12, 1587413988.352,
1587425391.616, 1587442168.832, 1587475198.976, 1587475723.264,
1587497874.432, 1587510064.128, 1587542963.2, 1587559740.416,
1587577959.424, 1587582022.656, 1587590149.12, 1587596047.36,
1587634844.672, 1587657257.984, 1587672593.408, 1587711783.936,
1587761460.224, 1587766572.032, 1587788723.2, 1587831321.6, 1587844297.728,
1587864876.032, 1587896202.24, 1587915207.68, 1587916387.328,
1587919401.984, 1587925824.512, 1588001977.344, 1588007613.44,
1588010234.88, 1588014560.256, 1588028584.96, 1588058076.16,
1588084814.848, 1588089664.512, 1588105262.08, 1588155986.944,
1588159525.888, 1588177220.608, 1588243543.04, 1588252455.936,
1588274344.96, 1588274476.032, 1588275000.32, 1588275524.608,
1588291384.32, 1588348138.496, 1588393620.48, 1588433204.224,
1588433728.512, 1588447491.072, 1588448932.864, 1588461122.56,
1588509357.056, 1588511978.496, 1588518663.168, 1588519711.744,
1588596651.008, 1588601631.744, 1588608971.776, 1588611462.144,
1588625093.632, 1588627846.144, 1588696134.656, 1588699411.456,
1588700591.104, 1588777006.08, 1588777923.584, 1588791686.144,
1588792865.792, 1588797060.096, 1588855649.28, 1588868363.264,
1588874392.576, 1588875047.936, 1588882387.968, 1588932588.544,
1589023552.512, 1589039936.512, 1589078733.824, 1589081355.264,
1589126444.032, 1589214524.416, 1589230384.128, 1589265511.424,
1589275866.112, 1589290808.32, 1589292774.4, 1589314139.136,
1589404054.528, 1589473653.76, 1589476275.2, 1589492397.056,
1589495280.64, 1589561209.856, 1589603152.896, 1589627270.144,
1589632381.952, 1589799236.608, 1589806707.712, 1590004626.432,
1590227448.832, 1590234133.504, 1590398628.864, 1590489068.544,
1590494180.352, 1590555915.264, 1590707434.496, 1590771266.56,
1590934451.2, 1591010997.248, 1591383897.088, 1591444452.352,
1591984468.992, 1592504038.4, 1592553714.688, 1592841679.872,
1592929629.184, 1592951256.064, 1594128937.984, 1594499347.456,
1595402171.392, 1595711370.24, 1597937103.872, 1598717768.704,
1599060521.984, 1599758087.168, 1599815496.704, 1600702198.784,
1600719631.36, 1601065923.584, 1601119400.96, 1601215476.736,
1601236710.4, 1601416934.4, 1601499640.832, 1601587328, 1601741206.528,
1601848423.424, 1601901245.44, 1601913828.352, 1602092872.704,
1602285417.472, 1602362881.024, 1602504963.072, 1602518987.776,
1602551231.488, 1602782311.424, 1602783491.072, 1602785457.152,
1602851124.224, 1602856629.248, 1602964501.504, 1602974594.048,
1603078140.928), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA,
-398L), class = c("tbl_df", "tbl", "data.frame"))
这是一个 data.table 方法。代码的解释写在代码的注释中。
library( data.table )
# Convert mydata to a data.table in place.
setDT(mydata)

# Count patients per admission day and outcome, then spread the outcomes
# into one column each (Dead / Discharged); missing combinations become 0.
DT <- dcast(
  mydata[, .(total = .N), by = .(date = as.IDate(Admission), Summary)],
  date ~ Summary,
  value.var = "total",
  fill = 0
)

# Calendar with one row per day between the first and last admission date,
# so that days without any admission are represented as well.
final <- data.table(date = DT[, seq(min(date), max(date), by = 1)])

# Start both counters at zero, then overwrite them with the observed counts
# on the days that actually appear in DT (update join on date).
final[, c("Dead", "Discharged") := list(0, 0)]
final[DT, c("Dead", "Discharged") := .(i.Dead, i.Discharged), on = "date"][]

# 7-day mortality rate: deaths over all admissions within a rolling window of
# 7 rows (one row per calendar day).
final[, mortality.rate := {
  deaths_7d <- frollsum(Dead, n = 7)
  admissions_7d <- frollsum(Dead + Discharged, n = 7)
  deaths_7d / admissions_7d
}]
这里有一个 tidyverse 方法,同时也用到了 zoo 包。
首先计算每天的入院人数和死亡人数:
library(tidyverse)
# Per-day tally: number of admissions and how many of those patients died.
summaries <- df %>%
  group_by(date = as.Date(Admission)) %>%
  summarise(
    total_adm = n(),
    deaths = sum(Summary == "Dead")
  )
summaries
#> # A tibble: 129 x 3
#> date total_adm deaths
#> * <date> <int> <int>
#> 1 2020-01-09 1 0
#> 2 2020-01-27 1 0
#> 3 2020-01-31 2 0
#> 4 2020-02-13 1 0
#> 5 2020-02-21 1 1
#> 6 2020-02-22 1 0
#> 7 2020-02-23 1 0
#> 8 2020-02-24 1 0
#> 9 2020-02-28 1 1
#> 10 2020-03-01 1 1
#> # … with 119 more rows
计算滚动总和和死亡率
对于日期,我先创建一个完整的日期序列,然后用 left_join() 把每日汇总连接上去。在 rollsum() 中使用 align = "right" 意味着每个窗口由当天和之前的六天组成。
# Sequence from the min to the max admission date, one entry per day.
date_seq <- seq(min(summaries$date), max(summaries$date), by = "day")

# Right-aligned 7-day rolling sum: each value covers the given day plus the
# six days before it; positions without a full window are filled with NA.
# na.rm = TRUE is passed through so that days without admissions (NA after
# the join) do not turn the whole window into NA.
trailing_week_sum <- function(x) {
  zoo::rollsum(x, k = 7, fill = NA, align = "right", na.rm = TRUE)
}

# Attach the daily counts to the full calendar, then derive the rolling sums
# for admissions and deaths and the resulting mortality rate.
left_join(tibble(date = date_seq), summaries) %>%
  mutate(
    adm_7_days = trailing_week_sum(total_adm),
    deaths_7_days = trailing_week_sum(deaths),
    mortality_rate = deaths_7_days / adm_7_days
  ) %>%
  print(n = 20)
#> Joining, by = "date"
#> # A tibble: 285 x 6
#> date total_adm deaths adm_7_days deaths_7_days mortality_rate
#> <date> <int> <int> <int> <int> <dbl>
#> 1 2020-01-09 1 0 NA NA NA
#> 2 2020-01-10 NA NA NA NA NA
#> 3 2020-01-11 NA NA NA NA NA
#> 4 2020-01-12 NA NA NA NA NA
#> 5 2020-01-13 NA NA NA NA NA
#> 6 2020-01-14 NA NA NA NA NA
#> 7 2020-01-15 NA NA 1 0 0
#> 8 2020-01-16 NA NA 0 0 NaN
#> 9 2020-01-17 NA NA 0 0 NaN
#> 10 2020-01-18 NA NA 0 0 NaN
#> 11 2020-01-19 NA NA 0 0 NaN
#> 12 2020-01-20 NA NA 0 0 NaN
#> 13 2020-01-21 NA NA 0 0 NaN
#> 14 2020-01-22 NA NA 0 0 NaN
#> 15 2020-01-23 NA NA 0 0 NaN
#> 16 2020-01-24 NA NA 0 0 NaN
#> 17 2020-01-25 NA NA 0 0 NaN
#> 18 2020-01-26 NA NA 0 0 NaN
#> 19 2020-01-27 1 0 1 0 0
#> 20 2020-01-28 NA NA 1 0 0
#> # … with 265 more rows
runner 包在这里也有帮助:
library(dplyr)
library(tidyr)
library(runner)
df %>%
  # Group by the calendar day of admission.
  group_by(date = as.Date(strptime(Admission, format = "%Y-%m-%d"))) %>%
  # One column per outcome holding the number of patients for that day.
  pivot_wider(
    id_cols = date,
    names_from = Summary,
    values_from = Admission,
    values_fn = length,
    values_fill = 0
  ) %>%
  ungroup() %>%
  # Insert the days without admissions, with zero counts for both outcomes.
  complete(
    date = seq(min(date), max(date), by = "day"),
    fill = list(Dead = 0, Discharged = 0)
  ) %>%
  # Running 7-day sums of deaths and of all admissions give the rate.
  mutate(
    Total = Dead + Discharged,
    mortality_rate = sum_run(Dead, k = 7) / sum_run(Total, k = 7)
  )
# A tibble: 285 x 5
date Discharged Dead Total mortality_rate
<date> <dbl> <dbl> <dbl> <dbl>
1 2020-01-09 1 0 1 0
2 2020-01-10 0 0 0 0
3 2020-01-11 0 0 0 0
4 2020-01-12 0 0 0 0
5 2020-01-13 0 0 0 0
6 2020-01-14 0 0 0 0
7 2020-01-15 0 0 0 0
8 2020-01-16 0 0 0 NaN
9 2020-01-17 0 0 0 NaN
10 2020-01-18 0 0 0 NaN
# ... with 275 more rows
我有一个医院患者的数据集,包括他们的入院日期和结果(死亡或出院)。 对于每一天,我想计算死亡率的 7 天滚动平均值。具体来说,我希望代码能够 a) 添加没有患者入院的缺失日期(到目前为止,我一直手动完成 --> 是在此处创建第二个数据集的最佳选择,left_join()?) b) 对于每一天,确定在过去 7 天内入院的所有患者(因此过去 6 天加上手头的具体日期) c) 根据结果计算死亡率(= 死亡患者数 / 所有患者数),只针对在这 7 天窗口内入院的患者
示例:如果我查看 4 月 8 日,我想知道这一天和之前六天收治的患者有哪些。然后我想让代码识别出该样本中有 x 个死亡患者和 y 个出院患者,并根据 x/(x+y) 计算死亡率。
之前我将患者分配到第 1 周、第 2 周、第 3 周等,并使用 dplyr 的 group_by() 和 summarize() 来计算每周的死亡率。现在,我必须做一个滚动平均数,但我不知道该怎么做。我使用 R 已经有一段时间了,但有时仍然觉得自己像个初学者 ://
这是一些数据:
structure(list(Summary = c("Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Dead", "Dead", "Dead", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Dead", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Dead", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Dead", "Discharged", "Discharged", "Dead", "Discharged",
"Discharged", "Discharged", "Dead", "Dead", "Discharged", "Discharged",
"Dead", "Discharged", "Dead", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Dead", "Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Dead", "Dead", "Discharged", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Dead", "Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Dead", "Discharged", "Dead", "Dead", "Dead", "Discharged", "Discharged",
"Discharged", "Dead", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Dead", "Discharged", "Dead", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Dead", "Discharged", "Discharged",
"Discharged", "Dead", "Discharged", "Dead", "Discharged", "Discharged",
"Dead", "Discharged", "Dead", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Dead", "Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Dead", "Dead",
"Discharged", "Dead", "Dead", "Discharged", "Discharged", "Dead",
"Dead", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Dead", "Discharged", "Dead", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Dead", "Discharged", "Dead", "Discharged", "Discharged", "Discharged",
"Dead", "Dead", "Dead", "Discharged", "Discharged", "Dead", "Dead",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Dead", "Dead", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Discharged", "Dead", "Discharged", "Dead", "Discharged", "Dead",
"Dead", "Discharged", "Discharged", "Discharged", "Dead", "Dead",
"Discharged", "Dead", "Dead", "Discharged", "Dead", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Discharged", "Dead",
"Dead", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Dead", "Discharged",
"Dead", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Dead", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Dead",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Dead", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged", "Discharged",
"Discharged", "Discharged", "Dead", "Discharged", "Discharged",
"Discharged", "Discharged", "Discharged", "Discharged"), Admission = structure(c(1578586420.224,
1580147094.528, 1580467041.28, 1580482376.704, 1581576565.76,
1582319350.784, 1582387115.008, 1582472442.88, 1582575203.328,
1582861464.576, 1583030023.168, 1583160439.808, 1583170532.352,
1583178658.816, 1583234757.632, 1583471735.808, 1583610278.912,
1583645930.496, 1583763633.152, 1583833887.744, 1583926817.792,
1583934813.184, 1583949231.104, 1584037966.848, 1584093803.52,
1584104289.28, 1584196039.68, 1584211506.176, 1584287003.648,
1584290280.448, 1584340218.88, 1584449139.712, 1584453989.376,
1584522540.032, 1584586503.168, 1584596071.424, 1584615994.368,
1584620581.888, 1584627266.56, 1584636703.744, 1584643388.416,
1584646140.928, 1584688477.184, 1584701846.528, 1584715215.872,
1584737104.896, 1584792941.568, 1584803165.184, 1584817976.32,
1584874075.136, 1584892949.504, 1584953111.552, 1584959403.008,
1584962417.664, 1584966480.896, 1584990336, 1584991253.504, 1585006195.712,
1585032147.968, 1585044599.808, 1585052595.2, 1585057707.008,
1585059804.16, 1585066488.832, 1585080775.68, 1585082217.472,
1585105286.144, 1585108562.944, 1585133990.912, 1585140151.296,
1585142903.808, 1585142903.808, 1585150505.984, 1585161122.816,
1585168331.776, 1585171084.288, 1585219843.072, 1585237406.72,
1585245533.184, 1585254315.008, 1585267553.28, 1585269781.504,
1585279742.976, 1585301107.712, 1585376736.256, 1585391285.248,
1585393120.256, 1585396134.912, 1585397445.632, 1585399542.784,
1585412387.84, 1585413436.416, 1585422349.312, 1585427592.192,
1585432966.144, 1585435456.512, 1585464292.352, 1585485394.944,
1585505711.104, 1585509512.192, 1585516065.792, 1585518162.944,
1585524061.184, 1585567183.872, 1585567314.944, 1585573999.616,
1585578456.064, 1585611355.136, 1585615811.584, 1585641763.84,
1585643860.992, 1585646482.432, 1585660507.136, 1585684886.528,
1585686983.68, 1585689605.12, 1585729844.224, 1585737446.4, 1585740461.056,
1585748718.592, 1585751995.392, 1585776374.784, 1585778603.008,
1585780962.304, 1585788302.336, 1585819759.616, 1585836667.904,
1585852920.832, 1585863144.448, 1585873761.28, 1585930122.24,
1585931301.888, 1585942836.224, 1585956336.64, 1585997231.104,
1586003522.56, 1586007061.504, 1586008503.296, 1586018989.056,
1586020561.92, 1586092913.664, 1586094486.528, 1586094879.744,
1586095928.32, 1586099598.336, 1586107200.512, 1586107855.872,
1586109821.952, 1586109821.952, 1586121225.216, 1586162643.968,
1586171032.576, 1586182042.624, 1586182304.768, 1586185188.352,
1586193052.672, 1586198819.84, 1586205897.728, 1586207732.736,
1586264486.912, 1586268812.288, 1586273399.808, 1586273924.096,
1586274579.456, 1586275234.816, 1586281264.128, 1586288342.016,
1586290308.096, 1586293715.968, 1586337100.8, 1586339460.096,
1586343392.256, 1586366854.144, 1586370393.088, 1586376422.4,
1586379043.84, 1586379043.84, 1586384024.576, 1586393986.048,
1586426229.76, 1586433438.72, 1586452050.944, 1586465944.576,
1586473808.896, 1586513523.712, 1586517980.16, 1586523091.968,
1586527417.344, 1586531742.72, 1586542359.552, 1586544718.848,
1586555073.536, 1586595443.712, 1586597671.936, 1586609337.344,
1586614973.44, 1586617594.88, 1586630702.08, 1586636993.536,
1586670941.184, 1586681558.016, 1586685883.392, 1586703709.184,
1586711966.72, 1586722976.768, 1586776978.432, 1586779993.088,
1586886423.552, 1586891797.504, 1586897302.528, 1586924696.576,
1586924958.72, 1586937672.704, 1586973062.144, 1586982892.544,
1586985251.84, 1586988397.568, 1587024180.224, 1587024573.44,
1587046855.68, 1587075167.232, 1587135198.208, 1587137164.288,
1587138081.792, 1587142538.24, 1587146994.688, 1587169276.928,
1587203879.936, 1587232060.416, 1587255129.088, 1587257619.456,
1587266925.568, 1587277149.184, 1587283309.568, 1587305722.88,
1587308475.392, 1587330757.632, 1587377419.264, 1587379385.344,
1587383448.576, 1587397080.064, 1587409925.12, 1587413988.352,
1587425391.616, 1587442168.832, 1587475198.976, 1587475723.264,
1587497874.432, 1587510064.128, 1587542963.2, 1587559740.416,
1587577959.424, 1587582022.656, 1587590149.12, 1587596047.36,
1587634844.672, 1587657257.984, 1587672593.408, 1587711783.936,
1587761460.224, 1587766572.032, 1587788723.2, 1587831321.6, 1587844297.728,
1587864876.032, 1587896202.24, 1587915207.68, 1587916387.328,
1587919401.984, 1587925824.512, 1588001977.344, 1588007613.44,
1588010234.88, 1588014560.256, 1588028584.96, 1588058076.16,
1588084814.848, 1588089664.512, 1588105262.08, 1588155986.944,
1588159525.888, 1588177220.608, 1588243543.04, 1588252455.936,
1588274344.96, 1588274476.032, 1588275000.32, 1588275524.608,
1588291384.32, 1588348138.496, 1588393620.48, 1588433204.224,
1588433728.512, 1588447491.072, 1588448932.864, 1588461122.56,
1588509357.056, 1588511978.496, 1588518663.168, 1588519711.744,
1588596651.008, 1588601631.744, 1588608971.776, 1588611462.144,
1588625093.632, 1588627846.144, 1588696134.656, 1588699411.456,
1588700591.104, 1588777006.08, 1588777923.584, 1588791686.144,
1588792865.792, 1588797060.096, 1588855649.28, 1588868363.264,
1588874392.576, 1588875047.936, 1588882387.968, 1588932588.544,
1589023552.512, 1589039936.512, 1589078733.824, 1589081355.264,
1589126444.032, 1589214524.416, 1589230384.128, 1589265511.424,
1589275866.112, 1589290808.32, 1589292774.4, 1589314139.136,
1589404054.528, 1589473653.76, 1589476275.2, 1589492397.056,
1589495280.64, 1589561209.856, 1589603152.896, 1589627270.144,
1589632381.952, 1589799236.608, 1589806707.712, 1590004626.432,
1590227448.832, 1590234133.504, 1590398628.864, 1590489068.544,
1590494180.352, 1590555915.264, 1590707434.496, 1590771266.56,
1590934451.2, 1591010997.248, 1591383897.088, 1591444452.352,
1591984468.992, 1592504038.4, 1592553714.688, 1592841679.872,
1592929629.184, 1592951256.064, 1594128937.984, 1594499347.456,
1595402171.392, 1595711370.24, 1597937103.872, 1598717768.704,
1599060521.984, 1599758087.168, 1599815496.704, 1600702198.784,
1600719631.36, 1601065923.584, 1601119400.96, 1601215476.736,
1601236710.4, 1601416934.4, 1601499640.832, 1601587328, 1601741206.528,
1601848423.424, 1601901245.44, 1601913828.352, 1602092872.704,
1602285417.472, 1602362881.024, 1602504963.072, 1602518987.776,
1602551231.488, 1602782311.424, 1602783491.072, 1602785457.152,
1602851124.224, 1602856629.248, 1602964501.504, 1602974594.048,
1603078140.928), class = c("POSIXct", "POSIXt"), tzone = "UTC")), row.names = c(NA,
-398L), class = c("tbl_df", "tbl", "data.frame"))
这是一个 data.table 方法。代码的解释写在代码的注释中。
library( data.table )
# Convert mydata to a data.table in place.
setDT(mydata)

# Count patients per admission day and outcome, then spread the outcomes
# into one column each (Dead / Discharged); missing combinations become 0.
DT <- dcast(
  mydata[, .(total = .N), by = .(date = as.IDate(Admission), Summary)],
  date ~ Summary,
  value.var = "total",
  fill = 0
)

# Calendar with one row per day between the first and last admission date,
# so that days without any admission are represented as well.
final <- data.table(date = DT[, seq(min(date), max(date), by = 1)])

# Start both counters at zero, then overwrite them with the observed counts
# on the days that actually appear in DT (update join on date).
final[, c("Dead", "Discharged") := list(0, 0)]
final[DT, c("Dead", "Discharged") := .(i.Dead, i.Discharged), on = "date"][]

# 7-day mortality rate: deaths over all admissions within a rolling window of
# 7 rows (one row per calendar day).
final[, mortality.rate := {
  deaths_7d <- frollsum(Dead, n = 7)
  admissions_7d <- frollsum(Dead + Discharged, n = 7)
  deaths_7d / admissions_7d
}]
这里有一个 tidyverse 方法,同时也用到了 zoo 包。
首先计算每天的入院人数和死亡人数:
library(tidyverse)
# Per-day tally: number of admissions and how many of those patients died.
summaries <- df %>%
  group_by(date = as.Date(Admission)) %>%
  summarise(
    total_adm = n(),
    deaths = sum(Summary == "Dead")
  )
summaries
#> # A tibble: 129 x 3
#> date total_adm deaths
#> * <date> <int> <int>
#> 1 2020-01-09 1 0
#> 2 2020-01-27 1 0
#> 3 2020-01-31 2 0
#> 4 2020-02-13 1 0
#> 5 2020-02-21 1 1
#> 6 2020-02-22 1 0
#> 7 2020-02-23 1 0
#> 8 2020-02-24 1 0
#> 9 2020-02-28 1 1
#> 10 2020-03-01 1 1
#> # … with 119 more rows
计算滚动总和和死亡率
对于日期,我先创建一个完整的日期序列,然后用 left_join() 把每日汇总连接上去。在 rollsum() 中使用 align = "right" 意味着每个窗口由当天和之前的六天组成。
# Sequence from the min to the max admission date, one entry per day.
date_seq <- seq(min(summaries$date), max(summaries$date), by = "day")

# Right-aligned 7-day rolling sum: each value covers the given day plus the
# six days before it; positions without a full window are filled with NA.
# na.rm = TRUE is passed through so that days without admissions (NA after
# the join) do not turn the whole window into NA.
trailing_week_sum <- function(x) {
  zoo::rollsum(x, k = 7, fill = NA, align = "right", na.rm = TRUE)
}

# Attach the daily counts to the full calendar, then derive the rolling sums
# for admissions and deaths and the resulting mortality rate.
left_join(tibble(date = date_seq), summaries) %>%
  mutate(
    adm_7_days = trailing_week_sum(total_adm),
    deaths_7_days = trailing_week_sum(deaths),
    mortality_rate = deaths_7_days / adm_7_days
  ) %>%
  print(n = 20)
#> Joining, by = "date"
#> # A tibble: 285 x 6
#> date total_adm deaths adm_7_days deaths_7_days mortality_rate
#> <date> <int> <int> <int> <int> <dbl>
#> 1 2020-01-09 1 0 NA NA NA
#> 2 2020-01-10 NA NA NA NA NA
#> 3 2020-01-11 NA NA NA NA NA
#> 4 2020-01-12 NA NA NA NA NA
#> 5 2020-01-13 NA NA NA NA NA
#> 6 2020-01-14 NA NA NA NA NA
#> 7 2020-01-15 NA NA 1 0 0
#> 8 2020-01-16 NA NA 0 0 NaN
#> 9 2020-01-17 NA NA 0 0 NaN
#> 10 2020-01-18 NA NA 0 0 NaN
#> 11 2020-01-19 NA NA 0 0 NaN
#> 12 2020-01-20 NA NA 0 0 NaN
#> 13 2020-01-21 NA NA 0 0 NaN
#> 14 2020-01-22 NA NA 0 0 NaN
#> 15 2020-01-23 NA NA 0 0 NaN
#> 16 2020-01-24 NA NA 0 0 NaN
#> 17 2020-01-25 NA NA 0 0 NaN
#> 18 2020-01-26 NA NA 0 0 NaN
#> 19 2020-01-27 1 0 1 0 0
#> 20 2020-01-28 NA NA 1 0 0
#> # … with 265 more rows
runner 包在这里也有帮助:
library(dplyr)
library(tidyr)
library(runner)
df %>%
  # Group by the calendar day of admission.
  group_by(date = as.Date(strptime(Admission, format = "%Y-%m-%d"))) %>%
  # One column per outcome holding the number of patients for that day.
  pivot_wider(
    id_cols = date,
    names_from = Summary,
    values_from = Admission,
    values_fn = length,
    values_fill = 0
  ) %>%
  ungroup() %>%
  # Insert the days without admissions, with zero counts for both outcomes.
  complete(
    date = seq(min(date), max(date), by = "day"),
    fill = list(Dead = 0, Discharged = 0)
  ) %>%
  # Running 7-day sums of deaths and of all admissions give the rate.
  mutate(
    Total = Dead + Discharged,
    mortality_rate = sum_run(Dead, k = 7) / sum_run(Total, k = 7)
  )
# A tibble: 285 x 5
date Discharged Dead Total mortality_rate
<date> <dbl> <dbl> <dbl> <dbl>
1 2020-01-09 1 0 1 0
2 2020-01-10 0 0 0 0
3 2020-01-11 0 0 0 0
4 2020-01-12 0 0 0 0
5 2020-01-13 0 0 0 0
6 2020-01-14 0 0 0 0
7 2020-01-15 0 0 0 0
8 2020-01-16 0 0 0 NaN
9 2020-01-17 0 0 0 NaN
10 2020-01-18 0 0 0 NaN
# ... with 275 more rows