使用 rollapply 计算每周的百分比变化
Using rollapply to compute a week on week percentage change
我有一些数据看起来像:
date col1 col2 col3
<chr> <dbl> <dbl> <dbl>
1 2020_09_01 53542. 22133. 25295.
2 2020_09_02 54157. 22505. 25327.
3 2020_09_03 54137. 23115. 24993.
4 2020_09_04 50795. 23127. 24166.
5 2020_09_05 32829. 19600. 21860.
我正在尝试使用 rollapply
函数来计算 7 天的百分比变化,也就是周环比变化,但我似乎无法让 rollapply 按我预期的方式工作。我希望 rollapply
对每一天计算其相对于一周前的百分比变化。
# Asker's attempt (kept verbatim): intended to give, for each day, the
# percentage change relative to the value lagPeriod rows earlier.
lagPeriod = 7
# NOTE(review): rollapply presumably hands FUN the values inside each window,
# not a row position, so using x to index myData would not select the lagged
# row -- confirm against the zoo rollapply documentation.
matrixCalcFunction <- function(x){
(myData[[x]] - myData[[x - lagPeriod]]) / myData[[x - lagPeriod]]
}
myData %>%
# Reshape the col* columns to long format; the value column is selected below.
# NOTE(review): no grouping by column name is visible here, so the rolling
# window presumably runs over interleaved col1/col2/col3 values -- confirm.
pivot_longer(cols = contains("col")) %>%
tidyquant::tq_mutate(
select = value,
mutate_fun = rollapply,
width = lagPeriod ,
align = "right",
FUN = matrixCalcFunction
)
预期输出:
date col1 col2 col3
<chr> <dbl> <dbl> <dbl>
1 2020_09_01 NA NA NA
2 2020_09_02 NA NA NA
3 2020_09_03 NA NA NA
4 2020_09_04 NA NA NA
5 2020_09_05 NA NA NA
6 2020_09_06 NA NA NA
7 2020_09_07 -0.065 -0.055 -0.039
8 2020_09_08 -0.058 -0.029 -0.041
9 2020_09_09 0.068 0.071 0.039
10 2020_09_10 0.023 -0.0002 0.045
数据:
# Reproducible sample data: 30 rows with a character date column
# (YYYY_MM_DD) and three numeric columns col1-col3, classed as a tibble.
myData <- structure(list(date = c("2020_09_01", "2020_09_02", "2020_09_03",
"2020_09_04", "2020_09_05", "2020_09_06", "2020_09_07", "2020_09_08",
"2020_09_09", "2020_09_10", "2020_09_11", "2020_09_12", "2020_09_13",
"2020_09_14", "2020_09_15", "2020_09_16", "2020_09_17", "2020_09_18",
"2020_09_19", "2020_09_20", "2020_09_21", "2020_09_22", "2020_09_23",
"2020_09_24", "2020_09_25", "2020_09_26", "2020_09_27", "2020_09_28",
"2020_09_29", "2020_09_30"), col1 = c(53542.497, 54156.934, 54136.844,
50794.971, 32828.797, 28475.082, 50083.573, 51017.288, 57819.908,
51945.242, 27823.172, 34349.466, 28527.527, 54845.664, 56531.057,
56556.415, 55396.121, 54303.732, 37513.441, 30041.867, 52397.815,
55449.939, 56203.125, 53654.182, 53289.437, 38511.761, 28046.879,
52132.573, 56055.611, 55520.683), col2 = c(22133.29, 22504.958,
23115.242, 23126.773, 19599.718, 16752.282, 20920.38, 21844.255,
24763.05, 23121.879, 17430.447, 20110.582, 18795.882, 24027.224,
24890.61, 24408.889, 24363.402, 24582.204, 20146.731, 18376.923,
23063.298, 24221.946, 25228.194, 24658.424, 23333.315, 20066.397,
17504.372, 23561.362, 23456.284, 24101.302), col3 = c(25294.573,
25326.797, 24992.764, 24166.084, 21859.885, 17549.005, 24306.496,
24269.409, 25968.326, 25253.976, 17974.404, 22636.375, 20105.166,
27000.274, 26291.22, 27277.371, 26851.75, 26133.317, 24055.107,
19515.875, 25573.014, 31957.279, 28961.316, 26896.495, 26440.726,
22941.927, 19990.825, 26595.878, 27725.468, 25965.802)), row.names = c(NA,
-30L), class = c("tbl_df", "tbl", "data.frame"))
编辑:
此代码可以运行,但我对 c(NA, diff(.x))/lag(.x, 7)
这部分有点困惑,不确定它是否按我想要的方式计算,因为我得到的结果与预期输出不同。
# Asker's second attempt (kept verbatim).
# NOTE(review): c(NA, diff(.x)) is a one-row difference while the denominator
# is shifted by 7 rows, so numerator and denominator use different lags; the
# expected output's first non-NA row is row 7, i.e. a shift of 6 rows.
# NOTE(review): across() spells its naming argument `.names`; `names =` here
# is presumably not applied as a column-name template -- confirm.
myData %>%
column_to_rownames("date") %>%
mutate(across(everything(), ~ round(c(NA, diff(.x))/lag(.x, 7), 5),
names = "{col}_delta"))
对于单个观察值(col1
的第 1 行和第 7 行),我可以使用类似 diff(c(pull(myData[7, 2]), pull(myData[1, 2]))) = 3458.924
的写法,然后再除以第 1 行的值:3458.924 / pull(myData[1, 2]) = 0.0646
。因此,把 diff(c(.x, lag(.x, 7)))
之类的内容放进 diff
函数中应该就能得到结果。
将其转换为 zoo 对象,然后使用 diff
,得到的结果仍是一个 zoo 对象。可以使用 fortify.zoo(x)
将其转换回数据框,其中 x 是 diff
的结果;也可以将其保留为 zoo 对象,以便继续使用 zoo 包的其他功能。
# zoo provides ordered, index-based series and the diff() method used below.
library(zoo)
# Build a zoo series from myData, parsing the underscore-separated date
# strings with the given format.
z <- read.zoo(myData, format = "%Y_%m_%d")
# arith = FALSE makes diff() return ratios rather than differences, so
# ratio - 1 is the relative change versus the value 6 rows earlier;
# na.pad = TRUE keeps the leading rows as NA.
diff(z, lag = 6, arith = FALSE, na.pad = TRUE) - 1
要使用 rollapply 而不是最后一行,请使用:
# Right-aligned 7-row window: last value over first value, minus 1.
# Positions without a full window are filled with NA.
rollapplyr(
  z,
  width = 7,
  FUN = function(w) w[length(w)] / w[1] - 1,
  fill = NA
)
或使用lag.zoo:
# Divide each value by the one 6 rows earlier, then subtract 1.
# k = -6 shifts the series back; na.pad = TRUE pads the shifted series with NA.
# stats::lag is called explicitly so the zoo method is still dispatched when
# dplyr is attached and masks lag().
z / stats::lag(z, k = -6, na.pad = TRUE) - 1
请注意,dplyr 会屏蔽(覆盖)lag
,因此请确保没有加载它;如果确实需要 dplyr,请使用 library(dplyr, exclude = c("filter", "lag"))
来加载它。
关于编辑试试这个:
# Attach dplyr without its lag() and filter(), so they do not mask the
# functions of the same name that are already on the search path.
library(dplyr, exclude = c("lag", "filter"))
# For every column except the first: value / value 6 rows earlier - 1.
myData %>%
  mutate(across(-1, function(v) v / dplyr::lag(v, n = 6) - 1))
我有一些数据看起来像:
date col1 col2 col3
<chr> <dbl> <dbl> <dbl>
1 2020_09_01 53542. 22133. 25295.
2 2020_09_02 54157. 22505. 25327.
3 2020_09_03 54137. 23115. 24993.
4 2020_09_04 50795. 23127. 24166.
5 2020_09_05 32829. 19600. 21860.
我正在尝试使用 rollapply
函数来计算 7 天的百分比变化,也就是周环比变化,但我似乎无法让 rollapply 按我预期的方式工作。我希望 rollapply
对每一天计算其相对于一周前的百分比变化。
# Asker's attempt (kept verbatim): intended to give, for each day, the
# percentage change relative to the value lagPeriod rows earlier.
lagPeriod = 7
# NOTE(review): rollapply presumably hands FUN the values inside each window,
# not a row position, so using x to index myData would not select the lagged
# row -- confirm against the zoo rollapply documentation.
matrixCalcFunction <- function(x){
(myData[[x]] - myData[[x - lagPeriod]]) / myData[[x - lagPeriod]]
}
myData %>%
# Reshape the col* columns to long format; the value column is selected below.
# NOTE(review): no grouping by column name is visible here, so the rolling
# window presumably runs over interleaved col1/col2/col3 values -- confirm.
pivot_longer(cols = contains("col")) %>%
tidyquant::tq_mutate(
select = value,
mutate_fun = rollapply,
width = lagPeriod ,
align = "right",
FUN = matrixCalcFunction
)
预期输出:
date col1 col2 col3
<chr> <dbl> <dbl> <dbl>
1 2020_09_01 NA NA NA
2 2020_09_02 NA NA NA
3 2020_09_03 NA NA NA
4 2020_09_04 NA NA NA
5 2020_09_05 NA NA NA
6 2020_09_06 NA NA NA
7 2020_09_07 -0.065 -0.055 -0.039
8 2020_09_08 -0.058 -0.029 -0.041
9 2020_09_09 0.068 0.071 0.039
10 2020_09_10 0.023 -0.0002 0.045
数据:
# Reproducible sample data: 30 rows with a character date column
# (YYYY_MM_DD) and three numeric columns col1-col3, classed as a tibble.
myData <- structure(list(date = c("2020_09_01", "2020_09_02", "2020_09_03",
"2020_09_04", "2020_09_05", "2020_09_06", "2020_09_07", "2020_09_08",
"2020_09_09", "2020_09_10", "2020_09_11", "2020_09_12", "2020_09_13",
"2020_09_14", "2020_09_15", "2020_09_16", "2020_09_17", "2020_09_18",
"2020_09_19", "2020_09_20", "2020_09_21", "2020_09_22", "2020_09_23",
"2020_09_24", "2020_09_25", "2020_09_26", "2020_09_27", "2020_09_28",
"2020_09_29", "2020_09_30"), col1 = c(53542.497, 54156.934, 54136.844,
50794.971, 32828.797, 28475.082, 50083.573, 51017.288, 57819.908,
51945.242, 27823.172, 34349.466, 28527.527, 54845.664, 56531.057,
56556.415, 55396.121, 54303.732, 37513.441, 30041.867, 52397.815,
55449.939, 56203.125, 53654.182, 53289.437, 38511.761, 28046.879,
52132.573, 56055.611, 55520.683), col2 = c(22133.29, 22504.958,
23115.242, 23126.773, 19599.718, 16752.282, 20920.38, 21844.255,
24763.05, 23121.879, 17430.447, 20110.582, 18795.882, 24027.224,
24890.61, 24408.889, 24363.402, 24582.204, 20146.731, 18376.923,
23063.298, 24221.946, 25228.194, 24658.424, 23333.315, 20066.397,
17504.372, 23561.362, 23456.284, 24101.302), col3 = c(25294.573,
25326.797, 24992.764, 24166.084, 21859.885, 17549.005, 24306.496,
24269.409, 25968.326, 25253.976, 17974.404, 22636.375, 20105.166,
27000.274, 26291.22, 27277.371, 26851.75, 26133.317, 24055.107,
19515.875, 25573.014, 31957.279, 28961.316, 26896.495, 26440.726,
22941.927, 19990.825, 26595.878, 27725.468, 25965.802)), row.names = c(NA,
-30L), class = c("tbl_df", "tbl", "data.frame"))
编辑:
此代码可以运行,但我对 c(NA, diff(.x))/lag(.x, 7)
这部分有点困惑,不确定它是否按我想要的方式计算,因为我得到的结果与预期输出不同。
# Asker's second attempt (kept verbatim).
# NOTE(review): c(NA, diff(.x)) is a one-row difference while the denominator
# is shifted by 7 rows, so numerator and denominator use different lags; the
# expected output's first non-NA row is row 7, i.e. a shift of 6 rows.
# NOTE(review): across() spells its naming argument `.names`; `names =` here
# is presumably not applied as a column-name template -- confirm.
myData %>%
column_to_rownames("date") %>%
mutate(across(everything(), ~ round(c(NA, diff(.x))/lag(.x, 7), 5),
names = "{col}_delta"))
对于单个观察值(col1
的第 1 行和第 7 行),我可以使用类似 diff(c(pull(myData[7, 2]), pull(myData[1, 2]))) = 3458.924
的写法,然后再除以第 1 行的值:3458.924 / pull(myData[1, 2]) = 0.0646
。因此,把 diff(c(.x, lag(.x, 7)))
之类的内容放进 diff
函数中应该就能得到结果。
将其转换为 zoo 对象,然后使用 diff
,得到的结果仍是一个 zoo 对象。可以使用 fortify.zoo(x)
将其转换回数据框,其中 x 是 diff
的结果;也可以将其保留为 zoo 对象,以便继续使用 zoo 包的其他功能。
# zoo provides ordered, index-based series and the diff() method used below.
library(zoo)
# Build a zoo series from myData, parsing the underscore-separated date
# strings with the given format.
z <- read.zoo(myData, format = "%Y_%m_%d")
# arith = FALSE makes diff() return ratios rather than differences, so
# ratio - 1 is the relative change versus the value 6 rows earlier;
# na.pad = TRUE keeps the leading rows as NA.
diff(z, lag = 6, arith = FALSE, na.pad = TRUE) - 1
要使用 rollapply 而不是最后一行,请使用:
# Right-aligned 7-row window: last value over first value, minus 1.
# Positions without a full window are filled with NA.
rollapplyr(
  z,
  width = 7,
  FUN = function(w) w[length(w)] / w[1] - 1,
  fill = NA
)
或使用lag.zoo:
# Divide each value by the one 6 rows earlier, then subtract 1.
# k = -6 shifts the series back; na.pad = TRUE pads the shifted series with NA.
# stats::lag is called explicitly so the zoo method is still dispatched when
# dplyr is attached and masks lag().
z / stats::lag(z, k = -6, na.pad = TRUE) - 1
请注意,dplyr 会屏蔽(覆盖)lag
,因此请确保没有加载它;如果确实需要 dplyr,请使用 library(dplyr, exclude = c("filter", "lag"))
来加载它。
关于编辑试试这个:
# Attach dplyr without its lag() and filter(), so they do not mask the
# functions of the same name that are already on the search path.
library(dplyr, exclude = c("lag", "filter"))
# For every column except the first: value / value 6 rows earlier - 1.
myData %>%
  mutate(across(-1, function(v) v / dplyr::lag(v, n = 6) - 1))