使用倒数第二个值作为 .init 参数的 accumulate 函数
Using accumulate function with second to last value as .init argument
我最近遇到了一个有趣的问题,即使用倒数第二个值作为 .init
参数加上一个附加向量的当前值来计算向量值。这是示例数据集:
set.seed(13)
dt <- data.frame(id = rep(letters[1:2], each = 5), time = rep(1:5, 2), ret = rnorm(10)/100)
dt$ind <- if_else(dt$time == 1, 120, if_else(dt$time == 2, 125, as.numeric(NA)))
id time ret ind
1 a 1 0.005543269 120
2 a 2 -0.002802719 125
3 a 3 0.017751634 NA
4 a 4 0.001873201 NA
5 a 5 0.011425261 NA
6 b 1 0.004155261 120
7 b 2 0.012295066 125
8 b 3 0.002366797 NA
9 b 4 -0.003653828 NA
10 b 5 0.011051443 NA
我要计算的是:
ind_{t} = ind_{t-2}*(1+ret_{t})
我尝试了以下代码。由于 .init
在这里没有用我尝试使原始 .init
无效并创建一个虚拟 .init
但不幸的是它不会将新创建的值(从第三行向下)拖入计算:
dt %>%
group_by(id) %>%
mutate(ind = c(120, accumulate(3:n(), .init = 125,
~ .x * 1/.x * ind[.y - 2] * (1 + ret[.y]))))
# A tibble: 10 x 4
# Groups: id [2]
id time ret ind
<chr> <int> <dbl> <dbl>
1 a 1 0.00554 120
2 a 2 -0.00280 125
3 a 3 0.0178 122.
4 a 4 0.00187 125.
5 a 5 0.0114 NA
6 b 1 0.00416 120
7 b 2 0.0123 125
8 b 3 0.00237 120.
9 b 4 -0.00365 125.
10 b 5 0.0111 NA
我想知道是否可以对这段代码进行调整并使其完全运行。
非常感谢您的提前帮助
使用由 ind 的当前值和 ind 的先验值组成的状态向量。这样,先验状态包含 ind 的第二个先验值。我们将其编码为复数,实部等于 ind,虚部等于 ind 的先验值。最后我们拿实数。
library(dplyr)
library(purrr)
dt %>%
group_by(id) %>%
mutate(result = c(ind[1],
Re(accumulate(.x = tail(ret, -2),
.f = ~ Im(.x) * (1 + .y) + Re(.x) * 1i,
.init = ind[2] + ind[1] * 1i)))) %>%
ungroup
给予:
# A tibble: 10 x 5
id time ret ind result
<chr> <int> <dbl> <dbl> <dbl>
1 a 1 0.00554 120 120
2 a 2 -0.00280 125 125
3 a 3 0.0178 NA 122.
4 a 4 0.00187 NA 125.
5 a 5 0.0114 NA 124.
6 b 1 0.00416 120 120
7 b 2 0.0123 125 125
8 b 3 0.00237 NA 120.
9 b 4 -0.00365 NA 125.
10 b 5 0.0111 NA 122.
变化
此变体消除了复数,并使用包含 2 个元素的向量代替每个复数,其中第一个数字对应于先前解中的实部,每对的第二个数字对应于虚部。这可以扩展到每个状态需要超过 2 个数字并且相关性涉及所有最后 N 个值的情况,但对于这里的问题,存在额外代码行的缺点,即从对列表中提取结果比先前解决方案中使用 Re 涉及更多的数字。
dt %>%
group_by(id) %>%
mutate(result = c(ind[1],
accumulate(.x = tail(ret, -2),
.f = ~ c(.x[2] * (1 + .y), .x[1]),
.init = ind[2:1])),
result = map_dbl(result, first)) %>%
ungroup
检查
我们检查以上结果是否正确。或者,这可以用作直接的解决方案。
calc <- function(ind, ret) {
for(i in seq(3, length(ret))) ind[i] <- ind[i-2] * (1 + ret[i])
ind
}
dt %>%
group_by(id) %>%
mutate(result = calc(ind, ret)) %>%
ungroup
给予:
# A tibble: 10 x 5
id time ret ind result
<chr> <int> <dbl> <dbl> <dbl>
1 a 1 0.00554 120 120
2 a 2 -0.00280 125 125
3 a 3 0.0178 NA 122.
4 a 4 0.00187 NA 125.
5 a 5 0.0114 NA 124.
6 b 1 0.00416 120 120
7 b 2 0.0123 125 125
8 b 3 0.00237 NA 120.
9 b 4 -0.00365 NA 125.
10 b 5 0.0111 NA 122.
我会通过为每个序列创建虚拟组来完成它,这样它就可以为任意数量的 'N' 完成。在新的详细数据上展示它
df <- data.frame(
stringsAsFactors = FALSE,
grp = c("a","a","a","a",
"a","a","a","a","a","b","b","b","b","b",
"b","b","b","b"),
rate = c(0.082322056,
0.098491104,0.07294593,0.08741672,0.030179747,
0.061389031,0.011232314,0.08553277,0.091272669,
0.031577847,0.024039791,0.091719552,0.032540636,
0.020411727,0.094521716,0.081729178,0.066429708,
0.04985793),
ind = c(11000L,12000L,
13000L,NA,NA,NA,NA,NA,NA,10000L,13000L,12000L,
NA,NA,NA,NA,NA,NA)
)
df
#> grp rate ind
#> 1 a 0.08232206 11000
#> 2 a 0.09849110 12000
#> 3 a 0.07294593 13000
#> 4 a 0.08741672 NA
#> 5 a 0.03017975 NA
#> 6 a 0.06138903 NA
#> 7 a 0.01123231 NA
#> 8 a 0.08553277 NA
#> 9 a 0.09127267 NA
#> 10 b 0.03157785 10000
#> 11 b 0.02403979 13000
#> 12 b 0.09171955 12000
#> 13 b 0.03254064 NA
#> 14 b 0.02041173 NA
#> 15 b 0.09452172 NA
#> 16 b 0.08172918 NA
#> 17 b 0.06642971 NA
#> 18 b 0.04985793 NA
library(tidyverse)
N = 3
df %>% group_by(grp) %>%
group_by(d = row_number() %% N, .add = TRUE) %>%
mutate(ind = accumulate(rate[-1] + 1, .init = ind[1], ~ .x * .y))
#> # A tibble: 18 x 4
#> # Groups: grp, d [6]
#> grp rate ind d
#> <chr> <dbl> <dbl> <dbl>
#> 1 a 0.0823 11000 1
#> 2 a 0.0985 12000 2
#> 3 a 0.0729 13000 0
#> 4 a 0.0874 11962. 1
#> 5 a 0.0302 12362. 2
#> 6 a 0.0614 13798. 0
#> 7 a 0.0112 12096. 1
#> 8 a 0.0855 13420. 2
#> 9 a 0.0913 15057. 0
#> 10 b 0.0316 10000 1
#> 11 b 0.0240 13000 2
#> 12 b 0.0917 12000 0
#> 13 b 0.0325 10325. 1
#> 14 b 0.0204 13265. 2
#> 15 b 0.0945 13134. 0
#> 16 b 0.0817 11169. 1
#> 17 b 0.0664 14147. 2
#> 18 b 0.0499 13789. 0
备选答案 dplyr
(仅使用您自己的数据稍加修改)
set.seed(13)
dt <- data.frame(id = rep(letters[1:2], each = 5), time = rep(1:5, 2), ret = rnorm(10)/100)
dt$ind <- ifelse(dt$time == 1, 12000, ifelse(dt$time == 2, 12500, as.numeric(NA)))
library(dplyr, warn.conflicts = F)
dt %>% group_by(id) %>%
group_by(d= row_number() %% 2, .add = TRUE) %>%
mutate(ind = cumprod(1 + duplicated(id) * ret)* ind[1])
#> # A tibble: 10 x 5
#> # Groups: id, d [4]
#> id time ret ind d
#> <chr> <int> <dbl> <dbl> <dbl>
#> 1 a 1 0.00554 12000 1
#> 2 a 2 -0.00280 12500 0
#> 3 a 3 0.0178 12213. 1
#> 4 a 4 0.00187 12523. 0
#> 5 a 5 0.0114 12353. 1
#> 6 b 1 0.00416 12000 0
#> 7 b 2 0.0123 12500 1
#> 8 b 3 0.00237 12028. 0
#> 9 b 4 -0.00365 12454. 1
#> 10 b 5 0.0111 12161. 0
我最近遇到了一个有趣的问题,即使用倒数第二个值作为 .init
参数加上一个附加向量的当前值来计算向量值。这是示例数据集:
set.seed(13)
dt <- data.frame(id = rep(letters[1:2], each = 5), time = rep(1:5, 2), ret = rnorm(10)/100)
dt$ind <- if_else(dt$time == 1, 120, if_else(dt$time == 2, 125, as.numeric(NA)))
id time ret ind
1 a 1 0.005543269 120
2 a 2 -0.002802719 125
3 a 3 0.017751634 NA
4 a 4 0.001873201 NA
5 a 5 0.011425261 NA
6 b 1 0.004155261 120
7 b 2 0.012295066 125
8 b 3 0.002366797 NA
9 b 4 -0.003653828 NA
10 b 5 0.011051443 NA
我要计算的是:
ind_{t} = ind_{t-2}*(1+ret_{t})
我尝试了以下代码。由于 .init
在这里没有用我尝试使原始 .init
无效并创建一个虚拟 .init
但不幸的是它不会将新创建的值(从第三行向下)拖入计算:
dt %>%
group_by(id) %>%
mutate(ind = c(120, accumulate(3:n(), .init = 125,
~ .x * 1/.x * ind[.y - 2] * (1 + ret[.y]))))
# A tibble: 10 x 4
# Groups: id [2]
id time ret ind
<chr> <int> <dbl> <dbl>
1 a 1 0.00554 120
2 a 2 -0.00280 125
3 a 3 0.0178 122.
4 a 4 0.00187 125.
5 a 5 0.0114 NA
6 b 1 0.00416 120
7 b 2 0.0123 125
8 b 3 0.00237 120.
9 b 4 -0.00365 125.
10 b 5 0.0111 NA
我想知道是否可以对这段代码进行调整并使其完全运行。 非常感谢您的提前帮助
使用由 ind 的当前值和 ind 的先验值组成的状态向量。这样,先验状态包含 ind 的第二个先验值。我们将其编码为复数,实部等于 ind,虚部等于 ind 的先验值。最后我们拿实数。
library(dplyr)
library(purrr)
dt %>%
group_by(id) %>%
mutate(result = c(ind[1],
Re(accumulate(.x = tail(ret, -2),
.f = ~ Im(.x) * (1 + .y) + Re(.x) * 1i,
.init = ind[2] + ind[1] * 1i)))) %>%
ungroup
给予:
# A tibble: 10 x 5
id time ret ind result
<chr> <int> <dbl> <dbl> <dbl>
1 a 1 0.00554 120 120
2 a 2 -0.00280 125 125
3 a 3 0.0178 NA 122.
4 a 4 0.00187 NA 125.
5 a 5 0.0114 NA 124.
6 b 1 0.00416 120 120
7 b 2 0.0123 125 125
8 b 3 0.00237 NA 120.
9 b 4 -0.00365 NA 125.
10 b 5 0.0111 NA 122.
变化
此变体消除了复数,并使用包含 2 个元素的向量代替每个复数,其中第一个数字对应于先前解中的实部,每对的第二个数字对应于虚部。这可以扩展到每个状态需要超过 2 个数字并且相关性涉及所有最后 N 个值的情况,但对于这里的问题,存在额外代码行的缺点,即从对列表中提取结果比先前解决方案中使用 Re 涉及更多的数字。
dt %>%
group_by(id) %>%
mutate(result = c(ind[1],
accumulate(.x = tail(ret, -2),
.f = ~ c(.x[2] * (1 + .y), .x[1]),
.init = ind[2:1])),
result = map_dbl(result, first)) %>%
ungroup
检查
我们检查以上结果是否正确。或者,这可以用作直接的解决方案。
calc <- function(ind, ret) {
for(i in seq(3, length(ret))) ind[i] <- ind[i-2] * (1 + ret[i])
ind
}
dt %>%
group_by(id) %>%
mutate(result = calc(ind, ret)) %>%
ungroup
给予:
# A tibble: 10 x 5
id time ret ind result
<chr> <int> <dbl> <dbl> <dbl>
1 a 1 0.00554 120 120
2 a 2 -0.00280 125 125
3 a 3 0.0178 NA 122.
4 a 4 0.00187 NA 125.
5 a 5 0.0114 NA 124.
6 b 1 0.00416 120 120
7 b 2 0.0123 125 125
8 b 3 0.00237 NA 120.
9 b 4 -0.00365 NA 125.
10 b 5 0.0111 NA 122.
我会通过为每个序列创建虚拟组来完成它,这样它就可以为任意数量的 'N' 完成。在新的详细数据上展示它
df <- data.frame(
stringsAsFactors = FALSE,
grp = c("a","a","a","a",
"a","a","a","a","a","b","b","b","b","b",
"b","b","b","b"),
rate = c(0.082322056,
0.098491104,0.07294593,0.08741672,0.030179747,
0.061389031,0.011232314,0.08553277,0.091272669,
0.031577847,0.024039791,0.091719552,0.032540636,
0.020411727,0.094521716,0.081729178,0.066429708,
0.04985793),
ind = c(11000L,12000L,
13000L,NA,NA,NA,NA,NA,NA,10000L,13000L,12000L,
NA,NA,NA,NA,NA,NA)
)
df
#> grp rate ind
#> 1 a 0.08232206 11000
#> 2 a 0.09849110 12000
#> 3 a 0.07294593 13000
#> 4 a 0.08741672 NA
#> 5 a 0.03017975 NA
#> 6 a 0.06138903 NA
#> 7 a 0.01123231 NA
#> 8 a 0.08553277 NA
#> 9 a 0.09127267 NA
#> 10 b 0.03157785 10000
#> 11 b 0.02403979 13000
#> 12 b 0.09171955 12000
#> 13 b 0.03254064 NA
#> 14 b 0.02041173 NA
#> 15 b 0.09452172 NA
#> 16 b 0.08172918 NA
#> 17 b 0.06642971 NA
#> 18 b 0.04985793 NA
library(tidyverse)
N = 3
df %>% group_by(grp) %>%
group_by(d = row_number() %% N, .add = TRUE) %>%
mutate(ind = accumulate(rate[-1] + 1, .init = ind[1], ~ .x * .y))
#> # A tibble: 18 x 4
#> # Groups: grp, d [6]
#> grp rate ind d
#> <chr> <dbl> <dbl> <dbl>
#> 1 a 0.0823 11000 1
#> 2 a 0.0985 12000 2
#> 3 a 0.0729 13000 0
#> 4 a 0.0874 11962. 1
#> 5 a 0.0302 12362. 2
#> 6 a 0.0614 13798. 0
#> 7 a 0.0112 12096. 1
#> 8 a 0.0855 13420. 2
#> 9 a 0.0913 15057. 0
#> 10 b 0.0316 10000 1
#> 11 b 0.0240 13000 2
#> 12 b 0.0917 12000 0
#> 13 b 0.0325 10325. 1
#> 14 b 0.0204 13265. 2
#> 15 b 0.0945 13134. 0
#> 16 b 0.0817 11169. 1
#> 17 b 0.0664 14147. 2
#> 18 b 0.0499 13789. 0
备选答案 dplyr
(仅使用您自己的数据稍加修改)
set.seed(13)
dt <- data.frame(id = rep(letters[1:2], each = 5), time = rep(1:5, 2), ret = rnorm(10)/100)
dt$ind <- ifelse(dt$time == 1, 12000, ifelse(dt$time == 2, 12500, as.numeric(NA)))
library(dplyr, warn.conflicts = F)
dt %>% group_by(id) %>%
group_by(d= row_number() %% 2, .add = TRUE) %>%
mutate(ind = cumprod(1 + duplicated(id) * ret)* ind[1])
#> # A tibble: 10 x 5
#> # Groups: id, d [4]
#> id time ret ind d
#> <chr> <int> <dbl> <dbl> <dbl>
#> 1 a 1 0.00554 12000 1
#> 2 a 2 -0.00280 12500 0
#> 3 a 3 0.0178 12213. 1
#> 4 a 4 0.00187 12523. 0
#> 5 a 5 0.0114 12353. 1
#> 6 b 1 0.00416 12000 0
#> 7 b 2 0.0123 12500 1
#> 8 b 3 0.00237 12028. 0
#> 9 b 4 -0.00365 12454. 1
#> 10 b 5 0.0111 12161. 0