(1-previous_record)*current_record 的累积乘积
Cumulative product of (1-previous_record)*current_record
数据框包含两个变量(time
和rate
)和10个观测值
# Example data: 10 observations of a time index and a rate.
# seq_len(10) states the intent directly; seq(1:10) only produced 1..10
# because seq() applied to a vector behaves like seq_along().
time <- seq_len(10)
rate <- 1 - 0.99^time
dat <- data.frame(time, rate)
我需要添加一个新列(名为 new_rate
)。
new_rate
定义如下
注:new_rate_1
是新列 new_rate 的第一个观测值,以此类推
new_rate_1 = rate_1
new_rate_2 = (1-rate_1)*rate_2
new_rate_3 = (1-rate_1)*(1-rate_2)*rate_3
new_rate_4 = (1-rate_1)*(1-rate_2)*(1-rate_3)*rate_4
...
new_rate_10 = (1-rate_1)*(1-rate_2)*(1-rate_3)*(1-rate_4)*(1-rate_5)*(1-rate_6)*(1-rate_7)*(1-rate_8)*(1-rate_9)*rate_10
如何在 base R 或 dplyr
中完成?
cumprod
可以解决这个问题(感谢 @Cole 帮忙简化代码):
# new_rate: each rate scaled by the running product of (1 - rate) over all
# earlier rows; the leading 1 leaves the first rate unchanged.
with(dat, rate * c(1, cumprod(1 - rate[-length(rate)])))
逻辑是:您实际上是在计算 1 - dat$rate
的累积乘积(cumprod),
再乘以当前步骤的 rate。
在第一步中,您可以只保留现有值,但随后您需要偏移两个向量,以便乘法得到所需的结果。
证明:
# Hand-expanded reference result: element i multiplies (1 - rate) for every
# earlier row by the rate of row i, written out term by term for all 10 rows.
out <- c(
dat$rate[1],
(1-dat$rate[1])*dat$rate[2],
(1-dat$rate[1])*(1-dat$rate[2])*dat$rate[3],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*dat$rate[4],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*(1-dat$rate[4])*dat$rate[5],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*(1-dat$rate[4])*(1-dat$rate[5])*dat$rate[6],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*(1-dat$rate[4])*(1-dat$rate[5])*(1-dat$rate[6])*dat$rate[7],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*(1-dat$rate[4])*(1-dat$rate[5])*(1-dat$rate[6])*(1-dat$rate[7])*dat$rate[8],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*(1-dat$rate[4])*(1-dat$rate[5])*(1-dat$rate[6])*(1-dat$rate[7])*(1-dat$rate[8])*dat$rate[9],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*(1-dat$rate[4])*(1-dat$rate[5])*(1-dat$rate[6])*(1-dat$rate[7])*(1-dat$rate[8])*(1-dat$rate[9])*dat$rate[10]
)
# Compare the vectorised cumprod formula against the hand-expanded reference.
# all.equal() uses a numeric tolerance, so tiny floating-point differences
# between the two computations do not cause a mismatch.
all.equal(
dat$rate * c(1, cumprod(1 - head(dat$rate, -1))),
out
)
#[1] TRUE
如果您仍然想了解如何使用 purrr::reduce
系列函数,这里有两个解决方案:
- 在每次迭代中,如果将 accumulated/previous 值乘以(1/前一个速率值 - 1)*(当前速率值),您将在每一行中获得所需的输出
library(purrr)
# Walk over consecutive (previous rate, current rate) pairs, starting from
# the first rate. Each step rescales the previous result:
#   previous_result * (1 / previous_rate - 1) * current_rate
# Because the step divides by the previous rate, every rate except the last
# must be non-zero for this recurrence to be valid.
# Base unlist() flattens the accumulated result to a numeric vector without
# relying on the pipe or on purrr's older simplify() helper.
unlist(
  accumulate2(
    dat$rate[-nrow(dat)], dat$rate[-1],
    ~ ..1 * (1 / ..2 - 1) * ..3,
    .init = dat$rate[1]
  )
)
[1] 0.01000000 0.01970100 0.02881885 0.03709807 0.04432372 0.05033049 0.05500858 0.05830607
[9] 0.06022773 0.06083074
而且在 base R 中,我们可以执行以下操作:
# Fold over row indices 2..n, carrying the previous result forward and
# rescaling it by (1 / previous rate - 1) times the current rate.
# accumulate = TRUE keeps every intermediate value, so the result has one
# entry per row. The step divides by the previous rate.
Reduce(
  f = function(carried, i) {
    carried * (1 / dat$rate[i - 1] - 1) * dat$rate[i]
  },
  x = seq_len(nrow(dat))[-1],
  init = dat$rate[1],
  accumulate = TRUE
)
[1] 0.01000000 0.01970100 0.02881885 0.03709807 0.04432372 0.05033049 0.05500858 0.05830607
[9] 0.06022773 0.06083074
使用 cumprod
的简单数学方法应该有效
> c(1, head(cumprod(1 - rate), -1)) * rate
[1] 0.01000000 0.01970100 0.02881885 0.03709807 0.04432372 0.05033049
[7] 0.05500858 0.05830607 0.06022773 0.06083074
如果你想用递归练习,可以试试下面的方法
# Recursively compute new_rate for the first k elements of v, where
#   new_rate[i] = v[i] * prod(1 - v[1..(i-1)]).
#
# v: numeric vector of rates.
# k: number of leading elements to process (defaults to all of v).
# Returns a numeric vector of length k (empty when k < 1).
f <- function(v, k = length(v)) {
  # Empty input: without this guard the recursion would never reach k == 1.
  if (k < 1) {
    return(v[0])
  }
  if (k == 1) {
    return(v[1])
  }
  u <- f(v, k - 1)
  # Use the survival product directly instead of rescaling the previous
  # result by (1 / v[k - 1] - 1): that form divides by the previous rate and
  # yields NaN as soon as an earlier rate is exactly 0.
  surv <- prod(1 - v[seq_len(k - 1)])
  c(u, surv * v[k])
}
这样
> f(rate)
[1] 0.01000000 0.01970100 0.02881885 0.03709807 0.04432372 0.05033049
[7] 0.05500858 0.05830607 0.06022773 0.06083074
数据框包含两个变量(time
和rate
)和10个观测值
# Example data: 10 observations of a time index and a rate.
# seq_len(10) states the intent directly; seq(1:10) only produced 1..10
# because seq() applied to a vector behaves like seq_along().
time <- seq_len(10)
rate <- 1 - 0.99^time
dat <- data.frame(time, rate)
我需要添加一个新列(名为 new_rate
)。
new_rate
定义如下
注:new_rate_1
是新列 new_rate 的第一个观测值,以此类推
new_rate_1 = rate_1
new_rate_2 = (1-rate_1)*rate_2
new_rate_3 = (1-rate_1)*(1-rate_2)*rate_3
new_rate_4 = (1-rate_1)*(1-rate_2)*(1-rate_3)*rate_4
...
new_rate_10 = (1-rate_1)*(1-rate_2)*(1-rate_3)*(1-rate_4)*(1-rate_5)*(1-rate_6)*(1-rate_7)*(1-rate_8)*(1-rate_9)*rate_10
如何在 base R 或 dplyr
中完成?
cumprod
可以解决这个问题(感谢 @Cole 帮忙简化代码):
# new_rate: each rate scaled by the running product of (1 - rate) over all
# earlier rows; the leading 1 leaves the first rate unchanged.
with(dat, rate * c(1, cumprod(1 - rate[-length(rate)])))
逻辑是:您实际上是在计算 1 - dat$rate
的累积乘积(cumprod),
再乘以当前步骤的 rate。
在第一步中,您可以只保留现有值,但随后您需要偏移两个向量,以便乘法得到所需的结果。
证明:
# Hand-expanded reference result: element i multiplies (1 - rate) for every
# earlier row by the rate of row i, written out term by term for all 10 rows.
out <- c(
dat$rate[1],
(1-dat$rate[1])*dat$rate[2],
(1-dat$rate[1])*(1-dat$rate[2])*dat$rate[3],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*dat$rate[4],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*(1-dat$rate[4])*dat$rate[5],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*(1-dat$rate[4])*(1-dat$rate[5])*dat$rate[6],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*(1-dat$rate[4])*(1-dat$rate[5])*(1-dat$rate[6])*dat$rate[7],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*(1-dat$rate[4])*(1-dat$rate[5])*(1-dat$rate[6])*(1-dat$rate[7])*dat$rate[8],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*(1-dat$rate[4])*(1-dat$rate[5])*(1-dat$rate[6])*(1-dat$rate[7])*(1-dat$rate[8])*dat$rate[9],
(1-dat$rate[1])*(1-dat$rate[2])*(1-dat$rate[3])*(1-dat$rate[4])*(1-dat$rate[5])*(1-dat$rate[6])*(1-dat$rate[7])*(1-dat$rate[8])*(1-dat$rate[9])*dat$rate[10]
)
# Compare the vectorised cumprod formula against the hand-expanded reference.
# all.equal() uses a numeric tolerance, so tiny floating-point differences
# between the two computations do not cause a mismatch.
all.equal(
dat$rate * c(1, cumprod(1 - head(dat$rate, -1))),
out
)
#[1] TRUE
如果您仍然想了解如何使用 purrr::reduce
系列函数,这里有两个解决方案:
- 在每次迭代中,如果将 accumulated/previous 值乘以(1/前一个速率值 - 1)*(当前速率值),您将在每一行中获得所需的输出
library(purrr)
# Walk over consecutive (previous rate, current rate) pairs, starting from
# the first rate. Each step rescales the previous result:
#   previous_result * (1 / previous_rate - 1) * current_rate
# Because the step divides by the previous rate, every rate except the last
# must be non-zero for this recurrence to be valid.
# Base unlist() flattens the accumulated result to a numeric vector without
# relying on the pipe or on purrr's older simplify() helper.
unlist(
  accumulate2(
    dat$rate[-nrow(dat)], dat$rate[-1],
    ~ ..1 * (1 / ..2 - 1) * ..3,
    .init = dat$rate[1]
  )
)
[1] 0.01000000 0.01970100 0.02881885 0.03709807 0.04432372 0.05033049 0.05500858 0.05830607
[9] 0.06022773 0.06083074
而且在 base R 中,我们可以执行以下操作:
# Fold over row indices 2..n, carrying the previous result forward and
# rescaling it by (1 / previous rate - 1) times the current rate.
# accumulate = TRUE keeps every intermediate value, so the result has one
# entry per row. The step divides by the previous rate.
Reduce(
  f = function(carried, i) {
    carried * (1 / dat$rate[i - 1] - 1) * dat$rate[i]
  },
  x = seq_len(nrow(dat))[-1],
  init = dat$rate[1],
  accumulate = TRUE
)
[1] 0.01000000 0.01970100 0.02881885 0.03709807 0.04432372 0.05033049 0.05500858 0.05830607
[9] 0.06022773 0.06083074
使用 cumprod
的简单数学方法应该有效
> c(1, head(cumprod(1 - rate), -1)) * rate
[1] 0.01000000 0.01970100 0.02881885 0.03709807 0.04432372 0.05033049
[7] 0.05500858 0.05830607 0.06022773 0.06083074
如果你想用递归练习,可以试试下面的方法
# Recursively compute new_rate for the first k elements of v, where
#   new_rate[i] = v[i] * prod(1 - v[1..(i-1)]).
#
# v: numeric vector of rates.
# k: number of leading elements to process (defaults to all of v).
# Returns a numeric vector of length k (empty when k < 1).
f <- function(v, k = length(v)) {
  # Empty input: without this guard the recursion would never reach k == 1.
  if (k < 1) {
    return(v[0])
  }
  if (k == 1) {
    return(v[1])
  }
  u <- f(v, k - 1)
  # Use the survival product directly instead of rescaling the previous
  # result by (1 / v[k - 1] - 1): that form divides by the previous rate and
  # yields NaN as soon as an earlier rate is exactly 0.
  surv <- prod(1 - v[seq_len(k - 1)])
  c(u, surv * v[k])
}
这样
> f(rate)
[1] 0.01000000 0.01970100 0.02881885 0.03709807 0.04432372 0.05033049
[7] 0.05500858 0.05830607 0.06022773 0.06083074