如何在数据帧计算中用矢量函数替换 R 中的 for 循环?
How do I replace a for- loop in R with vector functions in dataframe calculations?
我一直在努力避免在 R 中使用 for 循环,尽可能依赖向量化函数,以加快计算并简化代码。到目前为止都很顺利,直到遇到某些摊销计算。我碰壁了,不得不求助于 for 循环,请参阅下面的 MWE 代码。它工作正常,但我想用向量化函数或其他更高效的函数替换它。有人可以帮我用向量化函数替换下面的内容吗?
在从中提取此 MWE 的完整代码中,它使用 Shiny 进行反应。周期和向量速率,实际上是所有变量,都会根据用户输入发生剧烈变化。 MWE 示例输入变量已简化。
无论如何,下面是一个非常笨拙的电锯方法,需要精简。但是我不知道如何从我最有经验的完整 XLS 思维方式来处理这个问题。如果 for 循环是此类计算的唯一可行选择,我欢迎任何改进以下 MWE 的建议。
最底部是尝试“矢量化”的有缺陷的代码,但当矢量变量随时间变化时结果不准确。我在底部的图像中展示了这种矢量化方法的一个问题,其中 ending/beginning 余额在从一个时期移动到下一个时期时不匹配(for-loop MWE 代码没有这些问题 - 它是功能齐全但超级笨拙)。
For循环MWE代码:
# Inputs: number of periods, opening balance, and one rate per period.
periods <- 10
beginBal <- 1000
yield_vector <- c(0.30, 0.30, 0.30, 0.30, 0.30, 0.28, 0.26, 0.20, 0.18, 0.20)
npr_vector <- c(0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30)
mpr_vector <- c(0.20, 0.20, 0.20, 0.20, 0.20, 0.20, 0.20, 0.20, 0.20, 0.20)
default_vector <- c(0.10, 0.10, 0.10, 0.10, 0.10, 0.09, 0.08, 0.07, 0.06, 0.05)

# Pre-allocate the schedule with numeric NA columns; the loop fills them in.
# seq_len() is used so that periods = 0 gives an empty frame instead of an error.
amort <- data.frame(
  period = as.numeric(seq_len(periods)),
  beginBal = rep(NA_real_, periods),
  yield = rep(NA_real_, periods),
  purchases = rep(NA_real_, periods),
  payments = rep(NA_real_, periods),
  defaults = rep(NA_real_, periods),
  endBal = rep(NA_real_, periods)
)

# Roll the balance forward one period at a time. Iterating over
# seq_len(nrow(amort)) instead of 2:nrow(amort) keeps the loop correct for a
# one-row schedule, where 2:1 would count backwards and append a bogus row.
for (i in seq_len(nrow(amort))) {
  # Period 1 opens at beginBal; every later period opens at the previous
  # period's closing balance.
  amort[i, "beginBal"] <- if (i == 1) beginBal else amort[i - 1, "endBal"]
  amort[i, "yield"] <- amort[i, "beginBal"] * yield_vector[i] / 12
  amort[i, "purchases"] <- amort[i, "beginBal"] * npr_vector[i]
  amort[i, "payments"] <- amort[i, "beginBal"] * mpr_vector[i]
  amort[i, "defaults"] <- amort[i, "beginBal"] * default_vector[i] / 12
  # Closing balance = opening balance + purchases - payments - defaults.
  amort[i, "endBal"] <- amort[i, "beginBal"] + amort[i, "purchases"] -
    amort[i, "payments"] - amort[i, "defaults"]
}
amort
这是一个看起来很漂亮但有缺陷的矢量化尝试,请参见下图中的输出缺陷之一(这些问题不会出现在上面的 for 循环 MWE 中):
# Closed-form attempt, kept as posted to illustrate the problem being asked
# about. Each period's own net rate is raised to the power (period - 1), which
# only equals the rolled-forward balance while the rates stay constant; once a
# rate changes, the opening balance no longer matches the prior closing balance.
amort <- data.frame(period = seq(1, periods, 1))
amort <- transform(
  amort,
  beginBal = beginBal *
    (1 - (mpr_vector + default_vector / 12 - npr_vector))^(period - 1)
)
# Per-period flows, all derived from the opening balance column.
amort <- transform(
  amort,
  yield = beginBal * yield_vector / 12,
  purchases = beginBal * npr_vector,
  payments = beginBal * mpr_vector,
  defaults = beginBal * default_vector / 12
)
amort <- transform(amort, endBal = beginBal + purchases - payments - defaults)
# Append the input rates next to the results for inspection.
amort <- transform(
  amort,
  yield_vector = yield_vector,
  npr_vector = npr_vector,
  mpr_vector = mpr_vector,
  default_vector = default_vector
)
amort
你可以这样做:
# Step function for Reduce(): takes the running balance `x` and a period
# index `y`, and returns the balance after applying that period's rates.
# Reads npr_vector, mpr_vector and default_vector from the calling environment.
f <- function(x, y) {
  x * (1 + npr_vector[y] - mpr_vector[y] - default_vector[y] / 12)
}

# Accumulate the balance across all periods, starting from beginBal.
# seq_len() is used instead of seq() because seq(0) is c(1, 0), whereas
# seq_len(0) is empty. With accumulate = TRUE the result holds the opening
# balance followed by every closing balance (periods + 1 values).
res <- Reduce(f, seq_len(periods), init = beginBal, accumulate = TRUE)

# Opening balances are everything except the final closing balance.
b <- head(res, -1)

result <- data.frame(
  period = seq_len(periods),
  beginBal = b,
  yield = b * yield_vector / 12,
  purchases = b * npr_vector,
  payments = b * mpr_vector,
  defaults = b * default_vector / 12,
  endBal = res[-1] # closing balances: drop the initial opening balance
)
检查:
result
period beginBal yield purchases payments defaults endBal
1 1 1000.000 25.00000 300.0000 200.0000 8.333333 1091.667
2 2 1091.667 27.29167 327.5000 218.3333 9.097222 1191.736
3 3 1191.736 29.79340 357.5208 238.3472 9.931134 1300.979
4 4 1300.979 32.52446 390.2936 260.1957 10.841488 1420.235
5 5 1420.235 35.50587 426.0705 284.0470 11.835291 1550.423
6 6 1550.423 36.17654 465.1269 310.0846 11.628174 1693.837
7 7 1693.837 36.69981 508.1512 338.7675 11.292249 1851.929
8 8 1851.929 30.86548 555.5786 370.3858 10.802918 2026.319
9 9 2026.319 30.39478 607.8956 405.2637 10.131594 2218.819
10 10 2218.819 36.98032 665.6457 443.7638 9.245079 2431.456
all.equal(result, amort)
[1] TRUE
此解决方案也可以用 tidyverse 实现:
library(dplyr)
library(purrr)
library(tidyr) # provides unnest(), which the pipeline below calls

# One row per period holding that period's rates. nest_by() needs a data
# frame; cbind() would build a matrix, and `period` was never defined.
data2 <- data.frame(
  period = seq_len(periods),
  yield_vector = yield_vector,
  npr_vector = npr_vector,
  mpr_vector = mpr_vector,
  default_vector = default_vector
)

data2 %>%
  nest_by(period) %>%
  ungroup() %>%
  # Roll the balance forward: the opening balance of period i + 1 is the
  # opening balance of period i grown by period i's rates. The LAST rate row
  # is dropped (data[-n()]), not the first: dropping the first applies each
  # period's rates one period too early, which shows up as soon as the rates
  # change (period 6 in this example).
  mutate(beginBal = accumulate(data[-n()], .init = beginBal,
                               ~ .x +
                                 (.x * .y$npr_vector) -
                                 (.x * .y$mpr_vector) -
                                 (.x * .y$default_vector / 12))) %>%
  unnest(data) %>%
  mutate(yield = beginBal * yield_vector / 12,
         purchases = beginBal * npr_vector,
         payments = beginBal * mpr_vector,
         defaults = beginBal * default_vector / 12,
         endBal = beginBal + purchases - payments - defaults) %>%
  select(!contains("vector"))
# Output (expected; agrees with the for-loop schedule):
#> # A tibble: 10 x 7
#>    period beginBal yield purchases payments defaults endBal
#>     <int>    <dbl> <dbl>     <dbl>    <dbl>    <dbl>  <dbl>
#>  1      1    1000   25        300      200      8.33  1092.
#>  2      2    1092.  27.3      328.     218.     9.10  1192.
#>  3      3    1192.  29.8      358.     238.     9.93  1301.
#>  4      4    1301.  32.5      390.     260.    10.8   1420.
#>  5      5    1420.  35.5      426.     284.    11.8   1550.
#>  6      6    1550.  36.2      465.     310.    11.6   1694.
#>  7      7    1694.  36.7      508.     339.    11.3   1852.
#>  8      8    1852.  30.9      556.     370.    10.8   2026.
#>  9      9    2026.  30.4      608.     405.    10.1   2219.
#> 10     10    2219.  37.0      666.     444.     9.25  2431.
如果在 base R 中不使用 Reduce,我会这样做:
解释-
- 对于每一行,`endBal` 实际上是由该行的 `beginBal` 乘以 `1 + npr_vector - mpr_vector - default_vector/12` 得到的。
- 因此我构造了一个临时(匿名)向量:在这些乘数的开头添加 `1`,然后求它的累积积,例如 `cumprod(c(1, 1 + npr_vector - mpr_vector - default_vector/12))`。
- 并使用 `[-(periods + 1)]` 去掉了它的最后一个元素。
- 然后乘以初始值 `beginBal`,这样就得到了每个 period 的 `beginBal` 值。
- 改变其余列非常简单。
- 如果您需要任何进一步的解释,请随时询问。
#given data
periods <- 10
beginBal <- 1000
yield_vector <- c(0.30,0.30,0.30,0.30,0.30,0.28,0.26,0.20,0.18,0.20)
npr_vector <- c(0.30,0.30,0.30,0.30,0.30,0.30,0.30,0.30,0.30,0.30)
mpr_vector <- c(0.20,0.20,0.20,0.20,0.20,0.20,0.20,0.20,0.20,0.20)
default_vector <- c(0.10,0.10,0.10,0.10,0.10,0.09,0.08,0.07,0.06,0.05)
amort <- data.frame(Period = seq(periods),
beginBal = beginBal * cumprod(c(1, 1 + npr_vector - mpr_vector - default_vector/12)[-(periods + 1)]))
amort <- transform(amort, Yeild = beginBal * yield_vector/12,
Purchases = beginBal * npr_vector,
Payments = beginBal * mpr_vector,
defaults = beginBal * default_vector/12,
EndBal = beginBal * (1 + npr_vector - mpr_vector - default_vector/12))
amort
#> Period beginBal Yeild Purchases Payments defaults EndBal
#> 1 1 1000.000 25.00000 300.0000 200.0000 8.333333 1091.667
#> 2 2 1091.667 27.29167 327.5000 218.3333 9.097222 1191.736
#> 3 3 1191.736 29.79340 357.5208 238.3472 9.931134 1300.979
#> 4 4 1300.979 32.52446 390.2936 260.1957 10.841488 1420.235
#> 5 5 1420.235 35.50587 426.0705 284.0470 11.835291 1550.423
#> 6 6 1550.423 36.17654 465.1269 310.0846 11.628174 1693.837
#> 7 7 1693.837 36.69981 508.1512 338.7675 11.292249 1851.929
#> 8 8 1851.929 30.86548 555.5786 370.3858 10.802918 2026.319
#> 9 9 2026.319 30.39478 607.8956 405.2637 10.131594 2218.819
#> 10 10 2218.819 36.98032 665.6457 443.7638 9.245079 2431.456
由 reprex 包 (v2.0.0) 于 2021-07-16 创建
仅使用 dplyr 的写法:
# FALSE is spelled out: F is an ordinary variable that can be reassigned.
library(dplyr, warn.conflicts = FALSE)

# Amortisation schedule built in a single mutate().
# seq_len() replaces seq() so that periods = 0 does not produce c(1, 0).
data.frame(Period = seq_len(periods)) %>%
  mutate(
    # Opening balance: the scalar beginBal (the column does not exist yet)
    # times the cumulative product of earlier periods' growth factors. The
    # leading 1 keeps period 1 at beginBal; [-(periods + 1)] drops the last
    # element so that, with one rate per period, one value per row remains.
    beginBal = beginBal *
      cumprod(c(1, 1 + npr_vector - mpr_vector - default_vector / 12)[-(periods + 1)]),
    # From here on `beginBal` refers to the column created above.
    Yeild = beginBal * yield_vector / 12,
    Purchases = beginBal * npr_vector,
    Payments = beginBal * mpr_vector,
    defaults = beginBal * default_vector / 12,
    EndBal = beginBal * (1 + npr_vector - mpr_vector - default_vector / 12)
  )
#> Period beginBal Yeild Purchases Payments defaults EndBal
#> 1 1 1000.000 25.00000 300.0000 200.0000 8.333333 1091.667
#> 2 2 1091.667 27.29167 327.5000 218.3333 9.097222 1191.736
#> 3 3 1191.736 29.79340 357.5208 238.3472 9.931134 1300.979
#> 4 4 1300.979 32.52446 390.2936 260.1957 10.841488 1420.235
#> 5 5 1420.235 35.50587 426.0705 284.0470 11.835291 1550.423
#> 6 6 1550.423 36.17654 465.1269 310.0846 11.628174 1693.837
#> 7 7 1693.837 36.69981 508.1512 338.7675 11.292249 1851.929
#> 8 8 1851.929 30.86548 555.5786 370.3858 10.802918 2026.319
#> 9 9 2026.319 30.39478 607.8956 405.2637 10.131594 2218.819
#> 10 10 2218.819 36.98032 665.6457 443.7638 9.245079 2431.456
不过,如果使用 purrr::accumulate,语法如下:
library(tidyverse)

# Amortisation with purrr::accumulate(): start from the scalar beginBal and
# multiply by each period's growth factor in turn. accumulate() returns the
# initial value plus one value per factor, so the final element is removed
# with [-(n() + 1)] to leave one opening balance per row.
data.frame(Period = seq(periods)) %>%
  mutate(
    beginBal = accumulate(
      1 + npr_vector - mpr_vector - default_vector / 12,
      `*`,
      .init = beginBal
    )[-(n() + 1)],
    # The remaining columns are element-wise products of the opening balance.
    Yeild = beginBal * yield_vector / 12,
    Purchases = beginBal * npr_vector,
    Payments = beginBal * mpr_vector,
    defaults = beginBal * default_vector / 12,
    EndBal = beginBal * (1 + npr_vector - mpr_vector - default_vector / 12)
  )
#> Period beginBal Yeild Purchases Payments defaults EndBal
#> 1 1 1000.000 25.00000 300.0000 200.0000 8.333333 1091.667
#> 2 2 1091.667 27.29167 327.5000 218.3333 9.097222 1191.736
#> 3 3 1191.736 29.79340 357.5208 238.3472 9.931134 1300.979
#> 4 4 1300.979 32.52446 390.2936 260.1957 10.841488 1420.235
#> 5 5 1420.235 35.50587 426.0705 284.0470 11.835291 1550.423
#> 6 6 1550.423 36.17654 465.1269 310.0846 11.628174 1693.837
#> 7 7 1693.837 36.69981 508.1512 338.7675 11.292249 1851.929
#> 8 8 1851.929 30.86548 555.5786 370.3858 10.802918 2026.319
#> 9 9 2026.319 30.39478 607.8956 405.2637 10.131594 2218.819
#> 10 10 2218.819 36.98032 665.6457 443.7638 9.245079 2431.456
我一直在努力避免在 R 中使用 for 循环,尽可能依赖向量化函数,以加快计算并简化代码。到目前为止都很顺利,直到遇到某些摊销计算。我碰壁了,不得不求助于 for 循环,请参阅下面的 MWE 代码。它工作正常,但我想用向量化函数或其他更高效的函数替换它。有人可以帮我用向量化函数替换下面的内容吗?
在从中提取此 MWE 的完整代码中,它使用 Shiny 进行反应。周期和向量速率,实际上是所有变量,都会根据用户输入发生剧烈变化。 MWE 示例输入变量已简化。
无论如何,下面是一个非常笨拙的电锯方法,需要精简。但是我不知道如何从我最有经验的完整 XLS 思维方式来处理这个问题。如果 for 循环是此类计算的唯一可行选择,我欢迎任何改进以下 MWE 的建议。
最底部是尝试“矢量化”的有缺陷的代码,但当矢量变量随时间变化时结果不准确。我在底部的图像中展示了这种矢量化方法的一个问题,其中 ending/beginning 余额在从一个时期移动到下一个时期时不匹配(for-loop MWE 代码没有这些问题 - 它是功能齐全但超级笨拙)。
For循环MWE代码:
# Inputs: number of periods, opening balance, and one rate per period.
periods <- 10
beginBal <- 1000
yield_vector <- c(0.30, 0.30, 0.30, 0.30, 0.30, 0.28, 0.26, 0.20, 0.18, 0.20)
npr_vector <- c(0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30)
mpr_vector <- c(0.20, 0.20, 0.20, 0.20, 0.20, 0.20, 0.20, 0.20, 0.20, 0.20)
default_vector <- c(0.10, 0.10, 0.10, 0.10, 0.10, 0.09, 0.08, 0.07, 0.06, 0.05)

# Pre-allocate the schedule with numeric NA columns; the loop fills them in.
# seq_len() is used so that periods = 0 gives an empty frame instead of an error.
amort <- data.frame(
  period = as.numeric(seq_len(periods)),
  beginBal = rep(NA_real_, periods),
  yield = rep(NA_real_, periods),
  purchases = rep(NA_real_, periods),
  payments = rep(NA_real_, periods),
  defaults = rep(NA_real_, periods),
  endBal = rep(NA_real_, periods)
)

# Roll the balance forward one period at a time. Iterating over
# seq_len(nrow(amort)) instead of 2:nrow(amort) keeps the loop correct for a
# one-row schedule, where 2:1 would count backwards and append a bogus row.
for (i in seq_len(nrow(amort))) {
  # Period 1 opens at beginBal; every later period opens at the previous
  # period's closing balance.
  amort[i, "beginBal"] <- if (i == 1) beginBal else amort[i - 1, "endBal"]
  amort[i, "yield"] <- amort[i, "beginBal"] * yield_vector[i] / 12
  amort[i, "purchases"] <- amort[i, "beginBal"] * npr_vector[i]
  amort[i, "payments"] <- amort[i, "beginBal"] * mpr_vector[i]
  amort[i, "defaults"] <- amort[i, "beginBal"] * default_vector[i] / 12
  # Closing balance = opening balance + purchases - payments - defaults.
  amort[i, "endBal"] <- amort[i, "beginBal"] + amort[i, "purchases"] -
    amort[i, "payments"] - amort[i, "defaults"]
}
amort
这是一个看起来很漂亮但有缺陷的矢量化尝试,请参见下图中的输出缺陷之一(这些问题不会出现在上面的 for 循环 MWE 中):
# Closed-form attempt, kept as posted to illustrate the problem being asked
# about. Each period's own net rate is raised to the power (period - 1), which
# only equals the rolled-forward balance while the rates stay constant; once a
# rate changes, the opening balance no longer matches the prior closing balance.
amort <- data.frame(period = seq(1, periods, 1))
amort <- transform(
  amort,
  beginBal = beginBal *
    (1 - (mpr_vector + default_vector / 12 - npr_vector))^(period - 1)
)
# Per-period flows, all derived from the opening balance column.
amort <- transform(
  amort,
  yield = beginBal * yield_vector / 12,
  purchases = beginBal * npr_vector,
  payments = beginBal * mpr_vector,
  defaults = beginBal * default_vector / 12
)
amort <- transform(amort, endBal = beginBal + purchases - payments - defaults)
# Append the input rates next to the results for inspection.
amort <- transform(
  amort,
  yield_vector = yield_vector,
  npr_vector = npr_vector,
  mpr_vector = mpr_vector,
  default_vector = default_vector
)
amort
你可以这样做:
# Step function for Reduce(): takes the running balance `x` and a period
# index `y`, and returns the balance after applying that period's rates.
# Reads npr_vector, mpr_vector and default_vector from the calling environment.
f <- function(x, y) {
  x * (1 + npr_vector[y] - mpr_vector[y] - default_vector[y] / 12)
}

# Accumulate the balance across all periods, starting from beginBal.
# seq_len() is used instead of seq() because seq(0) is c(1, 0), whereas
# seq_len(0) is empty. With accumulate = TRUE the result holds the opening
# balance followed by every closing balance (periods + 1 values).
res <- Reduce(f, seq_len(periods), init = beginBal, accumulate = TRUE)

# Opening balances are everything except the final closing balance.
b <- head(res, -1)

result <- data.frame(
  period = seq_len(periods),
  beginBal = b,
  yield = b * yield_vector / 12,
  purchases = b * npr_vector,
  payments = b * mpr_vector,
  defaults = b * default_vector / 12,
  endBal = res[-1] # closing balances: drop the initial opening balance
)
检查:
result
period beginBal yield purchases payments defaults endBal
1 1 1000.000 25.00000 300.0000 200.0000 8.333333 1091.667
2 2 1091.667 27.29167 327.5000 218.3333 9.097222 1191.736
3 3 1191.736 29.79340 357.5208 238.3472 9.931134 1300.979
4 4 1300.979 32.52446 390.2936 260.1957 10.841488 1420.235
5 5 1420.235 35.50587 426.0705 284.0470 11.835291 1550.423
6 6 1550.423 36.17654 465.1269 310.0846 11.628174 1693.837
7 7 1693.837 36.69981 508.1512 338.7675 11.292249 1851.929
8 8 1851.929 30.86548 555.5786 370.3858 10.802918 2026.319
9 9 2026.319 30.39478 607.8956 405.2637 10.131594 2218.819
10 10 2218.819 36.98032 665.6457 443.7638 9.245079 2431.456
all.equal(result, amort)
[1] TRUE
此解决方案也可以用 tidyverse 实现:
library(dplyr)
library(purrr)
library(tidyr) # provides unnest(), which the pipeline below calls

# One row per period holding that period's rates. nest_by() needs a data
# frame; cbind() would build a matrix, and `period` was never defined.
data2 <- data.frame(
  period = seq_len(periods),
  yield_vector = yield_vector,
  npr_vector = npr_vector,
  mpr_vector = mpr_vector,
  default_vector = default_vector
)

data2 %>%
  nest_by(period) %>%
  ungroup() %>%
  # Roll the balance forward: the opening balance of period i + 1 is the
  # opening balance of period i grown by period i's rates. The LAST rate row
  # is dropped (data[-n()]), not the first: dropping the first applies each
  # period's rates one period too early, which shows up as soon as the rates
  # change (period 6 in this example).
  mutate(beginBal = accumulate(data[-n()], .init = beginBal,
                               ~ .x +
                                 (.x * .y$npr_vector) -
                                 (.x * .y$mpr_vector) -
                                 (.x * .y$default_vector / 12))) %>%
  unnest(data) %>%
  mutate(yield = beginBal * yield_vector / 12,
         purchases = beginBal * npr_vector,
         payments = beginBal * mpr_vector,
         defaults = beginBal * default_vector / 12,
         endBal = beginBal + purchases - payments - defaults) %>%
  select(!contains("vector"))
# Output (expected; agrees with the for-loop schedule):
#> # A tibble: 10 x 7
#>    period beginBal yield purchases payments defaults endBal
#>     <int>    <dbl> <dbl>     <dbl>    <dbl>    <dbl>  <dbl>
#>  1      1    1000   25        300      200      8.33  1092.
#>  2      2    1092.  27.3      328.     218.     9.10  1192.
#>  3      3    1192.  29.8      358.     238.     9.93  1301.
#>  4      4    1301.  32.5      390.     260.    10.8   1420.
#>  5      5    1420.  35.5      426.     284.    11.8   1550.
#>  6      6    1550.  36.2      465.     310.    11.6   1694.
#>  7      7    1694.  36.7      508.     339.    11.3   1852.
#>  8      8    1852.  30.9      556.     370.    10.8   2026.
#>  9      9    2026.  30.4      608.     405.    10.1   2219.
#> 10     10    2219.  37.0      666.     444.     9.25  2431.
如果在 base R 中不使用 Reduce,我会这样做:
解释-
- 对于每一行,`endBal` 实际上是由该行的 `beginBal` 乘以 `1 + npr_vector - mpr_vector - default_vector/12` 得到的。
- 因此我构造了一个临时(匿名)向量:在这些乘数的开头添加 `1`,然后求它的累积积,例如 `cumprod(c(1, 1 + npr_vector - mpr_vector - default_vector/12))`。
- 并使用 `[-(periods + 1)]` 去掉了它的最后一个元素。
- 然后乘以初始值 `beginBal`,这样就得到了每个 period 的 `beginBal` 值。
- 改变其余列非常简单。
- 如果您需要任何进一步的解释,请随时询问。
#given data
# Given data: number of periods, opening balance and one rate per period.
periods <- 10
beginBal <- 1000
yield_vector <- c(0.30, 0.30, 0.30, 0.30, 0.30, 0.28, 0.26, 0.20, 0.18, 0.20)
npr_vector <- c(0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30)
mpr_vector <- c(0.20, 0.20, 0.20, 0.20, 0.20, 0.20, 0.20, 0.20, 0.20, 0.20)
default_vector <- c(0.10, 0.10, 0.10, 0.10, 0.10, 0.09, 0.08, 0.07, 0.06, 0.05)

# Start with the period index, then add one column at a time.
amort <- data.frame(Period = seq(periods))

# Opening balance = beginBal times the cumulative product of the growth
# factors of all earlier periods. A leading 1 keeps period 1 at beginBal, and
# [-(periods + 1)] removes the last element so the vector has one entry per
# period.
amort$beginBal <- beginBal *
  cumprod(c(1, 1 + npr_vector - mpr_vector - default_vector / 12)[-(periods + 1)])

# Remaining columns are element-wise products of the opening balance.
amort$Yeild <- amort$beginBal * yield_vector / 12
amort$Purchases <- amort$beginBal * npr_vector
amort$Payments <- amort$beginBal * mpr_vector
amort$defaults <- amort$beginBal * default_vector / 12
amort$EndBal <- amort$beginBal *
  (1 + npr_vector - mpr_vector - default_vector / 12)
amort
#> Period beginBal Yeild Purchases Payments defaults EndBal
#> 1 1 1000.000 25.00000 300.0000 200.0000 8.333333 1091.667
#> 2 2 1091.667 27.29167 327.5000 218.3333 9.097222 1191.736
#> 3 3 1191.736 29.79340 357.5208 238.3472 9.931134 1300.979
#> 4 4 1300.979 32.52446 390.2936 260.1957 10.841488 1420.235
#> 5 5 1420.235 35.50587 426.0705 284.0470 11.835291 1550.423
#> 6 6 1550.423 36.17654 465.1269 310.0846 11.628174 1693.837
#> 7 7 1693.837 36.69981 508.1512 338.7675 11.292249 1851.929
#> 8 8 1851.929 30.86548 555.5786 370.3858 10.802918 2026.319
#> 9 9 2026.319 30.39478 607.8956 405.2637 10.131594 2218.819
#> 10 10 2218.819 36.98032 665.6457 443.7638 9.245079 2431.456
由 reprex 包 (v2.0.0) 于 2021-07-16 创建
仅使用 dplyr 的写法:
# FALSE is spelled out: F is an ordinary variable that can be reassigned.
library(dplyr, warn.conflicts = FALSE)

# Amortisation schedule built in a single mutate().
# seq_len() replaces seq() so that periods = 0 does not produce c(1, 0).
data.frame(Period = seq_len(periods)) %>%
  mutate(
    # Opening balance: the scalar beginBal (the column does not exist yet)
    # times the cumulative product of earlier periods' growth factors. The
    # leading 1 keeps period 1 at beginBal; [-(periods + 1)] drops the last
    # element so that, with one rate per period, one value per row remains.
    beginBal = beginBal *
      cumprod(c(1, 1 + npr_vector - mpr_vector - default_vector / 12)[-(periods + 1)]),
    # From here on `beginBal` refers to the column created above.
    Yeild = beginBal * yield_vector / 12,
    Purchases = beginBal * npr_vector,
    Payments = beginBal * mpr_vector,
    defaults = beginBal * default_vector / 12,
    EndBal = beginBal * (1 + npr_vector - mpr_vector - default_vector / 12)
  )
#> Period beginBal Yeild Purchases Payments defaults EndBal
#> 1 1 1000.000 25.00000 300.0000 200.0000 8.333333 1091.667
#> 2 2 1091.667 27.29167 327.5000 218.3333 9.097222 1191.736
#> 3 3 1191.736 29.79340 357.5208 238.3472 9.931134 1300.979
#> 4 4 1300.979 32.52446 390.2936 260.1957 10.841488 1420.235
#> 5 5 1420.235 35.50587 426.0705 284.0470 11.835291 1550.423
#> 6 6 1550.423 36.17654 465.1269 310.0846 11.628174 1693.837
#> 7 7 1693.837 36.69981 508.1512 338.7675 11.292249 1851.929
#> 8 8 1851.929 30.86548 555.5786 370.3858 10.802918 2026.319
#> 9 9 2026.319 30.39478 607.8956 405.2637 10.131594 2218.819
#> 10 10 2218.819 36.98032 665.6457 443.7638 9.245079 2431.456
不过,如果使用 purrr::accumulate,语法如下:
library(tidyverse)

# Amortisation with purrr::accumulate(): start from the scalar beginBal and
# multiply by each period's growth factor in turn. accumulate() returns the
# initial value plus one value per factor, so the final element is removed
# with [-(n() + 1)] to leave one opening balance per row.
data.frame(Period = seq(periods)) %>%
  mutate(
    beginBal = accumulate(
      1 + npr_vector - mpr_vector - default_vector / 12,
      `*`,
      .init = beginBal
    )[-(n() + 1)],
    # The remaining columns are element-wise products of the opening balance.
    Yeild = beginBal * yield_vector / 12,
    Purchases = beginBal * npr_vector,
    Payments = beginBal * mpr_vector,
    defaults = beginBal * default_vector / 12,
    EndBal = beginBal * (1 + npr_vector - mpr_vector - default_vector / 12)
  )
#> Period beginBal Yeild Purchases Payments defaults EndBal
#> 1 1 1000.000 25.00000 300.0000 200.0000 8.333333 1091.667
#> 2 2 1091.667 27.29167 327.5000 218.3333 9.097222 1191.736
#> 3 3 1191.736 29.79340 357.5208 238.3472 9.931134 1300.979
#> 4 4 1300.979 32.52446 390.2936 260.1957 10.841488 1420.235
#> 5 5 1420.235 35.50587 426.0705 284.0470 11.835291 1550.423
#> 6 6 1550.423 36.17654 465.1269 310.0846 11.628174 1693.837
#> 7 7 1693.837 36.69981 508.1512 338.7675 11.292249 1851.929
#> 8 8 1851.929 30.86548 555.5786 370.3858 10.802918 2026.319
#> 9 9 2026.319 30.39478 607.8956 405.2637 10.131594 2218.819
#> 10 10 2218.819 36.98032 665.6457 443.7638 9.245079 2431.456