如何将函数应用于数据集的行并获取 R 中每一行的结果
How to apply a function to the rows of a dataset and get the result for each row in R
我写了一个函数,它使用 Monte Carlo Simulation 来计算 R 中看涨期权的价值。我想将该函数应用到包含真实数据的数据集中的 63 行。换句话说,我希望函数将每一行的值用于其变量
我可以为函数的变量赋值,但是对于大量数据这样做需要时间
# Monte Carlo estimate of a call option price.
#
# Simulates nSim terminal prices
#   ST = S0 * exp((r - 0.5 * sigma^2) * tau + sigma * sqrt(tau) * Z),  Z ~ N(0, 1),
# and averages the discounted payoffs exp(-r * tau) * max(ST - K, 0).
#
# Arguments:
#   nSim   number of simulated draws; a single number >= 1 (default 10000)
#   tau    time horizon of the simulation (the examples pass days / 365)
#   r      rate used for both the drift and the discounting
#   sigma  volatility coefficient of the simulated price
#   S0     starting price
#   K      strike subtracted from the terminal price in the payoff
#
# Returns a list with
#   price_call  mean of the discounted payoffs
#   sterr_call  their standard deviation divided by sqrt(nSim)
#               (NA when nSim is 1, because sd() needs two values)
callMC <- function(nSim = 10000, tau, r, sigma, S0, K) {
  # Fail early instead of silently returning NaN/NA for an unusable draw count.
  stopifnot(is.numeric(nSim), length(nSim) == 1L, !is.na(nSim), nSim >= 1)

  Z <- rnorm(nSim, mean = 0, sd = 1)
  BT <- sqrt(tau) * Z
  ST <- S0 * exp((r - 0.5 * sigma^2) * tau + sigma * BT)

  simulated_call_payoffs <- exp(-r * tau) * pmax(ST - K, 0)
  list(
    price_call = mean(simulated_call_payoffs),
    sterr_call = sd(simulated_call_payoffs) / sqrt(nSim)
  )
}
# Fix the RNG seed so this single-option example is reproducible.
set.seed(1)
# nSim is spelled out instead of relying on partial matching of `n`.
results <- callMC(
  nSim = 10000, tau = 70 / 365, r = 0.0176,
  sigma = 0.208, S0 = 142.76, K = 140
)
results
# Inputs for the 63 options, one element per data-frame row.

# tau is listed in days and converted by dividing by 365.
tau <- c(
  1, 2, 3, 4, 5, 8, 9, 10, 12, 15,
  15, 16, 17, 18, 19, 22, 24, 25, 26, 29,
  30, 31, 32, 33, 36, 37, 38, 39, 40, 43,
  44, 45, 46, 47, 50, 51, 52, 53, 54, 57,
  58, 59, 60, 61, 64, 65, 66, 67, 68, 71,
  72, 73, 74, 75, 78, 79, 80, 81, 82, 85,
  86, 87, 88
) / 365

r <- c(
  0.0168, 0.016, 0.0165, 0.0154, 0.0152, 0.0156, 0.0175, 0.0159, 0.0176,
  0.0177, 0.0167, 0.0154, 0.0176, 0.0176, 0.0176, 0.0178, 0.018, 0.0177,
  0.0179, 0.018, 0.0185, 0.0177, 0.0178, 0.0184, 0.0169, 0.0173, 0.0192,
  0.0182, 0.0184, 0.0178, 0.0183, 0.0177, 0.0177, 0.0174, 0.0192, 0.0181,
  0.0181, 0.0194, 0.0176, 0.0177, 0.0193, 0.0179, 0.0188, 0.0186, 0.0177,
  0.0173, 0.018, 0.0179, 0.0184, 0.019, 0.0183, 0.0177, 0.0172, 0.0185,
  0.0192, 0.0189, 0.0189, 0.0192, 0.0192, 0.0192, 0.0192, 0.0192, 0.0182
)

# sigma is the same value for every row.
sigma <- rep(0.2564, times = 63)

S0 <- c(
  135.59, 134.56, 134.41, 134.22, 134.13, 134.21, 135.32, 133.76, 133.91, 133.92,
  133.22, 131.91, 131.99, 132.12, 132.91, 134.45, 133.77, 135.09, 135.97, 134.34,
  133.84, 133.2, 134.52, 134.31, 134.4, 134, 134.48, 135.59, 135.47, 137.61,
  137.69, 138.78, 137.89, 137.67, 135.53, 133.73, 135.25, 133.82, 135.97, 135.44,
  134.07, 134.38, 133.96, 132.58, 134.09, 134.26, 142.11, 143, 142.04, 142.76,
  141.13, 139.67, 138.38, 141.28, 142.99, 142.02, 141.69, 143.66, 145.42, 143.24,
  143.55, 143.16, 141.68
)

# K is 140 for every row.
K <- rep(140, times = 63)

# One row per option; the column order matches callMC's arguments after nSim.
df <- data.frame(tau, r, sigma, S0, K)
我使用 apply(df,1,callMC) 将函数应用于每一行,但是,我遇到了错误
apply(df,1,callMC)
Error in FUN(newX[, i], ...) :
argument "tau" is missing, with no default
{purrr} 包中的 map 系列函数非常适合这种情况。
下面这段代码遍历 df 的每一行,并把该行各列的值传给函数(.x 依次取 1 到 df 的行数)。您也可以给 .x 指定一个具体的值来单独测试某一行,例如 .x = 1。
# Loop over the row indices; each call reads one row's inputs from df.
# nSim is left at its default (10000): passing the row index as `n` would
# partially match nSim and run only .x simulations for row .x.
seq_len(nrow(df)) %>%
  map_df(~ callMC(
    tau = df$tau[.x], r = df$r[.x], sigma = df$sigma[.x],
    S0 = df$S0[.x], K = df$K[.x]
  ))
这里是完整代码:
# Monte Carlo estimate of a call option price.
#
# Simulates nSim terminal prices
#   ST = S0 * exp((r - 0.5 * sigma^2) * tau + sigma * sqrt(tau) * Z),  Z ~ N(0, 1),
# and averages the discounted payoffs exp(-r * tau) * max(ST - K, 0).
#
# Arguments:
#   nSim   number of simulated draws; a single number >= 1 (default 10000)
#   tau    time horizon of the simulation (the examples pass days / 365)
#   r      rate used for both the drift and the discounting
#   sigma  volatility coefficient of the simulated price
#   S0     starting price
#   K      strike subtracted from the terminal price in the payoff
#
# Returns a list with
#   price_call  mean of the discounted payoffs
#   sterr_call  their standard deviation divided by sqrt(nSim)
#               (NA when nSim is 1, because sd() needs two values)
callMC <- function(nSim = 10000, tau, r, sigma, S0, K) {
  # Fail early instead of silently returning NaN/NA for an unusable draw count.
  stopifnot(is.numeric(nSim), length(nSim) == 1L, !is.na(nSim), nSim >= 1)

  Z <- rnorm(nSim, mean = 0, sd = 1)
  BT <- sqrt(tau) * Z
  ST <- S0 * exp((r - 0.5 * sigma^2) * tau + sigma * BT)

  simulated_call_payoffs <- exp(-r * tau) * pmax(ST - K, 0)
  list(
    price_call = mean(simulated_call_payoffs),
    sterr_call = sd(simulated_call_payoffs) / sqrt(nSim)
  )
}
# Fix the RNG seed so this single-option example is reproducible.
set.seed(1)
# nSim is spelled out instead of relying on partial matching of `n`.
results <- callMC(
  nSim = 10000, tau = 70 / 365, r = 0.0176,
  sigma = 0.208, S0 = 142.76, K = 140
)
results
#> $price_call
#> [1] 6.908401
#>
#> $sterr_call
#> [1] 0.09126226
# Inputs for the 63 options, one element per data-frame row.

# tau is listed in days and converted by dividing by 365.
tau <- c(
  1, 2, 3, 4, 5, 8, 9, 10, 12, 15,
  15, 16, 17, 18, 19, 22, 24, 25, 26, 29,
  30, 31, 32, 33, 36, 37, 38, 39, 40, 43,
  44, 45, 46, 47, 50, 51, 52, 53, 54, 57,
  58, 59, 60, 61, 64, 65, 66, 67, 68, 71,
  72, 73, 74, 75, 78, 79, 80, 81, 82, 85,
  86, 87, 88
) / 365

r <- c(
  0.0168, 0.016, 0.0165, 0.0154, 0.0152, 0.0156, 0.0175, 0.0159, 0.0176,
  0.0177, 0.0167, 0.0154, 0.0176, 0.0176, 0.0176, 0.0178, 0.018, 0.0177,
  0.0179, 0.018, 0.0185, 0.0177, 0.0178, 0.0184, 0.0169, 0.0173, 0.0192,
  0.0182, 0.0184, 0.0178, 0.0183, 0.0177, 0.0177, 0.0174, 0.0192, 0.0181,
  0.0181, 0.0194, 0.0176, 0.0177, 0.0193, 0.0179, 0.0188, 0.0186, 0.0177,
  0.0173, 0.018, 0.0179, 0.0184, 0.019, 0.0183, 0.0177, 0.0172, 0.0185,
  0.0192, 0.0189, 0.0189, 0.0192, 0.0192, 0.0192, 0.0192, 0.0192, 0.0182
)

# sigma is the same value for every row.
sigma <- rep(0.2564, times = 63)

S0 <- c(
  135.59, 134.56, 134.41, 134.22, 134.13, 134.21, 135.32, 133.76, 133.91, 133.92,
  133.22, 131.91, 131.99, 132.12, 132.91, 134.45, 133.77, 135.09, 135.97, 134.34,
  133.84, 133.2, 134.52, 134.31, 134.4, 134, 134.48, 135.59, 135.47, 137.61,
  137.69, 138.78, 137.89, 137.67, 135.53, 133.73, 135.25, 133.82, 135.97, 135.44,
  134.07, 134.38, 133.96, 132.58, 134.09, 134.26, 142.11, 143, 142.04, 142.76,
  141.13, 139.67, 138.38, 141.28, 142.99, 142.02, 141.69, 143.66, 145.42, 143.24,
  143.55, 143.16, 141.68
)

# K is 140 for every row.
K <- rep(140, times = 63)

# One row per option; the column order matches callMC's arguments after nSim.
df <- data.frame(tau, r, sigma, S0, K)
library(purrr)
# nSim is left at its default (10000). The earlier form passed n = .x, which
# partially matches nSim and therefore ran only .x simulations for row .x.
DF <- seq_len(nrow(df)) %>%
  map_df(~ callMC(
    tau = df$tau[.x], r = df$r[.x], sigma = df$sigma[.x],
    S0 = df$S0[.x], K = df$K[.x]
  ))
DF
# NOTE: the reprex output below was captured with the earlier n = .x call
# (hence sterr_call = NA in row 1, where a single draw was simulated); rerun
# to refresh it.
#> # A tibble: 63 × 2
#> price_call sterr_call
#> <dbl> <dbl>
#> 1 0 NA
#> 2 0 0
#> 3 0 0
#> 4 0 0
#> 5 0 0
#> 6 0.660 0.660
#> 7 0 0
#> 8 0.386 0.386
#> 9 0 0
#> 10 0 0
#> # … with 53 more rows
由 reprex 包 (v2.0.1) 于 2022-03-22 创建
这是一种 tidyverse + mapply 的方法:
library(dplyr)
library(tidyr)
# mapply() calls callMC once per row with that row's tau, r, sigma, S0 and K
# (nSim stays at its default); SIMPLIFY = FALSE keeps each result as a list so
# it can be stored in the `data` list-column, which unnest_wider() then
# spreads into price_call and sterr_call columns.
df |>
  mutate(
    data = mapply(
      callMC,
      tau = tau, r = r, sigma = sigma, S0 = S0, K = K,
      SIMPLIFY = FALSE
    )
  ) |>
  unnest_wider(col = data)
##> + # A tibble: 63 × 7
##> tau r sigma S0 K price_call sterr_call
##> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##> 1 0.00274 0.0168 0.256 136. 140 0.00629 0.000921
##> 2 0.00548 0.016 0.256 135. 140 0.0181 0.00178
##> 3 0.00822 0.0165 0.256 134. 140 0.0531 0.00345
##> 4 0.0110 0.0154 0.256 134. 140 0.0920 0.00516
##> 5 0.0137 0.0152 0.256 134. 140 0.150 0.00722
##> 6 0.0219 0.0156 0.256 134. 140 0.358 0.0127
##> 7 0.0247 0.0175 0.256 135. 140 0.599 0.0168
##> 8 0.0274 0.0159 0.256 134. 140 0.419 0.0143
##> 9 0.0329 0.0176 0.256 134. 140 0.551 0.0176
##> 10 0.0411 0.0177 0.256 134. 140 0.825 0.0232
##> # … with 53 more rows
问题是您将一个向量(包含 5 个参数)传递给您的函数,该函数需要 5 个单独的参数 - 如果包含 n,则为 6 个。您需要让您的函数知道它正在接收 6 个单独的参数
这是在 base R 中获得简洁答案的一种简单方法。我对第一个参数进行了“硬编码”,因为您似乎使用的是 10000,但如果需要,您可以将其设为变量
x[1]、x[2]… 是数据框 df 每一行中的各个元素。
然后我用 do.call、rbind 和 as.data.frame 把结果整理成一个整洁的数据框。
# apply() passes each row of df to the function as a vector x whose elements
# follow the column order tau, r, sigma, S0, K; do.call(rbind, ...) stacks the
# per-row result lists and as.data.frame() turns the stack into a data frame.
as.data.frame(
  do.call(
    rbind,
    apply(df, 1, function(x) {
      callMC(nSim = 10000, tau = x[1], r = x[2], sigma = x[3], S0 = x[4], K = x[5])
    })
  )
)
price_call sterr_call
1 0.006132845 0.0008339928
2 0.01769498 0.001826786
3 0.06270669 0.003937882
4 0.08954985 0.005085325
5 0.1461733 0.007129216
6 0.3558649 0.01246984
7 0.6200213 0.01750186
...
编辑:另一种答案,直接使用您在 df 中定义的变量。同样,as.data.frame 和 t 只是为了整理结果。
# mapply() walks the tau, r, sigma, S0 and K vectors in parallel (the single
# nSim value is recycled); t() flips the result so each option is one row.
as.data.frame(
  t(mapply(callMC, nSim = 10000, tau = tau, r = r, sigma = sigma, S0 = S0, K = K))
)
price_call sterr_call
1 0.006855707 0.0009683526
2 0.01869136 0.001834313
3 0.04815097 0.003332207
4 0.08621679 0.004994365
5 0.1532411 0.007166206
...
我写了一个函数,它使用 Monte Carlo Simulation 来计算 R 中看涨期权的价值。我想将该函数应用到包含真实数据的数据集中的 63 行。换句话说,我希望函数将每一行的值用于其变量 我可以为函数的变量赋值,但是对于大量数据这样做需要时间
# Monte Carlo estimate of a call option price.
#
# Simulates nSim terminal prices
#   ST = S0 * exp((r - 0.5 * sigma^2) * tau + sigma * sqrt(tau) * Z),  Z ~ N(0, 1),
# and averages the discounted payoffs exp(-r * tau) * max(ST - K, 0).
#
# Arguments:
#   nSim   number of simulated draws; a single number >= 1 (default 10000)
#   tau    time horizon of the simulation (the examples pass days / 365)
#   r      rate used for both the drift and the discounting
#   sigma  volatility coefficient of the simulated price
#   S0     starting price
#   K      strike subtracted from the terminal price in the payoff
#
# Returns a list with
#   price_call  mean of the discounted payoffs
#   sterr_call  their standard deviation divided by sqrt(nSim)
#               (NA when nSim is 1, because sd() needs two values)
callMC <- function(nSim = 10000, tau, r, sigma, S0, K) {
  # Fail early instead of silently returning NaN/NA for an unusable draw count.
  stopifnot(is.numeric(nSim), length(nSim) == 1L, !is.na(nSim), nSim >= 1)

  Z <- rnorm(nSim, mean = 0, sd = 1)
  BT <- sqrt(tau) * Z
  ST <- S0 * exp((r - 0.5 * sigma^2) * tau + sigma * BT)

  simulated_call_payoffs <- exp(-r * tau) * pmax(ST - K, 0)
  list(
    price_call = mean(simulated_call_payoffs),
    sterr_call = sd(simulated_call_payoffs) / sqrt(nSim)
  )
}
# Fix the RNG seed so this single-option example is reproducible.
set.seed(1)
# nSim is spelled out instead of relying on partial matching of `n`.
results <- callMC(
  nSim = 10000, tau = 70 / 365, r = 0.0176,
  sigma = 0.208, S0 = 142.76, K = 140
)
results
# Inputs for the 63 options, one element per data-frame row.

# tau is listed in days and converted by dividing by 365.
tau <- c(
  1, 2, 3, 4, 5, 8, 9, 10, 12, 15,
  15, 16, 17, 18, 19, 22, 24, 25, 26, 29,
  30, 31, 32, 33, 36, 37, 38, 39, 40, 43,
  44, 45, 46, 47, 50, 51, 52, 53, 54, 57,
  58, 59, 60, 61, 64, 65, 66, 67, 68, 71,
  72, 73, 74, 75, 78, 79, 80, 81, 82, 85,
  86, 87, 88
) / 365

r <- c(
  0.0168, 0.016, 0.0165, 0.0154, 0.0152, 0.0156, 0.0175, 0.0159, 0.0176,
  0.0177, 0.0167, 0.0154, 0.0176, 0.0176, 0.0176, 0.0178, 0.018, 0.0177,
  0.0179, 0.018, 0.0185, 0.0177, 0.0178, 0.0184, 0.0169, 0.0173, 0.0192,
  0.0182, 0.0184, 0.0178, 0.0183, 0.0177, 0.0177, 0.0174, 0.0192, 0.0181,
  0.0181, 0.0194, 0.0176, 0.0177, 0.0193, 0.0179, 0.0188, 0.0186, 0.0177,
  0.0173, 0.018, 0.0179, 0.0184, 0.019, 0.0183, 0.0177, 0.0172, 0.0185,
  0.0192, 0.0189, 0.0189, 0.0192, 0.0192, 0.0192, 0.0192, 0.0192, 0.0182
)

# sigma is the same value for every row.
sigma <- rep(0.2564, times = 63)

S0 <- c(
  135.59, 134.56, 134.41, 134.22, 134.13, 134.21, 135.32, 133.76, 133.91, 133.92,
  133.22, 131.91, 131.99, 132.12, 132.91, 134.45, 133.77, 135.09, 135.97, 134.34,
  133.84, 133.2, 134.52, 134.31, 134.4, 134, 134.48, 135.59, 135.47, 137.61,
  137.69, 138.78, 137.89, 137.67, 135.53, 133.73, 135.25, 133.82, 135.97, 135.44,
  134.07, 134.38, 133.96, 132.58, 134.09, 134.26, 142.11, 143, 142.04, 142.76,
  141.13, 139.67, 138.38, 141.28, 142.99, 142.02, 141.69, 143.66, 145.42, 143.24,
  143.55, 143.16, 141.68
)

# K is 140 for every row.
K <- rep(140, times = 63)

# One row per option; the column order matches callMC's arguments after nSim.
df <- data.frame(tau, r, sigma, S0, K)
我使用 apply(df,1,callMC) 将函数应用于每一行,但是,我遇到了错误
apply(df,1,callMC)
Error in FUN(newX[, i], ...) :
argument "tau" is missing, with no default
{purrr} 包中的 map 系列函数非常适合这种情况。
下面这段代码遍历 df 的每一行,并把该行各列的值传给函数(.x 依次取 1 到 df 的行数)。您也可以给 .x 指定一个具体的值来单独测试某一行,例如 .x = 1。
# Loop over the row indices; each call reads one row's inputs from df.
# nSim is left at its default (10000): passing the row index as `n` would
# partially match nSim and run only .x simulations for row .x.
seq_len(nrow(df)) %>%
  map_df(~ callMC(
    tau = df$tau[.x], r = df$r[.x], sigma = df$sigma[.x],
    S0 = df$S0[.x], K = df$K[.x]
  ))
这里是完整代码:
# Monte Carlo estimate of a call option price.
#
# Simulates nSim terminal prices
#   ST = S0 * exp((r - 0.5 * sigma^2) * tau + sigma * sqrt(tau) * Z),  Z ~ N(0, 1),
# and averages the discounted payoffs exp(-r * tau) * max(ST - K, 0).
#
# Arguments:
#   nSim   number of simulated draws; a single number >= 1 (default 10000)
#   tau    time horizon of the simulation (the examples pass days / 365)
#   r      rate used for both the drift and the discounting
#   sigma  volatility coefficient of the simulated price
#   S0     starting price
#   K      strike subtracted from the terminal price in the payoff
#
# Returns a list with
#   price_call  mean of the discounted payoffs
#   sterr_call  their standard deviation divided by sqrt(nSim)
#               (NA when nSim is 1, because sd() needs two values)
callMC <- function(nSim = 10000, tau, r, sigma, S0, K) {
  # Fail early instead of silently returning NaN/NA for an unusable draw count.
  stopifnot(is.numeric(nSim), length(nSim) == 1L, !is.na(nSim), nSim >= 1)

  Z <- rnorm(nSim, mean = 0, sd = 1)
  BT <- sqrt(tau) * Z
  ST <- S0 * exp((r - 0.5 * sigma^2) * tau + sigma * BT)

  simulated_call_payoffs <- exp(-r * tau) * pmax(ST - K, 0)
  list(
    price_call = mean(simulated_call_payoffs),
    sterr_call = sd(simulated_call_payoffs) / sqrt(nSim)
  )
}
# Fix the RNG seed so this single-option example is reproducible.
set.seed(1)
# nSim is spelled out instead of relying on partial matching of `n`.
results <- callMC(
  nSim = 10000, tau = 70 / 365, r = 0.0176,
  sigma = 0.208, S0 = 142.76, K = 140
)
results
#> $price_call
#> [1] 6.908401
#>
#> $sterr_call
#> [1] 0.09126226
# Inputs for the 63 options, one element per data-frame row.

# tau is listed in days and converted by dividing by 365.
tau <- c(
  1, 2, 3, 4, 5, 8, 9, 10, 12, 15,
  15, 16, 17, 18, 19, 22, 24, 25, 26, 29,
  30, 31, 32, 33, 36, 37, 38, 39, 40, 43,
  44, 45, 46, 47, 50, 51, 52, 53, 54, 57,
  58, 59, 60, 61, 64, 65, 66, 67, 68, 71,
  72, 73, 74, 75, 78, 79, 80, 81, 82, 85,
  86, 87, 88
) / 365

r <- c(
  0.0168, 0.016, 0.0165, 0.0154, 0.0152, 0.0156, 0.0175, 0.0159, 0.0176,
  0.0177, 0.0167, 0.0154, 0.0176, 0.0176, 0.0176, 0.0178, 0.018, 0.0177,
  0.0179, 0.018, 0.0185, 0.0177, 0.0178, 0.0184, 0.0169, 0.0173, 0.0192,
  0.0182, 0.0184, 0.0178, 0.0183, 0.0177, 0.0177, 0.0174, 0.0192, 0.0181,
  0.0181, 0.0194, 0.0176, 0.0177, 0.0193, 0.0179, 0.0188, 0.0186, 0.0177,
  0.0173, 0.018, 0.0179, 0.0184, 0.019, 0.0183, 0.0177, 0.0172, 0.0185,
  0.0192, 0.0189, 0.0189, 0.0192, 0.0192, 0.0192, 0.0192, 0.0192, 0.0182
)

# sigma is the same value for every row.
sigma <- rep(0.2564, times = 63)

S0 <- c(
  135.59, 134.56, 134.41, 134.22, 134.13, 134.21, 135.32, 133.76, 133.91, 133.92,
  133.22, 131.91, 131.99, 132.12, 132.91, 134.45, 133.77, 135.09, 135.97, 134.34,
  133.84, 133.2, 134.52, 134.31, 134.4, 134, 134.48, 135.59, 135.47, 137.61,
  137.69, 138.78, 137.89, 137.67, 135.53, 133.73, 135.25, 133.82, 135.97, 135.44,
  134.07, 134.38, 133.96, 132.58, 134.09, 134.26, 142.11, 143, 142.04, 142.76,
  141.13, 139.67, 138.38, 141.28, 142.99, 142.02, 141.69, 143.66, 145.42, 143.24,
  143.55, 143.16, 141.68
)

# K is 140 for every row.
K <- rep(140, times = 63)

# One row per option; the column order matches callMC's arguments after nSim.
df <- data.frame(tau, r, sigma, S0, K)
library(purrr)
# nSim is left at its default (10000). The earlier form passed n = .x, which
# partially matches nSim and therefore ran only .x simulations for row .x.
DF <- seq_len(nrow(df)) %>%
  map_df(~ callMC(
    tau = df$tau[.x], r = df$r[.x], sigma = df$sigma[.x],
    S0 = df$S0[.x], K = df$K[.x]
  ))
DF
# NOTE: the reprex output below was captured with the earlier n = .x call
# (hence sterr_call = NA in row 1, where a single draw was simulated); rerun
# to refresh it.
#> # A tibble: 63 × 2
#> price_call sterr_call
#> <dbl> <dbl>
#> 1 0 NA
#> 2 0 0
#> 3 0 0
#> 4 0 0
#> 5 0 0
#> 6 0.660 0.660
#> 7 0 0
#> 8 0.386 0.386
#> 9 0 0
#> 10 0 0
#> # … with 53 more rows
由 reprex 包 (v2.0.1) 于 2022-03-22 创建
这是一种 tidyverse + mapply 的方法:
library(dplyr)
library(tidyr)
# mapply() calls callMC once per row with that row's tau, r, sigma, S0 and K
# (nSim stays at its default); SIMPLIFY = FALSE keeps each result as a list so
# it can be stored in the `data` list-column, which unnest_wider() then
# spreads into price_call and sterr_call columns.
df |>
  mutate(
    data = mapply(
      callMC,
      tau = tau, r = r, sigma = sigma, S0 = S0, K = K,
      SIMPLIFY = FALSE
    )
  ) |>
  unnest_wider(col = data)
##> + # A tibble: 63 × 7
##> tau r sigma S0 K price_call sterr_call
##> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##> 1 0.00274 0.0168 0.256 136. 140 0.00629 0.000921
##> 2 0.00548 0.016 0.256 135. 140 0.0181 0.00178
##> 3 0.00822 0.0165 0.256 134. 140 0.0531 0.00345
##> 4 0.0110 0.0154 0.256 134. 140 0.0920 0.00516
##> 5 0.0137 0.0152 0.256 134. 140 0.150 0.00722
##> 6 0.0219 0.0156 0.256 134. 140 0.358 0.0127
##> 7 0.0247 0.0175 0.256 135. 140 0.599 0.0168
##> 8 0.0274 0.0159 0.256 134. 140 0.419 0.0143
##> 9 0.0329 0.0176 0.256 134. 140 0.551 0.0176
##> 10 0.0411 0.0177 0.256 134. 140 0.825 0.0232
##> # … with 53 more rows
问题是您将一个向量(包含 5 个参数)传递给您的函数,该函数需要 5 个单独的参数 - 如果包含 n,则为 6 个。您需要让您的函数知道它正在接收 6 个单独的参数
这是在 base R 中获得简洁答案的一种简单方法。我对第一个参数进行了“硬编码”,因为您似乎使用的是 10000,但如果需要,您可以将其设为变量
x[1]、x[2]… 是数据框 df 每一行中的各个元素。
然后我用 do.call、rbind 和 as.data.frame 把结果整理成一个整洁的数据框。
# apply() passes each row of df to the function as a vector x whose elements
# follow the column order tau, r, sigma, S0, K; do.call(rbind, ...) stacks the
# per-row result lists and as.data.frame() turns the stack into a data frame.
as.data.frame(
  do.call(
    rbind,
    apply(df, 1, function(x) {
      callMC(nSim = 10000, tau = x[1], r = x[2], sigma = x[3], S0 = x[4], K = x[5])
    })
  )
)
price_call sterr_call
1 0.006132845 0.0008339928
2 0.01769498 0.001826786
3 0.06270669 0.003937882
4 0.08954985 0.005085325
5 0.1461733 0.007129216
6 0.3558649 0.01246984
7 0.6200213 0.01750186
...
编辑:另一种答案,直接使用您在 df 中定义的变量。同样,as.data.frame 和 t 只是为了整理结果。
# mapply() walks the tau, r, sigma, S0 and K vectors in parallel (the single
# nSim value is recycled); t() flips the result so each option is one row.
as.data.frame(
  t(mapply(callMC, nSim = 10000, tau = tau, r = r, sigma = sigma, S0 = S0, K = K))
)
price_call sterr_call
1 0.006855707 0.0009683526
2 0.01869136 0.001834313
3 0.04815097 0.003332207
4 0.08621679 0.004994365
5 0.1532411 0.007166206
...