如何将函数应用于数据集的行并获取 R 中每一行的结果

Question

我写了一个函数，它使用 Monte Carlo Simulation 来计算 R 中看涨期权的价值。我想将该函数应用到包含真实数据的数据集中的 63 行。换句话说，我希望函数将每一行的值用于其变量我可以为函数的变量赋值，但是对于大量数据这样做需要时间

# call put option monte carlo
callMC<-function(nSim=10000, tau, r, sigma, S0, K) {
  
  Z <- rnorm(nSim, mean=0, sd=1)
  BT <- sqrt(tau) * Z
  ST = S0*exp((r - 0.5*sigma^2)*tau + sigma*BT)
  
  # price and standard error of call option
  simulated_call_payoffs <- exp(-r*tau)*pmax(ST-K,0)
  price_call <- mean(simulated_call_payoffs)
  sterr_call <- sd(simulated_call_payoffs)/sqrt(nSim)
  
  output<-list(price_call=price_call, sterr_call=sterr_call)
  return(output)
  
}
set.seed(1)
results<-callMC(n=10000, tau=70/365, r=0.0176, 
                     sigma=0.208, S0=142.76, K=140)
results

tau <- c(1,
         2,
         3,
         4,
         5,
         8,
         9,
         10,
         12,
         15,
         15,
         16,
         17,
         18,
         19,
         22,
         24,
         25,
         26,
         29,
         30,
         31,
         32,
         33,
         36,
         37,
         38,
         39,
         40,
         43,
         44,
         45,
         46,
         47,
         50,
         51,
         52,
         53,
         54,
         57,
         58,
         59,
         60,
         61,
         64,
         65,
         66,
         67,
         68,
         71,
         72,
         73,
         74,
         75,
         78,
         79,
         80,
         81,
         82,
         85,
         86,
         87,
         88
)/365
r <- c(0.0168,  0.016,  0.0165, 0.0154, 0.0152, 0.0156, 0.0175, 0.0159, 0.0176,
       0.0177,  0.0167, 0.0154, 0.0176, 0.0176, 0.0176, 0.0178, 0.018,  0.0177,
       0.0179,  0.018,  0.0185, 0.0177, 0.0178, 0.0184, 0.0169, 0.0173, 0.0192, 0.0182, 0.0184, 0.0178, 0.0183, 0.0177, 0.0177, 0.0174, 0.0192, 0.0181, 0.0181, 0.0194, 0.0176, 0.0177, 0.0193, 0.0179, 0.0188, 0.0186, 0.0177, 0.0173, 0.018,  0.0179, 0.0184, 0.019,  0.0183, 0.0177, 0.0172, 0.0185, 0.0192, 0.0189, 0.0189, 0.0192, 0.0192, 0.0192, 0.0192, 0.0192, 0.0182
)
sigma <- c(0.2564,0.2564,0.2564,0.2564,0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564)
S0<-c(
  135.59,
  134.56,
  134.41,
  134.22,
  134.13,
  134.21,
  135.32,
  133.76,
  133.91,
  133.92,
  133.22,
  131.91,
  131.99,
  132.12,
  132.91,
  134.45,
  133.77,135.09,
  135.97,
  134.34,
  133.84,
  133.2,
  134.52,
  134.31,
  134.4,
  134,
  134.48,
  135.59,
  135.47,
  137.61,
  137.69,
  138.78,
  137.89,
  137.67,
  135.53,
  133.73,
  135.25,
  133.82,
  135.97,
  135.44,
  134.07,
  134.38,
  133.96,
  132.58,
  134.09,
  134.26,
  142.11,
  143,
  142.04,
  142.76,
  141.13,
  139.67,
  138.38,
  141.28,
  142.99,
  142.02,
  141.69,
  143.66,
  145.42,
  143.24,
  143.55,
  143.16,
  141.68
)
K <-rep(140, each=1, times=63)
df<- data.frame(tau,r,sigma, S0,K)

我使用 apply(df,1,callMC) 将函数应用于每一行，但是，我遇到了错误

apply(df,1,callMC)
Error in FUN(newX[, i], ...) : 
  argument "tau" is missing, with no default

Answer 1

包 {purrr} 中的函数 map 对这些情况非常有用。

以下位遍历 df 中的每一行，并将每一列的值提供给函数（.x 从 1 到 df 中的行数，一个接一个。您可以将一个值赋给.x 测试该特定行；例如，.x = 1).

1:nrow(df) %>% 
  map_df(~ callMC(n=.x, tau=df$tau[.x], r=df$r[.x], sigma=df$sigma[.x], S0=df$S0[.x], K=df$K[.x]))

这里是完整代码：

# call put option monte carlo
callMC<-function(nSim=10000, tau, r, sigma, S0, K) {
  
  Z <- rnorm(nSim, mean=0, sd=1)
  BT <- sqrt(tau) * Z
  ST = S0*exp((r - 0.5*sigma^2)*tau + sigma*BT)
  
  # price and standard error of call option
  simulated_call_payoffs <- exp(-r*tau)*pmax(ST-K,0)
  price_call <- mean(simulated_call_payoffs)
  sterr_call <- sd(simulated_call_payoffs)/sqrt(nSim)
  
  output<-list(price_call=price_call, sterr_call=sterr_call)
  return(output)
  
}
set.seed(1)
results<-callMC(n=10000, tau=70/365, r=0.0176, 
                sigma=0.208, S0=142.76, K=140)
results
#> $price_call
#> [1] 6.908401
#> 
#> $sterr_call
#> [1] 0.09126226

tau <- c(1,
         2,
         3,
         4,
         5,
         8,
         9,
         10,
         12,
         15,
         15,
         16,
         17,
         18,
         19,
         22,
         24,
         25,
         26,
         29,
         30,
         31,
         32,
         33,
         36,
         37,
         38,
         39,
         40,
         43,
         44,
         45,
         46,
         47,
         50,
         51,
         52,
         53,
         54,
         57,
         58,
         59,
         60,
         61,
         64,
         65,
         66,
         67,
         68,
         71,
         72,
         73,
         74,
         75,
         78,
         79,
         80,
         81,
         82,
         85,
         86,
         87,
         88
)/365
r <- c(0.0168,  0.016,  0.0165, 0.0154, 0.0152, 0.0156, 0.0175, 0.0159, 0.0176,
       0.0177,  0.0167, 0.0154, 0.0176, 0.0176, 0.0176, 0.0178, 0.018,  0.0177,
       0.0179,  0.018,  0.0185, 0.0177, 0.0178, 0.0184, 0.0169, 0.0173, 0.0192, 0.0182, 0.0184, 0.0178, 0.0183, 0.0177, 0.0177, 0.0174, 0.0192, 0.0181, 0.0181, 0.0194, 0.0176, 0.0177, 0.0193, 0.0179, 0.0188, 0.0186, 0.0177, 0.0173, 0.018,  0.0179, 0.0184, 0.019,  0.0183, 0.0177, 0.0172, 0.0185, 0.0192, 0.0189, 0.0189, 0.0192, 0.0192, 0.0192, 0.0192, 0.0192, 0.0182
)
sigma <- c(0.2564,0.2564,0.2564,0.2564,0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564,
           0.2564)
S0<-c(
  135.59,
  134.56,
  134.41,
  134.22,
  134.13,
  134.21,
  135.32,
  133.76,
  133.91,
  133.92,
  133.22,
  131.91,
  131.99,
  132.12,
  132.91,
  134.45,
  133.77,135.09,
  135.97,
  134.34,
  133.84,
  133.2,
  134.52,
  134.31,
  134.4,
  134,
  134.48,
  135.59,
  135.47,
  137.61,
  137.69,
  138.78,
  137.89,
  137.67,
  135.53,
  133.73,
  135.25,
  133.82,
  135.97,
  135.44,
  134.07,
  134.38,
  133.96,
  132.58,
  134.09,
  134.26,
  142.11,
  143,
  142.04,
  142.76,
  141.13,
  139.67,
  138.38,
  141.28,
  142.99,
  142.02,
  141.69,
  143.66,
  145.42,
  143.24,
  143.55,
  143.16,
  141.68
)
K <-rep(140, each=1, times=63)
df<- data.frame(tau,r,sigma, S0,K)


library(purrr)
DF = 1:nrow(df) %>% 
  map_df(~ callMC(n=.x, tau=df$tau[.x], r=df$r[.x], sigma=df$sigma[.x], S0=df$S0[.x], K=df$K[.x]))

DF
#> # A tibble: 63 × 2
#>    price_call sterr_call
#>         <dbl>      <dbl>
#>  1      0         NA    
#>  2      0          0    
#>  3      0          0    
#>  4      0          0    
#>  5      0          0    
#>  6      0.660      0.660
#>  7      0          0    
#>  8      0.386      0.386
#>  9      0          0    
#> 10      0          0    
#> # … with 53 more rows

^{由 reprex package (v2.0.1)}

于 2022-03-22 创建

Answer 2

这是一个 tidyverse + mapply 方法：

library(dplyr)
library(tidyr)

df |>
  mutate(data = mapply(callMC,tau = tau, r = r, sigma = sigma, S0 = S0, K = K, SIMPLIFY = FALSE)) |>
  unnest_wider(col = data)


##> + # A tibble: 63 × 7
##>        tau      r sigma    S0     K price_call sterr_call
##>      <dbl>  <dbl> <dbl> <dbl> <dbl>      <dbl>      <dbl>
##>  1 0.00274 0.0168 0.256  136.   140    0.00629   0.000921
##>  2 0.00548 0.016  0.256  135.   140    0.0181    0.00178 
##>  3 0.00822 0.0165 0.256  134.   140    0.0531    0.00345 
##>  4 0.0110  0.0154 0.256  134.   140    0.0920    0.00516 
##>  5 0.0137  0.0152 0.256  134.   140    0.150     0.00722 
##>  6 0.0219  0.0156 0.256  134.   140    0.358     0.0127  
##>  7 0.0247  0.0175 0.256  135.   140    0.599     0.0168  
##>  8 0.0274  0.0159 0.256  134.   140    0.419     0.0143  
##>  9 0.0329  0.0176 0.256  134.   140    0.551     0.0176  
##> 10 0.0411  0.0177 0.256  134.   140    0.825     0.0232  
##> # … with 53 more rows

Answer 3

问题是您将一个向量（包含 5 个参数）传递给您的函数，该函数需要 5 个单独的参数 - 如果包含 n，则为 6 个。您需要让您的函数知道它正在接收 6 个单独的参数

这是在 base R 中获得简洁答案的一种简单方法。我对第一个参数进行了“硬编码”，因为您似乎使用的是 10000，但如果需要，您可以将其设为变量

x[1], x[2]... 是数据框每一行的独立元素 df

然后我使用 do.call、rbind 和 as.data.frame 将结果制作成一个漂亮的数据框

as.data.frame(do.call(rbind, apply(df, 1, function(x) callMC(10000, x[1], x[2], x[3], x[4], x[5]))))

    price_call   sterr_call
1  0.006132845 0.0008339928
2   0.01769498  0.001826786
3   0.06270669  0.003937882
4   0.08954985  0.005085325
5    0.1461733  0.007129216
6    0.3558649   0.01246984
7    0.6200213   0.01750186
...

编辑：使用您在 df 中定义的变量替代答案。同样，as.data.frame和t只是为了整理结果

as.data.frame(t(mapply(callMC, 10000, tau, r, sigma, S0, K)))

    price_call   sterr_call
1  0.006855707 0.0009683526
2   0.01869136  0.001834313
3   0.04815097  0.003332207
4   0.08621679  0.004994365
5    0.1532411  0.007166206
...

如何将函数应用于数据集的行并获取 R 中每一行的结果

How to apply a function to a rows of a dataset and get the result for each row in R

r

montecarlo