仅在特定日期进行滚动回归

Do a rolling regression only on specific dates

我正在尝试进行以下滚动回归:stock_return = α + β market_return.

我的数据集 "data" 如下所示:

Company Date        stock_return    market_return       Alpha   Beta
AAPL    01.01.2014  3%              4%
…
AAPL    31.12.2019  5%              1%
MSFT    01.01.2014  2%              4%
…
MSFT    31.12.2019  6%              1%

此外,我有一个数据框,其中包含应该执行回归的事件:

Company Date        
AAPL    05.02.2015
…   
MSFT    04.08.2018

我正在使用 runner 包进行滚动回归:

# Fit stock_return ~ market_return by ordinary least squares on one window
# of observations and return the coefficient vector of that fit.
# `z` is coerced with as.data.frame(), so it must carry columns named
# stock_return and market_return.
running_regression <- function(z) {
  window_df <- as.data.frame(z)
  window_fit <- lm(stock_return ~ market_return, data = window_df)
  coef(window_fit)
}

# Attempted rolling call from the question: the callback is
# running_regression and the value for `at` is still undecided
# (the `?` below is a placeholder left by the asker).
# NOTE(review): the first argument is an index vector
# (seq_along(data$market_return)), not the data itself - confirm what the
# callback is expected to receive.
output <- runner(seq_along(data$market_return), 
                         k = 180, 
                         lag = 5,
                         at = ?
                         f = running_regression)

但是,我不清楚该如何编写回归函数,也不清楚应该给 "at" 传入什么,才能只对第二个数据框中的事件日期进行回归。

请看下面的一个最小示例:

    # Minimal reproducible example.
    # Daily calendar for 2014-2019, stored as "dd.mm.YYYY" strings.
    Date <- seq(from = as.Date("2014-01-01"), to = as.Date("2019-12-31"), by = "day")
    Date <- format(Date, format = "%d.%m.%Y")
    n_days <- length(Date)

    # Both companies share the same simulated market return series.
    market_return <- rnorm(n_days)
    AAPL <- data.frame(Company.name = "AAPL", Date, market_return)
    MSFT <- data.frame(Company.name = "MSFT", Date, market_return)

    # Stack the two companies and add one simulated stock return per row.
    df <- rbind(AAPL, MSFT)
    df$stock_return <- rnorm(nrow(df))

    # Placeholder columns for the regression coefficients.
    df$Alpha <- NA
    df$Beta <- NA

    # Event table: 450 events per company, dates drawn without replacement
    # from 2015-2019 and stored in the same "dd.mm.YYYY" string format.
    Company.name2 <- rep(c("AAPL", "MSFT"), each = 450)
    Event_date <- sample(seq(as.Date("2015/01/01"), as.Date("2019/12/31"), by = "day"), 900)
    Event_date <- format(Event_date, format = "%d.%m.%Y")
    df2 <- data.frame(Company.name2, Event_date)

    #____

    # Fit stock_return ~ market_return separately for each Company.name in
    # `z` via lmList() and return the coefficients of those fits.
    # NOTE(review): lmList() is not loaded anywhere in this snippet
    # (presumably from nlme or lme4) - confirm which package is intended.
    running_regression <- function(z) {
      per_company_fits <- lmList(stock_return ~ market_return | Company.name, data = z)
      coef(per_company_fits)
    }


# Attempted rolling call from the question, with no `at` supplied.
# NOTE(review): the input is the bare numeric vector df$market_return,
# while running_regression fits a formula that refers to the columns
# stock_return and market_return - the windows built from this vector do
# not carry those columns.
runner(df$market_return, 
       k = 180, 
       lag = 5,
       f = running_regression)

希望我没猜错,

如果您只想对 df2 中的特定日期运行回归,一个快速的解决方案是按对应的列(公司名称和日期)将两个数据框合并:

# Preview the first rows of the event table joined to the returns table,
# matching company name and date columns across the two data frames.
head(
  merge(
    x = df2,
    y = df,
    by.x = c("Company.name2", "Event_date"),
    by.y = c("Company.name", "Date")
  )
)
  Company.name2 Event_date market_return stock_return Alpha Beta
1          AAPL 01.01.2016    -0.4814525   -0.9864268    NA   NA
2          AAPL 01.01.2019    -1.3093155    0.8357707    NA   NA
3          AAPL 01.02.2017     1.5059626   -1.3181592    NA   NA
4          AAPL 01.03.2015    -1.3226590    1.4786993    NA   NA
5          AAPL 01.03.2016     2.1394338   -0.8900286    NA   NA
6          AAPL 01.03.2019     0.6035526    0.5453212    NA   NA

您可以看到日期乱七八糟,因此首先按公司和日期对 df2 进行排序是有意义的:

# Order the events by company and then chronologically. Event_date is a
# "dd.mm.YYYY" string here, so it is parsed to Date for ordering only.
df2 <- df2[order(
  df2$Company.name2,
  as.Date(as.character(df2$Event_date), format = "%d.%m.%Y")
), ]

# Attach the return columns of df to every event row.
df_merged <- merge(
  df2, df,
  by.x = c("Company.name2", "Event_date"),
  by.y = c("Company.name", "Date"),
  sort = FALSE
)

# merge() leaves the row order unspecified when sort = FALSE, so restore
# the company/date order explicitly; the rolling fit that follows depends
# on rows being in chronological order within each company.
df_merged <- df_merged[order(
  df_merged$Company.name2,
  as.Date(as.character(df_merged$Event_date), format = "%d.%m.%Y")
), ]

要进行滚动回归,您可以在此处使用 lmList,但这会变得有点复杂。下面我使用 roll 包中的 roll_lm 来计算系数。我不太清楚您的滞后(lag)具体指什么,也许您可以详细说明一下;在不考虑滞后的情况下,可以这样运行回归:

library(roll)

# For each company in df_merged, run roll_lm() with market_return as the
# predictor, stock_return as the response and a window width of 180.
result <- by(
  data = df_merged,
  INDICES = df_merged$Company.name2,
  FUN = function(company_rows) {
    roll_lm(
      x = company_rows$market_return,
      y = company_rows$stock_return,
      width = 180
    )
  }
)

# Show the last rows of the rolling coefficient matrix for the AAPL group.
tail(result$AAPL$coefficients)
       (Intercept)         x1
[445,] -0.07817682 0.10662762
[446,] -0.06440454 0.09257577
[447,] -0.07007445 0.09461642
[448,] -0.05917523 0.09582312
[449,] -0.05292590 0.10025369
[450,] -0.04930798 0.09911921

自 runner 0.3.5 版本起,您可以指定 x = df,直接在 data.frame 的窗口上运行回归。由于是对 df2 执行 mutate,您必须通过 x = df[df$Company.name == Company.name2[1], ] 取出 df 中对应公司的那部分数据,并且对 idx 也必须做同样的子集处理。

# Intercept of the least-squares fit stock_return ~ market_return on the
# window `x` (a data frame with those two columns).
# Returns the first coefficient as a length-one named numeric.
running_regression_intercept <- function(x) {
  window_fit <- lm(stock_return ~ market_return, data = x)
  estimates <- coef(window_fit)
  estimates[1]
}

# Slope of the least-squares fit stock_return ~ market_return on the
# window `x` (a data frame with those two columns).
# Returns the second coefficient as a length-one named numeric.
running_regression_slope <- function(x) {
  window_fit <- lm(stock_return ~ market_return, data = x)
  estimates <- coef(window_fit)
  estimates[2]
}
library(dplyr)
library(runner)

# For every event row in df2, fit stock_return ~ market_return on a window
# of df rows from the same company and store the two coefficients as the
# new columns `intercept` and `slope`.
df2 %>%
  group_by(Company.name2) %>%
  mutate(
    intercept = runner(
      # Company.name2 is constant inside a group, so its first element
      # identifies the company whose rows of df form the windows.
      x = df[df$Company.name == Company.name2[1], ],
      k = "180 days",
      lag = "5 days",
      # idx must be subset exactly like x so that dates line up with rows.
      idx = df$Date[df$Company.name == Company.name2[1]],
      # Evaluate windows only at this group's event dates.
      at = Event_date,
      f = running_regression_intercept
    ),
    slope = runner(
      x = df[df$Company.name == Company.name2[1], ],
      k = "180 days",
      lag = "5 days",
      idx = df$Date[df$Company.name == Company.name2[1]],
      at = Event_date,
      f = running_regression_slope
    )
  )

# Company.name2 Event_date intercept    slope
#     <fct>         <date>        <dbl>    <dbl>
#   1 AAPL          2017-01-12  0.0114   0.00488
#   2 AAPL          2017-07-31 -0.0654   0.00574
#   3 AAPL          2019-02-27 -0.0861   0.0310 
#   4 AAPL          2018-09-06  0.0405  -0.0630 
#   5 AAPL          2015-09-03 -0.121   -0.0246 
#   6 AAPL          2018-11-20 -0.0283  -0.0254 
#   7 AAPL          2015-07-03 -0.116   -0.0186 
#   8 AAPL          2015-02-03  0.102    0.0409 
#   9 AAPL          2017-03-16 -0.0157   0.0124 
#   10 AAPL          2019-06-08 -0.00302  0.0532 

我需要稍微修改一下您的数据,因为在您的示例中 Event_date(以及 Date)经过 format() 之后从 Date 类型变成了 character 类型,而这里需要保留 Date 类型:

# Rebuild the example data with real Date columns (no format() step).

# Daily calendar for 2014-2019 and one simulated market return per day.
Date <- seq(as.Date("2014-01-01"), as.Date("2019-12-31"), by = "day")
market_return <- rnorm(2191)

# Both companies share the same dates and the same market returns.
AAPL <- data.frame(Company.name = "AAPL", Date = Date, market_return = market_return)
MSFT <- data.frame(Company.name = "MSFT", Date = Date, market_return = market_return)

# Stack the companies, add one simulated stock return per row, then order
# the rows by date.
df <- cbind(rbind(AAPL, MSFT), stock_return = rnorm(4382))
df <- df[order(df$Date), ]

# Event table: 450 events per company, dates sampled without replacement
# from the 2015-2019 calendar.
df2 <- data.frame(
  Company.name2 = rep(c("AAPL", "MSFT"), each = 450),
  Event_date = sample(
    seq(as.Date("2015/01/01"), as.Date("2019/12/31"), by = "day"),
    size = 900
  )
)