仅在特定日期进行滚动回归
Do a rolling regression only on specific dates
我正在尝试进行以下滚动回归:stock_return = α + β market_return.
我的数据集 "data" 如下所示:
Company Date stock_return market_return Alpha Beta
AAPL 01.01.2014 3% 4%
…
AAPL 31.12.2019 5% 1%
MSFT 01.01.2014 2% 4%
…
MSFT 31.12.2019 6% 1%
此外,我有一个数据框,其中包含应该执行回归的事件:
Company Date
AAPL 05.02.2015
…
MSFT 04.08.2018
我正在使用 runner 包进行滚动回归:
# Fit stock_return = alpha + beta * market_return on one window of data.
#
# z: a window of observations convertible to a data frame with the columns
#    `stock_return` and `market_return`.
# Returns the named coefficient vector: "(Intercept)" and "market_return".
running_regression <- function(z) {
  window_df <- as.data.frame(z)
  fit <- lm(stock_return ~ market_return, data = window_df)
  coef(fit)
}
# Rolling call as posted in the question (pseudo-code, does not parse as is).
# NOTE(review): `at = ?` is a placeholder -- the asker does not know what to
# pass here -- and the line is also missing its trailing comma.
# NOTE(review): x is only a vector of row positions (seq_along), while
# running_regression() needs the stock_return / market_return columns.
output <- runner(seq_along(data$market_return),
k = 180,
lag = 5,
at = ?
f = running_regression)
但是,我不确定我的函数该如何编写,也不清楚 "at" 参数应该填什么,才能只对第二个数据框中的事件日期进行回归。
请看下面的一个最小示例:
# Minimal example data for the question.
#
# df : one row per company and calendar day (2014-2019) with simulated
#      market/stock returns and empty Alpha/Beta columns to be filled in.
# df2: 450 event dates per company, drawn without replacement from 2015-2019.
# Dates are stored as "dd.mm.YYYY" character strings in both data frames.
Date <- seq(
  from = as.Date("2014-01-01"),
  to = as.Date("2019-12-31"),
  by = "day"
)
Date <- format(Date, format = "%d.%m.%Y")

# Both companies share one simulated market return series; its length is
# derived from the calendar instead of being hard-coded.
market_return <- rnorm(length(Date))
AAPL <- data.frame(
  Company.name = "AAPL",
  Date = Date,
  market_return = market_return
)
MSFT <- data.frame(
  Company.name = "MSFT",
  Date = Date,
  market_return = market_return
)

df <- rbind(AAPL, MSFT)
stock_return <- rnorm(nrow(df))
df <- cbind(df, stock_return)
df[, "Alpha"] <- NA
df[, "Beta"] <- NA

# Event table: company label plus the date on which to run a regression.
Company.name2 <- rep(c("AAPL", "MSFT"), each = 450)
Event_date <- sample(
  seq(as.Date("2015/01/01"), as.Date("2019/12/31"), by = "day"),
  900
)
Event_date <- format(Event_date, format = "%d.%m.%Y")
df2 <- data.frame(Company.name2, Event_date)
#____
# Second attempt from the question: one regression per company inside each
# window, via lmList() grouped on Company.name.
# NOTE(review): lmList() is not loaded anywhere in the visible code --
# presumably it comes from nlme or lme4; confirm.
running_regression <- function(z) {
coef(lmList(stock_return ~ market_return | Company.name, data=z))
}
# NOTE(review): x is the bare numeric vector df$market_return, so each window
# `z` handed to running_regression() carries no stock_return or Company.name
# column for the formula above. No `at` is supplied and df2 is not referenced,
# so nothing here restricts the calculation to the event dates.
runner(df$market_return,
k = 180,
lag = 5,
f = running_regression)
希望我没猜错,
如果您只想对 df2 中的特定日期运行回归,一个快速的解决方案是按对应的列(公司名称和日期)将两个数据框合并:
# Preview the rows of df whose (company, date) pair is listed in df2.
head(
  merge(
    x = df2,
    y = df,
    by.x = c("Company.name2", "Event_date"),
    by.y = c("Company.name", "Date")
  )
)
Company.name2 Event_date market_return stock_return Alpha Beta
1 AAPL 01.01.2016 -0.4814525 -0.9864268 NA NA
2 AAPL 01.01.2019 -1.3093155 0.8357707 NA NA
3 AAPL 01.02.2017 1.5059626 -1.3181592 NA NA
4 AAPL 01.03.2015 -1.3226590 1.4786993 NA NA
5 AAPL 01.03.2016 2.1394338 -0.8900286 NA NA
6 AAPL 01.03.2019 0.6035526 0.5453212 NA NA
您可以看到日期乱七八糟,因此首先按公司和日期对 df2 进行排序是有意义的:
# Sort the events by company and by *parsed* date: the dd.mm.YYYY strings
# would otherwise sort alphabetically rather than chronologically.
df2 <- df2[
  order(
    df2$Company.name2,
    as.Date(as.character(df2$Event_date), format = "%d.%m.%Y")
  ),
]

# Keep only the rows of df that fall on an event date of the same company.
df_merged <- merge(
  df2, df,
  by.x = c("Company.name2", "Event_date"),
  by.y = c("Company.name", "Date"),
  sort = FALSE
)

# merge(sort = FALSE) returns rows in an unspecified order, so re-establish
# the company/chronological order explicitly; the rolling window computed on
# df_merged afterwards depends on it.
df_merged <- df_merged[
  order(
    df_merged$Company.name2,
    as.Date(as.character(df_merged$Event_date), format = "%d.%m.%Y")
  ),
]
rownames(df_merged) <- NULL
要进行滚动回归,您可以在此处使用 lmList,但那样会比较复杂。下面我使用 roll 包中的 roll_lm 来计算系数。我不太理解您所说的滞后(lag),也许您可以再详细说明一下;因此下面的回归没有考虑滞后:
library(roll)
# Per company, run roll_lm() with a window width of 180 over the merged
# event rows, regressing stock_return (y) on market_return (x).
result <- by(
  data = df_merged,
  INDICES = df_merged$Company.name2,
  FUN = function(company_rows) {
    roll_lm(
      x = company_rows$market_return,
      y = company_rows$stock_return,
      width = 180
    )
  }
)

# Show the last few coefficient rows for AAPL.
tail(result$AAPL$coefficients)
(Intercept) x1
[445,] -0.07817682 0.10662762
[446,] -0.06440454 0.09257577
[447,] -0.07007445 0.09461642
[448,] -0.05917523 0.09582312
[449,] -0.05292590 0.10025369
[450,] -0.04930798 0.09911921
自 runner 0.3.5 版本起,您可以指定 x = df,直接在 data.frame 的窗口上运行回归。由于您是在 df2 上执行 mutate,必须通过 x = df[df$Company.name == Company.name2[1], ] 取出 df 中对应公司的那部分数据——对 idx 也必须做同样的处理。
# Intercept (alpha) of stock_return ~ market_return on one data window.
#
# x: data frame holding the columns `stock_return` and `market_return`.
# Returns a length-one named numeric: the first coefficient, "(Intercept)".
running_regression_intercept <- function(x) {
  fit <- lm(stock_return ~ market_return, data = x)
  estimates <- coef(fit)
  estimates[1]
}
# Slope (beta) of stock_return ~ market_return on one data window.
#
# x: data frame holding the columns `stock_return` and `market_return`.
# Returns a length-one named numeric: the second coefficient, "market_return".
running_regression_slope <- function(x) {
  fit <- lm(stock_return ~ market_return, data = x)
  estimates <- coef(fit)
  estimates[2]
}
library(dplyr)
library(runner)
# For every event in df2, regress stock_return on market_return over the
# date window that runner() builds for that event (k = "180 days",
# lag = "5 days"), using only the rows of df for the event's company.
#
# mutate() is evaluated once per company group, so Company.name2[1] is that
# group's company. `idx` must be the Date column of exactly the same df
# subset that is passed as `x`, and `at` limits the output to the event dates.
df2 %>%
  group_by(Company.name2) %>%
  mutate(
    intercept = runner(
      x = df[df$Company.name == Company.name2[1], ],
      k = "180 days",
      lag = "5 days",
      idx = df$Date[df$Company.name == Company.name2[1]],
      at = Event_date,
      # No trailing comma after the last argument: it would pass an empty
      # extra argument into the call.
      f = running_regression_intercept
    ),
    slope = runner(
      x = df[df$Company.name == Company.name2[1], ],
      k = "180 days",
      lag = "5 days",
      idx = df$Date[df$Company.name == Company.name2[1]],
      at = Event_date,
      f = running_regression_slope
    )
  ) %>%
  # Drop the grouping so later verbs do not silently operate per company.
  ungroup()
# Company.name2 Event_date intercept slope
# <fct> <date> <dbl> <dbl>
# 1 AAPL 2017-01-12 0.0114 0.00488
# 2 AAPL 2017-07-31 -0.0654 0.00574
# 3 AAPL 2019-02-27 -0.0861 0.0310
# 4 AAPL 2018-09-06 0.0405 -0.0630
# 5 AAPL 2015-09-03 -0.121 -0.0246
# 6 AAPL 2018-11-20 -0.0283 -0.0254
# 7 AAPL 2015-07-03 -0.116 -0.0186
# 8 AAPL 2015-02-03 0.102 0.0409
# 9 AAPL 2017-03-16 -0.0157 0.0124
# 10 AAPL 2019-06-08 -0.00302 0.0532
我需要稍微修改一下您的数据,因为 format() 会把 Event_date 从 Date 类型转换为 character 类型。
# Example data with real Date columns (no format() conversion to character).
#
# df : one row per company and calendar day (2014-2019), ordered by Date.
# df2: 450 event dates per company, drawn without replacement from 2015-2019.
Date <- seq(
  from = as.Date("2014-01-01"),
  to = as.Date("2019-12-31"),
  by = "day"
)

# One simulated market series shared by both companies; its length follows
# the calendar instead of a hard-coded 2191.
market_return <- rnorm(length(Date))

AAPL <- data.frame(
  Company.name = "AAPL",
  Date = Date,
  market_return = market_return
)
MSFT <- data.frame(
  Company.name = "MSFT",
  Date = Date,
  market_return = market_return
)

df <- rbind(AAPL, MSFT)
df$stock_return <- rnorm(nrow(df))

# Order by date so the Date column is ascending within each company.
df <- df[order(df$Date), ]

df2 <- data.frame(
  Company.name2 = rep(c("AAPL", "MSFT"), each = 450),
  Event_date = sample(
    seq(as.Date("2015/01/01"), as.Date("2019/12/31"), by = "day"),
    size = 900
  )
)
我正在尝试进行以下滚动回归:stock_return = α + β market_return.
我的数据集 "data" 如下所示:
Company Date stock_return market_return Alpha Beta
AAPL 01.01.2014 3% 4%
…
AAPL 31.12.2019 5% 1%
MSFT 01.01.2014 2% 4%
…
MSFT 31.12.2019 6% 1%
此外,我有一个数据框,其中包含应该执行回归的事件:
Company Date
AAPL 05.02.2015
…
MSFT 04.08.2018
我正在使用 runner 包进行滚动回归:
# Fit stock_return = alpha + beta * market_return on one window of data.
#
# z: a window of observations convertible to a data frame with the columns
#    `stock_return` and `market_return`.
# Returns the named coefficient vector: "(Intercept)" and "market_return".
running_regression <- function(z) {
  window_df <- as.data.frame(z)
  fit <- lm(stock_return ~ market_return, data = window_df)
  coef(fit)
}
# Rolling call as posted in the question (pseudo-code, does not parse as is).
# NOTE(review): `at = ?` is a placeholder -- the asker does not know what to
# pass here -- and the line is also missing its trailing comma.
# NOTE(review): x is only a vector of row positions (seq_along), while
# running_regression() needs the stock_return / market_return columns.
output <- runner(seq_along(data$market_return),
k = 180,
lag = 5,
at = ?
f = running_regression)
但是,我不确定我的函数该如何编写,也不清楚 "at" 参数应该填什么,才能只对第二个数据框中的事件日期进行回归。
请看下面的一个最小示例:
# Minimal example data for the question.
#
# df : one row per company and calendar day (2014-2019) with simulated
#      market/stock returns and empty Alpha/Beta columns to be filled in.
# df2: 450 event dates per company, drawn without replacement from 2015-2019.
# Dates are stored as "dd.mm.YYYY" character strings in both data frames.
Date <- seq(
  from = as.Date("2014-01-01"),
  to = as.Date("2019-12-31"),
  by = "day"
)
Date <- format(Date, format = "%d.%m.%Y")

# Both companies share one simulated market return series; its length is
# derived from the calendar instead of being hard-coded.
market_return <- rnorm(length(Date))
AAPL <- data.frame(
  Company.name = "AAPL",
  Date = Date,
  market_return = market_return
)
MSFT <- data.frame(
  Company.name = "MSFT",
  Date = Date,
  market_return = market_return
)

df <- rbind(AAPL, MSFT)
stock_return <- rnorm(nrow(df))
df <- cbind(df, stock_return)
df[, "Alpha"] <- NA
df[, "Beta"] <- NA

# Event table: company label plus the date on which to run a regression.
Company.name2 <- rep(c("AAPL", "MSFT"), each = 450)
Event_date <- sample(
  seq(as.Date("2015/01/01"), as.Date("2019/12/31"), by = "day"),
  900
)
Event_date <- format(Event_date, format = "%d.%m.%Y")
df2 <- data.frame(Company.name2, Event_date)
#____
# Second attempt from the question: one regression per company inside each
# window, via lmList() grouped on Company.name.
# NOTE(review): lmList() is not loaded anywhere in the visible code --
# presumably it comes from nlme or lme4; confirm.
running_regression <- function(z) {
coef(lmList(stock_return ~ market_return | Company.name, data=z))
}
# NOTE(review): x is the bare numeric vector df$market_return, so each window
# `z` handed to running_regression() carries no stock_return or Company.name
# column for the formula above. No `at` is supplied and df2 is not referenced,
# so nothing here restricts the calculation to the event dates.
runner(df$market_return,
k = 180,
lag = 5,
f = running_regression)
希望我没猜错,
如果您只想对 df2 中的特定日期运行回归,一个快速的解决方案是按对应的列(公司名称和日期)将两个数据框合并:
# Preview the rows of df whose (company, date) pair is listed in df2.
head(
  merge(
    x = df2,
    y = df,
    by.x = c("Company.name2", "Event_date"),
    by.y = c("Company.name", "Date")
  )
)
Company.name2 Event_date market_return stock_return Alpha Beta
1 AAPL 01.01.2016 -0.4814525 -0.9864268 NA NA
2 AAPL 01.01.2019 -1.3093155 0.8357707 NA NA
3 AAPL 01.02.2017 1.5059626 -1.3181592 NA NA
4 AAPL 01.03.2015 -1.3226590 1.4786993 NA NA
5 AAPL 01.03.2016 2.1394338 -0.8900286 NA NA
6 AAPL 01.03.2019 0.6035526 0.5453212 NA NA
您可以看到日期乱七八糟,因此首先按公司和日期对 df2 进行排序是有意义的:
# Sort the events by company and by *parsed* date: the dd.mm.YYYY strings
# would otherwise sort alphabetically rather than chronologically.
df2 <- df2[
  order(
    df2$Company.name2,
    as.Date(as.character(df2$Event_date), format = "%d.%m.%Y")
  ),
]

# Keep only the rows of df that fall on an event date of the same company.
df_merged <- merge(
  df2, df,
  by.x = c("Company.name2", "Event_date"),
  by.y = c("Company.name", "Date"),
  sort = FALSE
)

# merge(sort = FALSE) returns rows in an unspecified order, so re-establish
# the company/chronological order explicitly; the rolling window computed on
# df_merged afterwards depends on it.
df_merged <- df_merged[
  order(
    df_merged$Company.name2,
    as.Date(as.character(df_merged$Event_date), format = "%d.%m.%Y")
  ),
]
rownames(df_merged) <- NULL
要进行滚动回归,您可以在此处使用 lmList,但那样会比较复杂。下面我使用 roll 包中的 roll_lm 来计算系数。我不太理解您所说的滞后(lag),也许您可以再详细说明一下;因此下面的回归没有考虑滞后:
library(roll)
# Per company, run roll_lm() with a window width of 180 over the merged
# event rows, regressing stock_return (y) on market_return (x).
result <- by(
  data = df_merged,
  INDICES = df_merged$Company.name2,
  FUN = function(company_rows) {
    roll_lm(
      x = company_rows$market_return,
      y = company_rows$stock_return,
      width = 180
    )
  }
)

# Show the last few coefficient rows for AAPL.
tail(result$AAPL$coefficients)
(Intercept) x1
[445,] -0.07817682 0.10662762
[446,] -0.06440454 0.09257577
[447,] -0.07007445 0.09461642
[448,] -0.05917523 0.09582312
[449,] -0.05292590 0.10025369
[450,] -0.04930798 0.09911921
自 runner 0.3.5 版本起,您可以指定 x = df,直接在 data.frame 的窗口上运行回归。由于您是在 df2 上执行 mutate,必须通过 x = df[df$Company.name == Company.name2[1], ] 取出 df 中对应公司的那部分数据——对 idx 也必须做同样的处理。
# Intercept (alpha) of stock_return ~ market_return on one data window.
#
# x: data frame holding the columns `stock_return` and `market_return`.
# Returns a length-one named numeric: the first coefficient, "(Intercept)".
running_regression_intercept <- function(x) {
  fit <- lm(stock_return ~ market_return, data = x)
  estimates <- coef(fit)
  estimates[1]
}
# Slope (beta) of stock_return ~ market_return on one data window.
#
# x: data frame holding the columns `stock_return` and `market_return`.
# Returns a length-one named numeric: the second coefficient, "market_return".
running_regression_slope <- function(x) {
  fit <- lm(stock_return ~ market_return, data = x)
  estimates <- coef(fit)
  estimates[2]
}
library(dplyr)
library(runner)
# For every event in df2, regress stock_return on market_return over the
# date window that runner() builds for that event (k = "180 days",
# lag = "5 days"), using only the rows of df for the event's company.
#
# mutate() is evaluated once per company group, so Company.name2[1] is that
# group's company. `idx` must be the Date column of exactly the same df
# subset that is passed as `x`, and `at` limits the output to the event dates.
df2 %>%
  group_by(Company.name2) %>%
  mutate(
    intercept = runner(
      x = df[df$Company.name == Company.name2[1], ],
      k = "180 days",
      lag = "5 days",
      idx = df$Date[df$Company.name == Company.name2[1]],
      at = Event_date,
      # No trailing comma after the last argument: it would pass an empty
      # extra argument into the call.
      f = running_regression_intercept
    ),
    slope = runner(
      x = df[df$Company.name == Company.name2[1], ],
      k = "180 days",
      lag = "5 days",
      idx = df$Date[df$Company.name == Company.name2[1]],
      at = Event_date,
      f = running_regression_slope
    )
  ) %>%
  # Drop the grouping so later verbs do not silently operate per company.
  ungroup()
# Company.name2 Event_date intercept slope
# <fct> <date> <dbl> <dbl>
# 1 AAPL 2017-01-12 0.0114 0.00488
# 2 AAPL 2017-07-31 -0.0654 0.00574
# 3 AAPL 2019-02-27 -0.0861 0.0310
# 4 AAPL 2018-09-06 0.0405 -0.0630
# 5 AAPL 2015-09-03 -0.121 -0.0246
# 6 AAPL 2018-11-20 -0.0283 -0.0254
# 7 AAPL 2015-07-03 -0.116 -0.0186
# 8 AAPL 2015-02-03 0.102 0.0409
# 9 AAPL 2017-03-16 -0.0157 0.0124
# 10 AAPL 2019-06-08 -0.00302 0.0532
我需要稍微修改一下您的数据,因为 format() 会把 Event_date 从 Date 类型转换为 character 类型。
# Example data with real Date columns (no format() conversion to character).
#
# df : one row per company and calendar day (2014-2019), ordered by Date.
# df2: 450 event dates per company, drawn without replacement from 2015-2019.
Date <- seq(
  from = as.Date("2014-01-01"),
  to = as.Date("2019-12-31"),
  by = "day"
)

# One simulated market series shared by both companies; its length follows
# the calendar instead of a hard-coded 2191.
market_return <- rnorm(length(Date))

AAPL <- data.frame(
  Company.name = "AAPL",
  Date = Date,
  market_return = market_return
)
MSFT <- data.frame(
  Company.name = "MSFT",
  Date = Date,
  market_return = market_return
)

df <- rbind(AAPL, MSFT)
df$stock_return <- rnorm(nrow(df))

# Order by date so the Date column is ascending within each company.
df <- df[order(df$Date), ]

df2 <- data.frame(
  Company.name2 = rep(c("AAPL", "MSFT"), each = 450),
  Event_date = sample(
    seq(as.Date("2015/01/01"), as.Date("2019/12/31"), by = "day"),
    size = 900
  )
)