使用逻辑回归的(非常)简单的 quantstrat 交易模型

(very) Simple quantstrat trading model using logistic regression

我一直在研究 R 中的 quantstrat 回测包,我想就一个特别(糟糕)的策略获得一些建议。

我的想法是:只要逻辑回归模型告诉我市场将上涨(在 prediction 列中表示为 1)就买入。逻辑回归每预测一天市场将上涨,我就购买 .orderqty = 10 股 Google。当逻辑回归告诉我价格将下跌时(在 prediction 列中用 0 表示),就抛售当前持有的所有 Google 股票,然后重新开始,直到它再次告诉我们买入。

问题:

关于我所描述的内容,我的代码是否正确?

您可能注意到,我对两个输入变量做了滞后处理,例如 momentum(lag(GOOG$close), n = 12)。

也就是说,我想用第 t-1 天的数据来预测第 t 天。这样做也正确吗?我不想使用任何可能给预测带来偏差的指标。

quantstrat 包对我来说学习曲线有点陡,所以我只想确保基本的东西是正确的。

模型:

# Session setup for the quantstrat backtest: load the packages and define the
# global parameters used by the rest of the script.

# NOTE(review): this removes every visible object from the workspace; kept
# because the script is written to be rerun from a clean state.
rm(list = ls())

# library() stops immediately when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with an unrelated error.
library(quantstrat)
library(PerformanceAnalytics)

set.seed(1234)

# Initial parameters for the quantstrat trading model
initDate <- "2007-01-01"  # passed to initPortf/initAcct/initOrders; earlier than `from`
from <- "2017-01-01"      # first date requested from the data source
to <- "2018-12-01"        # last date requested from the data source
init_equity <- 1000       # starting equity handed to initAcct()
adjustment <- TRUE        # NOTE(review): not referenced elsewhere in the visible script

.orderqty <- 10           # quantity used by the entry rule
.txnfees <- -10           # passed as TxnFees to the entry and exit rules

currency("USD")
Sys.setenv(TZ = "UTC")

# Download the price series, build the target and the two lagged predictors,
# split the rows 80/20 in time order, fit a logistic regression on the first
# part and score the second part.

# Collect the data
symbols <- c('GOOG')
getSymbols(symbols, from=from, to=to, src="yahoo", adjust=TRUE)  

# Rename the six downloaded columns to short lower-case names.
colnames(GOOG) <- c("open", "high", "low", "close", "volume", "adjusted")

# Create the dependent variable for a logistic regression:
# 1 when the bar closes at or above its open, otherwise 0.
GOOG$direction <- with(GOOG, ifelse(close >= open, 1, 0))

# Create two basic input variables, both computed on the lagged close.
# NOTE(review): nFast/nSlow/nSig look like MACD-style arguments - confirm
# whether RSI() actually uses them.
GOOG$rsi <- RSI(lag(GOOG$close), nFast=14, nSlow = 26, nSig = 9, maType = SMA)
GOOG$momentum <- momentum(lag(GOOG$close), n = 12)

# Keep only the rows in which every column has a value.
GOOG <- GOOG[complete.cases(GOOG), ] 

# Create a training and test set: the first 80% of the rows are used for
# training and the remaining rows for testing.
# train_date is a (possibly fractional) row count, not a date.
train_date <- nrow(GOOG) *0.8
train <- GOOG[1:train_date,]
test <- GOOG[-c(1:train_date),]

# Run a simple logistic regression and obtain predicted probabilities
lm.fit <- glm(direction ~ rsi + momentum, data = train, family = binomial)
summary(lm.fit)
pr.lm <- predict(lm.fit, test, type = "response")


# Extract the OHLC from the GOOG stock and match it with the test dates
TEST <- subset(GOOG, index(GOOG) %in% index(test))

# Add the predictions to the TEST data: 1 if the predicted probability is
# greater than 0.6, otherwise 0.
TEST$prediction <- ifelse(pr.lm > 0.6, 1, 0)

# Share of test rows where the prediction equals the realised direction.
# NOTE(review): there is no separator between the label and the number.
paste0("Accuracy", mean(TEST$direction == TEST$prediction))

# The model output is now turned into a backtest: the intent is to buy on
# every bar for which the logistic model predicts direction "1".

# Strategy setup ----
# The test-period data (including the prediction column) is stored under the
# symbol's own name.
GOOG <- TEST
stock(primary_id = "GOOG", currency = "USD", multiplier = 1)

# Strategy, portfolio and account all share a single name.
account.st <- "LogisticRegressionStrategy"
portfolio.st <- account.st
strategy.st <- account.st

# Remove anything stored under that name before initialising.
rm.strat(strategy.st)
rm.strat(portfolio.st)
rm.strat(account.st)

initPortf(
  name = portfolio.st,
  symbols = symbols,
  initDate = initDate,
  currency = "USD"
)

initAcct(
  name = account.st,
  portfolios = portfolio.st,
  initDate = initDate,
  currency = "USD",
  initEq = init_equity
)

initOrders(
  portfolio = portfolio.st,
  symbols = symbols,
  initDate = initDate
)

strategy(name = strategy.st, store = TRUE)


# Entry rule: when the "prediction" column equals 1, submit a market order for
# .orderqty shares, taking transaction fees into account. The rule reads the
# data column directly; no add.signal() is registered in this script.
# NOTE(review): prefer = "Low" asks for the bar's low as the fill price, which
# cannot be known in advance in real trading.
add.rule(strategy = strategy.st,
         name = "ruleSignal",
         arguments = list(sigcol = "prediction",
                          sigval = 1,
                          orderqty = .orderqty,
                          ordertype = "market",
                          #orderside = "long", 
                          prefer = "Low", 
                          TxnFees = .txnfees, 
                          replace = FALSE),
         type = "enter",
         label = "EnterLONG")

# Exit rule: as soon as the logistic regression predicts a "0" (prediction
# column equals 0), sell the whole GOOG position (orderqty = "all").

add.rule(strategy.st, 
         name = "ruleSignal", 
         arguments = list(sigcol = "prediction", 
                          sigval = 0, 
                          #orderside = "short", 
                          ordertype = "market", 
                          orderqty = "all", 
                          TxnFees = .txnfees, 
                          replace = TRUE), 
         type = "exit", 
         label = "Exit2SHORT")


# Run the stored strategy against the portfolio.
applyStrategy(strategy = strategy.st, portfolios = portfolio.st)

# Refresh the portfolio, the account and the account's ending equity.
updatePortf(Portfolio = portfolio.st)
updateAcct(name = account.st)
updateEndEq(Account = account.st)

# Position chart for GOOG with 10- and 30-period SMA overlays.
chart.Posn(
  Portfolio = portfolio.st,
  Symbol = "GOOG",
  TA = "add_SMA(n = 10, col = 2); add_SMA(n = 30, col = 4)"
)

看起来你已经很接近了。您在训练数据上拟合模型,并确保在测试集上进行回测,这是正确的做法。

有几点需要注意:不要在入场信号的 add.rule 中设置 prefer = "Low";在实际交易中,您永远无法事先知道下一根柱的最低价在哪里,因此不可能以该价格成交。

我在这里让逻辑回归预测当前柱之后的下一根柱,因为如果您实时("online")进行这些预测,实际做的就是这件事。只要我们只使用预测概率作为信号,并且永远不要把 direction_fwd 本身用作交易信号(它会引入前瞻性偏差),这样做就没有问题。

为了让重新运行代码更容易,我还将市场数据存储在环境 .data 中;这样在重新运行部分代码时,您可以为 applyStrategy 重新生成 GOOG 中的数据,而无需再次向雅虎请求数据。

您可能还想限制入场次数,这可以通过 addPosLimit 实现。您可能不想在概率 > 0.6 的每根柱线上都买入,而只想在概率第一次超过 0.6(向上穿越)时买入,因此我加入了 signal 代码(sigThreshold,cross = TRUE)来处理这一点。

请记住,在 quantstrat 中,订单默认会在数据的下一根柱上成交(这里是下一根柱的开盘价,因为 prefer = "Open"),这样默认的成交方式更接近真实情况(这一点对日内柱数据或逐笔报价数据更为适用)。我认为这正是您想要的:直到当前柱结束时,您才能知道当前柱的 RSI 和动量值,因此在下一根柱的开盘价成交是合理的。

# Session setup for the quantstrat backtest: load the packages and define the
# global parameters used by the rest of the script.

# NOTE(review): this removes every visible object from the workspace (objects
# whose names start with a dot, such as .data, are not touched by ls()).
rm(list = ls())

# library() stops immediately when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with an unrelated error.
library(quantstrat)
library(PerformanceAnalytics)

set.seed(1234)

# Initial parameters for the quantstrat trading model
initDate <- "2007-01-01"  # passed to initPortf/initAcct/initOrders/addPosLimit; earlier than `from`
from <- "2017-01-01"      # first date requested from the data source
to <- "2018-12-01"        # last date requested from the data source
init_equity <- 1000       # starting equity handed to initAcct()
adjustment <- TRUE        # NOTE(review): not referenced elsewhere in the visible script

.orderqty <- 10           # entry order quantity; also scales the position limit
.txnfees <- -10           # passed as TxnFees to the entry and exit rules

currency("USD")
Sys.setenv(TZ = "UTC")

# Download the price series, build the one-bar-ahead target and the two
# predictors, split the rows 80/20 in time order, fit a logistic regression on
# the first part and score the second part.

# Collect the data ----
symbols <- c("GOOG")
# The download goes into its own environment so that GOOG can be rebuilt from
# it when only part of the script is rerun.
.data <- new.env()
getSymbols(symbols, from = from, to = to, src = "yahoo", adjust = TRUE,
           env = .data)

colnames(.data$GOOG) <- c("open", "high", "low", "close", "volume", "adjusted")

mdata <- .data$GOOG

# Dependent variable: 1 when the bar closes at or above its open, otherwise 0.
mdata$direction <- with(mdata, ifelse(close >= open, 1, 0))

# Two basic input variables, computed on the current bar's close (not lagged);
# the forward-shifted target below supplies the one-bar-ahead alignment.
# RSI()'s own arguments are `n` and `maType`. The nFast/nSlow/nSig arguments
# used previously belong to MACD() and had no effect on the RSI, so the
# period is now stated explicitly (14 is RSI's default).
mdata$rsi <- RSI(mdata$close, n = 14, maType = SMA)
mdata$momentum <- momentum(mdata$close, n = 12)

# Drop the rows where the indicators are not yet available.
mdata <- mdata[complete.cases(mdata), ]

# direction_fwd on a row is the direction of the following row (k = -1); the
# final row has no following bar and therefore becomes NA.
mdata$direction_fwd <- lag.xts(mdata$direction, k = -1)

# Training and test set: first 80% of the rows versus the rest.
# floor() makes the integer cut-off explicit instead of relying on `:`
# truncating a fractional row count. train_date is a row count, not a date.
train_date <- floor(nrow(mdata) * 0.8)
# Guard against too little data: both parts must contain at least one row.
stopifnot(train_date >= 1, train_date < nrow(mdata))
train <- mdata[seq_len(train_date), ]
test <- mdata[-seq_len(train_date), ]

# Run a simple logistic regression and obtain predicted probabilities.
lm.fit <- glm(direction_fwd ~ rsi + momentum, data = train, family = binomial)
# Explicit print() so the output also appears when the script is source()d.
print(summary(lm.fit))
pr.lm <- predict(lm.fit, test, type = "response")
test$pred_prob <- pr.lm

# Binary prediction: 1 if the predicted probability is greater than 0.6.
test$prediction <- ifelse(pr.lm > 0.6, 1, 0)

# Share of test rows where the prediction matches the next bar's direction;
# the NA in the last row of direction_fwd is excluded.
accuracy <- mean(test$direction_fwd == test$prediction, na.rm = TRUE)
print(paste0("Accuracy: ", accuracy))


# applyStrategy is easiest to run when the data for the symbol lives in a
# variable carrying the symbol's name, so the test set is stored as GOOG.
GOOG <- test

stock(primary_id = "GOOG", currency = "USD", multiplier = 1)

# Strategy, portfolio and account all share a single name.
account.st <- "LogisticRegressionStrategy"
portfolio.st <- account.st
strategy.st <- account.st

# Remove anything stored under that name before initialising.
rm.strat(strategy.st)
rm.strat(portfolio.st)
rm.strat(account.st)

initPortf(
  name = portfolio.st,
  symbols = symbols,
  initDate = initDate,
  currency = "USD"
)

initAcct(
  name = account.st,
  portfolios = portfolio.st,
  initDate = initDate,
  currency = "USD",
  initEq = init_equity
)

initOrders(
  portfolio = portfolio.st,
  symbols = symbols,
  initDate = initDate
)

strategy(name = strategy.st, store = TRUE)

# Position limit for GOOG: nMult_orderqty times the entry order quantity.
nMult_orderqty <- 2
addPosLimit(
  portfolio = portfolio.st,
  symbol = "GOOG",
  timestamp = initDate,
  maxpos = nMult_orderqty * .orderqty
)

# Signals and rules: two threshold signals on the predicted probability
# (pred_prob) drive one long entry rule and one exit rule.

# Entry signal: fires when pred_prob exceeds 0.6 for the first time
# (cross = TRUE), not on every bar where it is above 0.6.
add.signal(strategy = strategy.st,
         name = "sigThreshold",
         arguments = list(threshold=0.6, column="pred_prob", relationship="gt", cross= TRUE),
         label = "longSig")

# Exit signal: fires when pred_prob drops below 0.5 for the first time.
add.signal(strategy = strategy.st,
           name = "sigThreshold",
           arguments = list(threshold=0.5, column="pred_prob", relationship="lt", cross= TRUE),
           label = "exitLongSig")

# Entry rule: on a longSig signal, submit a long market order for .orderqty
# shares, preferring the Open price and taking transaction fees into account.
# NOTE(review): osFUN = osMaxPos presumably sizes the order against the limit
# registered with addPosLimit() - confirm.
add.rule(strategy = strategy.st,
         name = "ruleSignal",
         arguments = list(sigcol = "longSig",
                          sigval = 1,
                          orderqty = .orderqty,
                          ordertype = "market",
                          orderside = "long",
                          osFUN = osMaxPos,
                          prefer = "Open",  # Never know the low in advance. Use the open, as it is for the next day (be aware that the open price for bar data has its own problems too)
                          TxnFees = .txnfees, 
                          replace = FALSE),
         type = "enter",
         label = "EnterLONG")

# Exit rule: on an exitLongSig signal (pred_prob crossing below 0.5), sell the
# whole long GOOG position (orderqty = "all").

add.rule(strategy.st, 
         name = "ruleSignal", 
         arguments = list(sigcol = "exitLongSig", 
                          sigval = 1, 
                          ordertype = "market", 
                          orderside = "long",
                          orderqty = "all", 
                          TxnFees = .txnfees, 
                          replace = TRUE), 
         type = "exit", 
         label = "Exit2SHORT")


# Run the stored strategy against the portfolio.
applyStrategy(strategy = strategy.st, portfolios = portfolio.st)

# Refresh the portfolio, the account and the account's ending equity.
updatePortf(Portfolio = portfolio.st)
updateAcct(name = account.st)
updateEndEq(Account = account.st)

# Position chart for GOOG with 10- and 30-period SMA overlays.
chart.Posn(
  Portfolio = portfolio.st,
  Symbol = "GOOG",
  TA = "add_SMA(n = 10, col = 2); add_SMA(n = 30, col = 4)"
)