Better late than never, as promised, the R code for the SVM system discussed in a previous post.
For the record this code is based on the random forest system created by Max Dama. I thought that it would make it easier for common reader to compare and evaluate. I also want to state that this isn’t anywhere close to optimal programming, I did that I long time ago and I was only starting with R at the time.
Here is the system :
SVMClassifModel = function(data, targets, returns, lookback = 252, ktype = "C-svc", crossvalid = 10, C = 10) {
# Construct a predictive model using support vector machine
# Input data must be lagged one period to avoid look-ahead bias
# Print predictions and confidence, accuracy, equity curves plot, and performance statistics v.s. benchmark
# Libraries
require(kernlab)
require(quantmod)
# Make sure targets is a factor (for classification)
targets = as.factor(targets)
data$targets = as.factor(data$targets)
# Generate indexes for backtest
idx = data.frame(targets = lookback:(nrow(data)-1))
# Isolate index to be used later
inx = index(returns[idx$targets])
# Prediction function to be used for backtesting
pred1pd = function(t) {
# Train model
model = trainSVM(data[(t-lookback):t, ], ktype, C, crossvalid)
# Prediction
pred = predict(model, data[t+1, -1], type="prob")
# Print for user inspection
print(pred)
}
# backtest by looping over the calendar previously generated
preds = sapply(idx$targets, pred1pd)
# print output
print(preds)
print(max.col(preds))
preds = data.frame(t(rbind(mle = max.col(t(preds)), preds)))
print(preds)
print(summaryStats((returns[idx$targets] * (preds$mle*2-3)), returns[idx$targets], comp = TRUE))
#Equity curves
equity = xts(cumprod((returns[idx$targets] * (preds$mle*2-3))+1), inx)
Benchmark = xts(cumprod(returns[idx$targets] + 1), inx)
# y axis values range
yrngMin = abs(min(equity, Benchmark))
yrngMax = abs(max(equity, Benchmark))
# Plot curves
chartSeries(equity, log.scale = TRUE, name='Equity Curves', yrange=c(yrngMin, yrngMax))
addTA(Benchmark, on=1, col='gold')
}
trainSVM = function(data, ktype, C, crossvalid) {
# Return a trained svm model
trainedmodel = ksvm(targets ~ ., data = data, type = ktype, kernel="rbfdot", kpar=list(sigma=0.05), C = C, prob.model = TRUE, cross = crossvalid)
}
featureGen = function(sym, returns) {
# Return a data frame to be used as input by the SVM system
# Targets vector
targets = coredata(returns)
targets[targets>=0] = 1
targets[targets<0] = -1
targets = as.factor(targets)
#RSIs
rsi2 = RSI(Cl(sym), 2 )
rsi3 = RSI(Cl(sym), 3 )
rsi4 = RSI(Cl(sym), 4 )
rsi5 = RSI(Cl(sym), 5 )
rsi6 = RSI(Cl(sym), 6 )
rsi7 = RSI(Cl(sym), 7 )
rsi8 = RSI(Cl(sym), 8 )
rsi9 = RSI(Cl(sym), 9 )
rsi10 = RSI(Cl(sym), 10 )
rsi11 = RSI(Cl(sym), 11 )
rsi12 = RSI(Cl(sym), 12 )
rsi13 = RSI(Cl(sym), 13 )
rsi14 = RSI(Cl(sym), 14 )
rsi15 = RSI(Cl(sym), 15 )
rsi16 = RSI(Cl(sym), 16 )
rsi17 = RSI(Cl(sym), 17 )
rsi18 = RSI(Cl(sym), 18 )
rsi19 = RSI(Cl(sym), 19 )
rsi20 = RSI(Cl(sym), 20 )
rsi21 = RSI(Cl(sym), 21 )
rsi22 = RSI(Cl(sym), 22 )
rsi23 = RSI(Cl(sym), 23 )
rsi24 = RSI(Cl(sym), 24 )
rsi25 = RSI(Cl(sym), 25 )
rsi26 = RSI(Cl(sym), 26 )
rsi27 = RSI(Cl(sym), 27 )
rsi28 = RSI(Cl(sym), 28 )
rsi29 = RSI(Cl(sym), 29 )
rsi30 = RSI(Cl(sym), 30 )
# lagged RSIs to correspond RSI with target period
rsi2 = Lag(rsi2, 1)
rsi3 = Lag(rsi3, 1)
rsi4 = Lag(rsi4, 1)
rsi5 = Lag(rsi5, 1)
rsi6 = Lag(rsi6, 1)
rsi7 = Lag(rsi7, 1)
rsi8 = Lag(rsi8, 1)
rsi9 = Lag(rsi9, 1)
rsi10 = Lag(rsi10, 1)
rsi11 = Lag(rsi11, 1)
rsi12 = Lag(rsi12, 1)
rsi13 = Lag(rsi13, 1)
rsi14 = Lag(rsi14, 1)
rsi15 = Lag(rsi15, 1)
rsi16 = Lag(rsi16, 1)
rsi17 = Lag(rsi17, 1)
rsi18 = Lag(rsi18, 1)
rsi19 = Lag(rsi19, 1)
rsi20 = Lag(rsi20, 1)
rsi21 = Lag(rsi21, 1)
rsi22 = Lag(rsi22, 1)
rsi23 = Lag(rsi23, 1)
rsi24 = Lag(rsi24, 1)
rsi25 = Lag(rsi25, 1)
rsi26 = Lag(rsi26, 1)
rsi27 = Lag(rsi27, 1)
rsi28 = Lag(rsi28, 1)
rsi29 = Lag(rsi29, 1)
rsi30 = Lag(rsi30, 1)
# Data frame
data = data.frame(targets, rsi2, rsi3, rsi4, rsi5, rsi6, rsi7, rsi8, rsi9, rsi10, rsi11, rsi12, rsi13, rsi14, rsi15, rsi16, rsi17, rsi18, rsi19, rsi20, rsi21, rsi22, rsi23, rsi24, rsi25, rsi26, rsi27, rsi28, rsi29, rsi30)
# names(data) = c("targets", "data")
# Results
return(data)
}
summaryStats = function(x, bmk, comp = FALSE) {
#Required library
require(PerformanceAnalytics)
#Compute stats of interest for strategy
cumRetx = Return.cumulative(x)
annRetx = Return.annualized(x, scale=252)
sharpex = SharpeRatio.annualized(x, scale=252)
winpctx = length(x[x > 0])/length(x[x != 0])
annSDx = sd.annualized(x, scale=252)
maxDDx = maxDrawdown(x)
avDDx = mean(Drawdowns(x))
if(comp == TRUE) {
#Compute stats of interest for benchmark
cumRetbmk = Return.cumulative(bmk)
annRetbmk = Return.annualized(bmk, scale=252)
sharpebmk = SharpeRatio.annualized(bmk, scale=252)
winpctbmk = length(bmk[bmk > 0])/length(bmk)
annSDbmk = sd.annualized(bmk, scale=252)
maxDDbmk = maxDrawdown(bmk)
avDDbmk = mean(Drawdowns(bmk))
#Return result vectors
Benchmark = c(cumRetbmk, annRetbmk, sharpebmk, winpctbmk, annSDbmk, maxDDbmk, avDDbmk)
Strategy = c(cumRetx, annRetx, sharpex, winpctx, annSDx, maxDDx, avDDx)
nms = c("Cumulative Return", "Annualized Return", "Annualized Sharpe Ratio", "Winning Percentage", "Annualized Volatility", "Maximum Drawdown", "Average Drawdown")
result = data.frame(Strategy, Benchmark, row.names = nms)
} else {
#Return result vectors
nms = c("Cumulative Return", "Annualized Return", "Annualized Sharpe Ratio", "Winning Percentage", "Annualized Volatility", "Maximum Drawdown", "Average Drawdown")
Strategy = c(cumRetx, annRetx, sharpex, winpctx, annSDx, maxDDx, avDDx)
result = data.frame(Strategy, row.names = nms)
}
return(result)
}
Here is the harness used to use the system. Don’t forget to change the first two line of the code and replace with your directory.
For example:
setwd(“C:\Users\John Doe\Documents”)
source(“SVM System”)
setwd("INPUT DIRECTORY")
source("NAME OF THE RSI SYSTEM FILE IN THE FOLDER")
require(quantmod)
require(PerformanceAnalytics)
# Load data with quantmod
getSymbols('SPY', from='2000-06-01')
returns = dailyReturn(Cl(SPY), type='log')
# Generate data frame of data and targets
data = featureGen(SPY, returns)
targets = coredata(returns)
targets[targets>=0] = 1
targets[targets<0] = -1
targets = as.factor(targets)
# Run the system
SVMClassifModel(data[30:nrow(data),], targets[30:length(targets)], returns, lookback = 252, ktype = "C-svc", crossvalid = 10, C = 60)
Lastly I would like to know if anyone has a better idea to share code. This is not very good way and I would like to improve it. I also welcome suggestions to make the code more efficient. I also want to make clear that I do not think that this is a good system and I know that it could be improved by adding predictors and all, it is only to give an example to follow-up on the post mentioned above.
QF