Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ###################################################
- ### The Available Data
- ###################################################
- library(DMwR)
- data(GSPC)
- ###################################################
- ### Handling time dependent data in R
- ###################################################
- library(xts)
- x1 <- xts(rnorm(100),seq(as.POSIXct("2000-01-01"),len=100,by="day"))
- x1[1:5]
- x2 <- xts(rnorm(100),seq(as.POSIXct("2000-01-01 13:00"),len=100,by="min"))
- x2[1:4]
- x3 <- xts(rnorm(3),as.Date(c('2005-01-01','2005-01-10','2005-01-12')))
- x3
- x1[as.POSIXct("2000-01-04")]
- x1["2000-01-05"]
- x1["20000105"]
- x1["2000-04"]
- x1["2000-03-27/"]
- x1["2000-02-26/2000-03-03"]
- x1["/20000103"]
- mts.vals <- matrix(round(rnorm(25),2),5,5)
- colnames(mts.vals) <- paste('ts',1:5,sep='')
- mts <- xts(mts.vals,as.POSIXct(c('2003-01-01','2003-01-04',
- '2003-01-05','2003-01-06','2003-02-16')))
- mts
- mts["2003-01",c("ts2","ts5")]
- index(mts)
- coredata(mts)
- ###################################################
- ### Reading the data from the CSV file
- ###################################################
- GSPC <- as.xts(read.zoo('sp500.csv',header=T))
- ###################################################
- ### Getting the data from the Web
- ###################################################
- library(tseries)
- GSPC <- as.xts(get.hist.quote("^GSPC",start="1970-01-02",
- quote=c("Open", "High", "Low", "Close","Volume","AdjClose")))
- head(GSPC)
- GSPC <- as.xts(get.hist.quote("^GSPC",
- start="1970-01-02",end='2009-09-15',
- quote=c("Open", "High", "Low", "Close","Volume","AdjClose")))
- library(quantmod)
- getSymbols('^GSPC')
- getSymbols('^GSPC',from='1970-01-01',to='2009-09-15')
- colnames(GSPC) <- c("Open", "High", "Low", "Close","Volume","AdjClose")
- setSymbolLookup(IBM=list(name='IBM',src='yahoo'),
- USDEUR=list(name='USD/EUR',src='oanda',
- from=as.Date('2009-01-01')))
- getSymbols(c('IBM','USDEUR'))
- head(IBM)
- head(USDEUR)
- ###################################################
- ### Reading the data from a MySQL database
- ###################################################
- library(RODBC)
- ch <- odbcConnect("QuotesDSN",uid="myusername",pwd="mypassword")
- allQuotes <- sqlFetch(ch,"gspc")
- GSPC <- xts(allQuotes[,-1],order.by=as.Date(allQuotes[,1]))
- head(GSPC)
- odbcClose(ch)
- library(DBI)
- library(RMySQL)
- drv <- dbDriver("MySQL")
- ch <- dbConnect(drv,dbname="Quotes","myusername","mypassword")
- allQuotes <- dbGetQuery(ch,"select * from gspc")
- GSPC <- xts(allQuotes[,-1],order.by=as.Date(allQuotes[,1]))
- head(GSPC)
- dbDisconnect(ch)
- dbUnloadDriver(drv)
- setSymbolLookup(GSPC=list(name='gspc',src='mysql',
- db.fields=c('Index','Open','High','Low','Close','Volume','AdjClose'),
- user='xpto',password='ypto',dbname='Quotes'))
- getSymbols('GSPC')
- ###################################################
- ### Defining the Prediction Tasks
- ###################################################
- T.ind <- function(quotes,tgt.margin=0.025,n.days=10) {
- v <- apply(HLC(quotes),1,mean)
- r <- matrix(NA,ncol=n.days,nrow=NROW(quotes))
- ## The following statment is wrong in the book (page 109)!
- for(x in 1:n.days) r[,x] <- Next(Delt(Cl(quotes),v,k=x),x)
- x <- apply(r,1,function(x) sum(x[x > tgt.margin | x < -tgt.margin]))
- if (is.xts(quotes)) xts(x,time(quotes)) else x
- }
- candleChart(last(GSPC,'3 months'),theme='white',TA=NULL)
- avgPrice <- function(p) apply(HLC(p),1,mean)
- addAvgPrice <- newTA(FUN=avgPrice,col=1,legend='AvgPrice')
- addT.ind <- newTA(FUN=T.ind,col='red',legend='tgtRet')
- addAvgPrice(on=1)
- addT.ind()
- myATR <- function(x) ATR(HLC(x))[,'atr']
- mySMI <- function(x) SMI(HLC(x))[,'SMI']
- myADX <- function(x) ADX(HLC(x))[,'ADX']
- myAroon <- function(x) aroon(x[,c('High','Low')])$oscillator
- myBB <- function(x) BBands(HLC(x))[,'pctB']
- myChaikinVol <- function(x) Delt(chaikinVolatility(x[,c("High","Low")]))[,1]
- myCLV <- function(x) EMA(CLV(HLC(x)))[,1]
- myEMV <- function(x) EMV(x[,c('High','Low')],x[,'Volume'])[,2]
- myMACD <- function(x) MACD(Cl(x))[,2]
- myMFI <- function(x) MFI(x[,c("High","Low","Close")], x[,"Volume"])
- mySAR <- function(x) SAR(x[,c('High','Close')]) [,1]
- myVolat <- function(x) volatility(OHLC(x),calc="garman")[,1]
- library(randomForest)
- data.model <- specifyModel(T.ind(GSPC) ~ Delt(Cl(GSPC),k=1:10) +
- myATR(GSPC) + mySMI(GSPC) + myADX(GSPC) + myAroon(GSPC) +
- myBB(GSPC) + myChaikinVol(GSPC) + myCLV(GSPC) +
- CMO(Cl(GSPC)) + EMA(Delt(Cl(GSPC))) + myEMV(GSPC) +
- myVolat(GSPC) + myMACD(GSPC) + myMFI(GSPC) + RSI(Cl(GSPC)) +
- mySAR(GSPC) + runMean(Cl(GSPC)) + runSD(Cl(GSPC)))
- set.seed(1234)
- rf <- buildModel(data.model,method='randomForest',
- training.per=c(start(GSPC),index(GSPC["1999-12-31"])),
- ntree=50, importance=T)
- ex.model <- specifyModel(T.ind(IBM) ~ Delt(Cl(IBM),k=1:3))
- data <- modelData(ex.model,data.window=c('2009-01-01','2009-08-10'))
- varImpPlot(rf@fitted.model,type=1)
- imp <- importance(rf@fitted.model,type=1)
- rownames(imp)[which(imp > 10)]
- data.model <- specifyModel(T.ind(GSPC) ~ Delt(Cl(GSPC),k=1) + myATR(GSPC) + myADX(GSPC) + myEMV(GSPC) + myVolat(GSPC) + myMACD(GSPC) + mySAR(GSPC) + runMean(Cl(GSPC)) )
- Tdata.train <- as.data.frame(modelData(data.model,
- data.window=c('1970-01-02','1999-12-31')))
- Tdata.eval <- na.omit(as.data.frame(modelData(data.model,
- data.window=c('2000-01-01','2009-09-15'))))
- Tform <- as.formula('T.ind.GSPC ~ .')
- ###################################################
- ### The Prediction Models
- ###################################################
- set.seed(1234)
- library(nnet)
- norm.data <- scale(Tdata.train)
- nn <- nnet(Tform,norm.data[1:1000,],size=10,decay=0.01,maxit=1000,linout=T,trace=F)
- norm.preds <- predict(nn,norm.data[1001:2000,])
- preds <- unscale(norm.preds,norm.data)
- sigs.nn <- trading.signals(preds,0.1,-0.1)
- true.sigs <- trading.signals(Tdata.train[1001:2000,'T.ind.GSPC'],0.1,-0.1)
- sigs.PR(sigs.nn,true.sigs)
- set.seed(1234)
- library(nnet)
- signals <- trading.signals(Tdata.train[,'T.ind.GSPC'],0.1,-0.1)
- norm.data <- data.frame(signals=signals,scale(Tdata.train[,-1]))
- nn <- nnet(signals ~ .,norm.data[1:1000,],size=10,decay=0.01,maxit=1000,trace=F)
- preds <- predict(nn,norm.data[1001:2000,],type='class')
- sigs.PR(preds,norm.data[1001:2000,1])
- library(e1071)
- sv <- svm(Tform,Tdata.train[1:1000,],gamma=0.001,cost=100)
- s.preds <- predict(sv,Tdata.train[1001:2000,])
- sigs.svm <- trading.signals(s.preds,0.1,-0.1)
- true.sigs <- trading.signals(Tdata.train[1001:2000,'T.ind.GSPC'],0.1,-0.1)
- sigs.PR(sigs.svm,true.sigs)
- library(kernlab)
- data <- cbind(signals=signals,Tdata.train[,-1])
- ksv <- ksvm(signals ~ .,data[1:1000,],C=10)
- ks.preds <- predict(ksv,data[1001:2000,])
- sigs.PR(ks.preds,data[1001:2000,1])
- library(earth)
- e <- earth(Tform,Tdata.train[1:1000,])
- e.preds <- predict(e,Tdata.train[1001:2000,])
- sigs.e <- trading.signals(e.preds,0.1,-0.1)
- true.sigs <- trading.signals(Tdata.train[1001:2000,'T.ind.GSPC'],0.1,-0.1)
- sigs.PR(sigs.e,true.sigs)
- ###################################################
- ### From Predictions into Actions
- ###################################################
- policy.1 <- function(signals,market,opened.pos,money,
- bet=0.2,hold.time=10,
- exp.prof=0.025, max.loss= 0.05
- )
- {
- d <- NROW(market) # this is the ID of today
- orders <- NULL
- nOs <- NROW(opened.pos)
- # nothing to do!
- if (!nOs && signals[d] == 'h') return(orders)
- # First lets check if we can open new positions
- # i) long positions
- if (signals[d] == 'b' && !nOs) {
- quant <- round(bet*money/market[d,'Close'],0)
- if (quant > 0)
- orders <- rbind(orders,
- data.frame(order=c(1,-1,-1),order.type=c(1,2,3),
- val = c(quant,
- market[d,'Close']*(1+exp.prof),
- market[d,'Close']*(1-max.loss)
- ),
- action = c('open','close','close'),
- posID = c(NA,NA,NA)
- )
- )
- # ii) short positions
- } else if (signals[d] == 's' && !nOs) {
- # this is the nr of stocks we already need to buy
- # because of currently opened short positions
- need2buy <- sum(opened.pos[opened.pos[,'pos.type']==-1,
- "N.stocks"])*market[d,'Close']
- quant <- round(bet*(money-need2buy)/market[d,'Close'],0)
- if (quant > 0)
- orders <- rbind(orders,
- data.frame(order=c(-1,1,1),order.type=c(1,2,3),
- val = c(quant,
- market[d,'Close']*(1-exp.prof),
- market[d,'Close']*(1+max.loss)
- ),
- action = c('open','close','close'),
- posID = c(NA,NA,NA)
- )
- )
- }
- # Now lets check if we need to close positions
- # because their holding time is over
- if (nOs)
- for(i in 1:nOs) {
- if (d - opened.pos[i,'Odate'] >= hold.time)
- orders <- rbind(orders,
- data.frame(order=-opened.pos[i,'pos.type'],
- order.type=1,
- val = NA,
- action = 'close',
- posID = rownames(opened.pos)[i]
- )
- )
- }
- orders
- }
- policy.2 <- function(signals,market,opened.pos,money,
- bet=0.2,exp.prof=0.025, max.loss= 0.05
- )
- {
- d <- NROW(market) # this is the ID of today
- orders <- NULL
- nOs <- NROW(opened.pos)
- # nothing to do!
- if (!nOs && signals[d] == 'h') return(orders)
- # First lets check if we can open new positions
- # i) long positions
- if (signals[d] == 'b') {
- quant <- round(bet*money/market[d,'Close'],0)
- if (quant > 0)
- orders <- rbind(orders,
- data.frame(order=c(1,-1,-1),order.type=c(1,2,3),
- val = c(quant,
- market[d,'Close']*(1+exp.prof),
- market[d,'Close']*(1-max.loss)
- ),
- action = c('open','close','close'),
- posID = c(NA,NA,NA)
- )
- )
- # ii) short positions
- } else if (signals[d] == 's') {
- # this is the money already committed to buy stocks
- # because of currently opened short positions
- need2buy <- sum(opened.pos[opened.pos[,'pos.type']==-1,
- "N.stocks"])*market[d,'Close']
- quant <- round(bet*(money-need2buy)/market[d,'Close'],0)
- if (quant > 0)
- orders <- rbind(orders,
- data.frame(order=c(-1,1,1),order.type=c(1,2,3),
- val = c(quant,
- market[d,'Close']*(1-exp.prof),
- market[d,'Close']*(1+max.loss)
- ),
- action = c('open','close','close'),
- posID = c(NA,NA,NA)
- )
- )
- }
- orders
- }
- # Train and test periods
- start <- 1
- len.tr <- 1000
- len.ts <- 500
- tr <- start:(start+len.tr-1)
- ts <- (start+len.tr):(start+len.tr+len.ts-1)
- # getting the quotes for the testing period
- data(GSPC)
- date <- rownames(Tdata.train[start+len.tr,])
- market <- GSPC[paste(date,'/',sep='')][1:len.ts]
- # learning the model and obtaining its signal predictions
- library(e1071)
- s <- svm(Tform,Tdata.train[tr,],cost=10,gamma=0.01)
- p <- predict(s,Tdata.train[ts,])
- sig <- trading.signals(p,0.1,-0.1)
- # now using the simulated trader
- t1 <- trading.simulator(market,sig,
- 'policy.1',list(exp.prof=0.05,bet=0.2,hold.time=30))
- t1
- summary(t1)
- tradingEvaluation(t1)
- plot(t1,market,theme='white',name='SP500')
- t2 <- trading.simulator(market,sig,'policy.2',list(exp.prof=0.05,bet=0.3))
- summary(t2)
- tradingEvaluation(t2)
- start <- 2000
- len.tr <- 1000
- len.ts <- 500
- tr <- start:(start+len.tr-1)
- ts <- (start+len.tr):(start+len.tr+len.ts-1)
- s <- svm(Tform,Tdata.train[tr,],cost=10,gamma=0.01)
- p <- predict(s,Tdata.train[ts,])
- sig <- trading.signals(p,0.1,-0.1)
- t2 <- trading.simulator(market,sig,'policy.2',list(exp.prof=0.05,bet=0.3))
- summary(t2)
- tradingEvaluation(t2)
- ###################################################
- ### Model Evaluation and Selection
- ###################################################
- MC.svmR <- function(form,train,test,b.t=0.1,s.t=-0.1,...) {
- require(e1071)
- t <- svm(form,train,...)
- p <- predict(t,test)
- trading.signals(p,b.t,s.t)
- }
- MC.svmC <- function(form,train,test,b.t=0.1,s.t=-0.1,...) {
- require(e1071)
- tgtName <- all.vars(form)[1]
- train[,tgtName] <- trading.signals(train[,tgtName],b.t,s.t)
- t <- svm(form,train,...)
- p <- predict(t,test)
- factor(p,levels=c('s','h','b'))
- }
- MC.nnetR <- function(form,train,test,b.t=0.1,s.t=-0.1,...) {
- require(nnet)
- t <- nnet(form,train,...)
- p <- predict(t,test)
- trading.signals(p,b.t,s.t)
- }
- MC.nnetC <- function(form,train,test,b.t=0.1,s.t=-0.1,...) {
- require(nnet)
- tgtName <- all.vars(form)[1]
- train[,tgtName] <- trading.signals(train[,tgtName],b.t,s.t)
- t <- nnet(form,train,...)
- p <- predict(t,test,type='class')
- factor(p,levels=c('s','h','b'))
- }
- MC.earth <- function(form,train,test,b.t=0.1,s.t=-0.1,...) {
- require(earth)
- t <- earth(form,train,...)
- p <- predict(t,test)
- trading.signals(p,b.t,s.t)
- }
- singleModel <- function(form,train,test,learner,policy.func,...) {
- p <- do.call(paste('MC',learner,sep='.'),list(form,train,test,...))
- eval.stats(form,train,test,p,policy.func=policy.func)
- }
- slide <- function(form,train,test,learner,relearn.step,policy.func,...) {
- real.learner <- learner(paste('MC',learner,sep='.'),pars=list(...))
- p <- slidingWindowTest(real.learner,form,train,test,relearn.step)
- p <- factor(p,levels=1:3,labels=c('s','h','b'))
- eval.stats(form,train,test,p,policy.func=policy.func)
- }
- grow <- function(form,train,test,learner,relearn.step,policy.func,...) {
- real.learner <- learner(paste('MC',learner,sep='.'),pars=list(...))
- p <- growingWindowTest(real.learner,form,train,test,relearn.step)
- p <- factor(p,levels=1:3,labels=c('s','h','b'))
- eval.stats(form,train,test,p,policy.func=policy.func)
- }
- eval.stats <- function(form,train,test,preds,b.t=0.1,s.t=-0.1,...) {
- # Signals evaluation
- tgtName <- all.vars(form)[1]
- test[,tgtName] <- trading.signals(test[,tgtName],b.t,s.t)
- st <- sigs.PR(preds,test[,tgtName])
- dim(st) <- NULL
- names(st) <- paste(rep(c('prec','rec'),each=3),
- c('s','b','sb'),sep='.')
- # Trading evaluation
- date <- rownames(test)[1]
- market <- GSPC[paste(date,"/",sep='')][1:length(preds),]
- trade.res <- trading.simulator(market,preds,...)
- c(st,tradingEvaluation(trade.res))
- }
- pol1 <- function(signals,market,op,money)
- policy.1(signals,market,op,money,
- bet=0.2,exp.prof=0.025,max.loss=0.05,hold.time=10)
- pol2 <- function(signals,market,op,money)
- policy.1(signals,market,op,money,
- bet=0.2,exp.prof=0.05,max.loss=0.05,hold.time=20)
- pol3 <- function(signals,market,op,money)
- policy.2(signals,market,op,money,
- bet=0.5,exp.prof=0.05,max.loss=0.05)
- # The list of learners we will use
- TODO <- c('svmR','svmC','earth','nnetR','nnetC')
- # The data sets used in the comparison
- DSs <- list(dataset(Tform,Tdata.train,'SP500'))
- # Monte Carlo (MC) settings used
- MCsetts <- mcSettings(20, # 20 repetitions of the MC exps
- 2540, # ~ 10 years for training
- 1270, # ~ 5 years for testing
- 1234) # random number generator seed
- # Variants to try for all learners
- VARS <- list()
- VARS$svmR <- list(cost=c(10,150),gamma=c(0.01,0.001),
- policy.func=c('pol1','pol2','pol3'))
- VARS$svmC <- list(cost=c(10,150),gamma=c(0.01,0.001),
- policy.func=c('pol1','pol2','pol3'))
- VARS$earth <- list(nk=c(10,17),degree=c(1,2),thresh=c(0.01,0.001),
- policy.func=c('pol1','pol2','pol3'))
- VARS$nnetR <- list(linout=T,maxit=750,size=c(5,10),
- decay=c(0.001,0.01),
- policy.func=c('pol1','pol2','pol3'))
- VARS$nnetC <- list(maxit=750,size=c(5,10),decay=c(0.001,0.01),
- policy.func=c('pol1','pol2','pol3'))
- # main loop
- for(td in TODO) {
- assign(td,
- experimentalComparison(
- DSs,
- c(
- do.call('variants',
- c(list('singleModel',learner=td),VARS[[td]],
- varsRootName=paste('single',td,sep='.'))),
- do.call('variants',
- c(list('slide',learner=td,
- relearn.step=c(60,120)),
- VARS[[td]],
- varsRootName=paste('slide',td,sep='.'))),
- do.call('variants',
- c(list('grow',learner=td,
- relearn.step=c(60,120)),
- VARS[[td]],
- varsRootName=paste('grow',td,sep='.')))
- ),
- MCsetts)
- )
- # save the results
- save(list=td,file=paste(td,'Rdata',sep='.'))
- }
- load('svmR.Rdata')
- load('svmC.Rdata')
- load('earth.Rdata')
- load('nnetR.Rdata')
- load('nnetC.Rdata')
- tgtStats <- c('prec.sb','Ret','PercProf',
- 'MaxDD','SharpeRatio')
- allSysRes <- join(subset(svmR,stats=tgtStats),
- subset(svmC,stats=tgtStats),
- subset(nnetR,stats=tgtStats),
- subset(nnetC,stats=tgtStats),
- subset(earth,stats=tgtStats),
- by = 'variants')
- rankSystems(allSysRes,5,maxs=c(T,T,T,F,T))
- summary(subset(svmC,
- stats=c('Ret','RetOverBH','PercProf','NTrades'),
- vars=c('slide.svmC.v5','slide.svmC.v6')))
- fullResults <- join(svmR,svmC,earth,nnetC,nnetR,by='variants')
- nt <- statScores(fullResults,'NTrades')[[1]]
- rt <- statScores(fullResults,'Ret')[[1]]
- pp <- statScores(fullResults,'PercProf')[[1]]
- s1 <- names(nt)[which(nt > 20)]
- s2 <- names(rt)[which(rt > 0.5)]
- s3 <- names(pp)[which(pp > 40)]
- namesBest <- intersect(intersect(s1,s2),s3)
- compAnalysis(subset(fullResults,
- stats=tgtStats,
- vars=namesBest))
- plot(subset(fullResults,
- stats=c('Ret','PercProf','MaxDD'),
- vars=namesBest))
- getVariant('single.nnetR.v12',nnetR)
- ###################################################
- ### The Trading System
- ###################################################
- data <- tail(Tdata.train,2540)
- results <- list()
- for(name in namesBest) {
- sys <- getVariant(name,fullResults)
- results[[name]] <- runLearner(sys,Tform,data,Tdata.eval)
- }
- results <- t(as.data.frame(results))
- results[,c('Ret','RetOverBH','MaxDD','SharpeRatio','NTrades','PercProf')]
- getVariant('grow.nnetR.v12',fullResults)
- model <- learner('MC.nnetR',list(maxit=750,linout=T,trace=F,size=10,decay=0.001))
- preds <- growingWindowTest(model,Tform,data,Tdata.eval,relearn.step=120)
- signals <- factor(preds,levels=1:3,labels=c('s','h','b'))
- date <- rownames(Tdata.eval)[1]
- market <- GSPC[paste(date,"/",sep='')][1:length(signals),]
- trade.res <- trading.simulator(market,signals,policy.func='pol2')
- plot(trade.res,market,theme='white',name='SP500 - final test')
- library(PerformanceAnalytics)
- rets <- Return.calculate(trade.res@trading$Equity)
- chart.CumReturns(rets,main='Cumulative returns of the strategy',ylab='returns')
- yearlyReturn(trade.res@trading$Equity)
- plot(100*yearlyReturn(trade.res@trading$Equity),
- main='Yearly percentage returns of the trading system')
- abline(h=0,lty=2)
- table.CalendarReturns(rets)
- table.DownsideRisk(rets)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement