Ihsanul

Python SVR program

Feb 4th, 2020
130
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Wed Jan 15 23:52:05 2020
  4.  
  5. @author: M. Ihsanul Qamil
  6. """
  7.  
  8. import csv
  9. import pandas as pd
  10. import numpy as np
  11. #import math
  12. import matplotlib.pyplot as plt
  13. import random as Rand
  14. from pandas import DataFrame
  15. from sklearn.model_selection import train_test_split
  16. import pdb
  17. import time
  18. nstart=time.process_time()
  19. #   pdb.set_trace()
  20. # import IPython as IP
  21.  
  22. data = pd.read_csv("TLKM.csv")
  23.  
  24. def Distancetrain(d3, d2, d1):
  25.     d=len(d3.index)
  26.     harray=[]
  27.     for i in range(d):
  28.         harray.clear()
  29.         for j in range(d):
  30.             harray.append(((d3.iloc[i]-d3.iloc[j])**2) + ((d2.iloc[i]-d2.iloc[j])**2) + ((d1.iloc[i]-d1.iloc[j])**2))
  31.         if i < 1:
  32.             distancedata=pd.DataFrame(harray)
  33.         else:
  34.             distancedata[i]=harray
  35.     print("distance train")
  36.     print(time.process_time()-nstart)
  37.     return distancedata
  38.  
  39. def Distancetest(d3train, d2train, d1train, d3test, d2test, d1test):
  40.     dtrain=len(d3train.index)
  41.     dtest=len(d3test.index)
  42.     harray=[]
  43.     for i in range(dtrain):
  44.         harray.clear()
  45.         for j in range(dtest):
  46.             harray.append(((d3test.iloc[j]-d3train.iloc[i])**2) + ((d2test.iloc[j]-d2train.iloc[i])**2) + ((d1test.iloc[j]-d1train.iloc[i])**2))
  47.         if i < 1:
  48.             distancedata=pd.DataFrame(harray)
  49.         else:
  50.             distancedata[i]=harray
  51.     print("distance test")
  52.     print(time.process_time()-nstart)
  53.     return distancedata
  54.  
  55.  
  56. def Hessian(dfdistance, sigma, lamda):
  57.     d=len(dfdistance.index)
  58.     col=len(dfdistance.columns)
  59.     hes = np.array([], dtype=np.float64).reshape(0,col)
  60.     tampung = [[0] * col]
  61.     sig2= 2*(sigma**2)
  62.     lam2=lamda**2
  63.     for i in range(d):
  64.         for j in range(col):
  65.             tampung[0][j]=np.exp(-1*((dfdistance.iloc[i][j])/(sig2))) + (lam2)
  66.         hes=np.vstack([hes, tampung])
  67.     dfhessian=pd.DataFrame(hes)
  68.     print("hessian")
  69.     print(time.process_time()-nstart)
  70.     return dfhessian
  71.  
  72. def Seqlearn(y, dfhessian, gamma, eps, c, itermaxsvr):
  73.     d=len(dfhessian.index)
  74.     a = [[0] * d]
  75.     a_s = [[0] * d]
  76.     la = [[0] * d]
  77.     la_s = [[0] * d]
  78.     E = np.array([], dtype=np.float64).reshape(0,d)
  79.     Etemp = [[0] * d]
  80.     da_s = np.array([], dtype=np.float64).reshape(0,d)
  81.     da = np.array([], dtype=np.float64).reshape(0,d)
  82.     dat_s = [[0] * d]
  83.     dat = [[0] * d]
  84.     tempas = [[0] * d]
  85.     tempa = [[0] * d]
  86.     for i in range(itermaxsvr):
  87.         for j in range(d):
  88.             Rijhelp=0
  89.             for k in range(d):
  90.                 Rijhelp = Rijhelp + ((a_s[i][k] - a[i][k])*(dfhessian.iloc[j][k]))
  91.             Etemp[0][j]= y.iloc[j] - Rijhelp
  92.         E=np.vstack([E, Etemp])
  93.         for l in range(d):
  94.             dat_s[0][l]=min(max(gamma*(E[i][l] - eps), -1*(a_s[i][l])), (c - a_s[i][l]))
  95.             dat[0][l]=min(max(gamma*(-(E[i][l]) - eps), -1*(a[i][l])), (c - a[i][l]))
  96.             tempas[0][l]= a_s[i][l] + dat_s[0][l]
  97.             tempa[0][l]= a[i][l] + dat[0][l]
  98.         da_s=np.vstack([da_s, dat_s])
  99.         da=np.vstack([da, dat])
  100.  
  101.         a=np.vstack([a, tempa])
  102.         a_s=np.vstack([a_s, tempas])
  103.         la=tempa
  104.         la_s=tempas
  105. #       (|da|<eps and |das|<eps ) or max iterasi
  106.         dat_abs=max([abs(xdat) for xdat in dat[0]])
  107.         dat_s_abs=max([abs(xdats) for xdats in dat_s[0]])
  108.         print(dat_abs)
  109.         print(dat_s_abs)
  110.         if (dat_abs < eps) and (dat_s_abs < eps):
  111.             print(time.process_time()-nstart)
  112.             break
  113.     print(time.process_time()-nstart)
  114.     return la, la_s
  115.  
  116.  
  117. def Predictf(a, a_s, dfhessian):
  118. #   predict = sum ((a_s[0][k]-a[0][k]) * hessian[j][k])
  119.     row=len(dfhessian.index)
  120.     col=len(dfhessian.columns)
  121.     for j in range(row):
  122.         datax=0
  123.         for k in range(col):
  124.             datax= datax + ((a_s[0][k] - a[0][k])*(dfhessian.iloc[j][k]))
  125.         if (j == 0):
  126.             dataxm=datax
  127.         elif (j > 0):
  128.             dataxm=np.vstack([dataxm, datax])
  129.     print("predict")
  130.     print(time.process_time()-nstart)
  131.     return dataxm
  132.  
  133. def Normalization(datain, closemax, closemin):
  134.     dataout=(datain - closemin)/(closemax - closemin)
  135.     return dataout
  136.  
  137. def SVRf(df, closemax, closemin, c, lamda, eps, sigma, gamma, itermaxsvr):
  138.     result = df.assign(Day_3 = Normalization(df.Day_3, closemax, closemin), Day_2=Normalization(df.Day_2, closemax, closemin), Day_1=Normalization(df.Day_1, closemax, closemin), Actual=Normalization(df.Actual, closemax, closemin))
  139.  
  140.     X_train, X_test, y_train, y_test, d3_train, d3_test, d2_train, d2_test, d1_train, d1_test, date_train, date_test = train_test_split(result['Index'], result['Actual'], result['Day_3'], result['Day_2'], result['Day_1'], result['Date'], train_size=0.9, test_size=0.1, shuffle=False)
  141.  
  142.     distancetrain=Distancetrain(d3_train, d2_train, d1_train)
  143.     mhessian=Hessian(distancetrain, sigma, lamda)
  144.     a, a_s = Seqlearn(y_train, mhessian, gamma, eps, c, itermaxsvr)
  145.     distancetest=Distancetest(d3_train, d2_train, d1_train, d3_test, d2_test, d1_test)
  146.     testhessian=Hessian(distancetest, sigma, lamda)
  147.     predict = Predictf(a, a_s, testhessian)
  148.     hasilpre=pd.DataFrame()
  149.     tgltest = date_test
  150.     tgltest.reset_index(drop=True, inplace=True)
  151.     hasilpre['Tanggal'] = tgltest
  152.     hasilpre['Close'] = predict
  153.     deresult = hasilpre.assign(Close=(hasilpre.Close * (closemax - closemin) + closemin))
  154.     n=len(y_test)
  155.     aktualtest = (y_test * (closemax - closemin)) + closemin
  156.     aktualtest.reset_index(inplace=True, drop=True)
  157.     dpredict = pd.Series(deresult['Close'], index=deresult.index)
  158.     hasil = aktualtest - dpredict
  159.     hasil1 = (hasil / aktualtest).abs()
  160.     suma = hasil1.sum()
  161.     mape = (1/n) * suma
  162.     print("MAPE")
  163.     print(mape)
  164.     fitness = 1/(1+mape)
  165.     print(fitness)
  166.     return fitness, mape, hasilpre
  167.  
  168. Closemax=data['Close'].max()
  169. Closemin=data['Close'].min()
  170. print(Closemax)
  171. print(Closemin)
  172. day3 = data['Close'][0:((-1)-2)]
  173. day2 = data['Close'][1:((-1)-1)]
  174. day2.index = day2.index - 1
  175. day1 = data['Close'][2:((-1)-0)]
  176. day1.index = day1.index - 2
  177. dayact = data['Close'][3:]
  178. dayact.index = dayact.index - 3
  179. dateact = data['Tanggal'][3:]
  180. dateact.index = dateact.index - 3
  181. mydata = pd.DataFrame({'Index':data['Index'][0:((-1)-2)], 'Date':dateact, 'Day_3':day3, 'Day_2':day2, 'Day_1':day1, 'Actual':dayact})
  182. print("data proses",time.process_time()-nstart)
  183.  
  184. Lamda=0.09
  185. C=200
  186. Eps=0.0013
  187. Sigma=0.11
  188. Gamma=0.004
  189. Itermaxsvr=1000
  190. SVRf(mydata, Closemax, Closemin, C, Lamda, Eps, Sigma, Gamma, Itermaxsvr)
  191.  
  192. nstop=time.process_time()
  193. print(nstop-nstart)
RAW Paste Data