Guest User

Untitled

a guest
Jan 15th, 2018
128
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.34 KB | None | 0 0
  1. import numpy as np
  2. from sklearn.preprocessing import MinMaxScaler
  3. import pandas as pd
  4. from pandas import DataFrame
  5. import pandas_datareader.data as pdr
  6. import pymysql, dbCredentials
  7. from openpyxl import load_workbook
  8.  
  9. class Stock:
  10. def __init__(self, SYMBOL, testShare):
  11. self.symbol = SYMBOL
  12. self.dfData = prepareFundamentalStatistics(self.symbol, printFile=True)
  13. self.X_train, self.y_train, self.X_test, self.y_test, self.scalar = trainAndTestData(self.dfData, testShare)
  14.  
  15. def fetchFromDB(SYMBOL, dbTable='fundamantaltbl'):
  16. # The fundamental data is fetched from an external database
  17. conn = pymysql.connect(
  18. user=dbCredentials.user,
  19. password=dbCredentials.password,
  20. host=dbCredentials.host,
  21. port=3306,
  22. database='innodb')
  23. cur = conn.cursor()
  24.  
  25. insert_stmt = "SELECT DATE, TYPE, VALUE FROM " + dbTable + " WHERE SYMBOL=%(SYMBOL)s"
  26. #print("insert_stmt: ", insert_stmt)
  27. cur.execute(insert_stmt, {'SYMBOL': SYMBOL})
  28. field_names = [i[0] for i in cur.description]
  29. get_data = [xx for xx in cur]
  30. cur.close()
  31. conn.close()
  32. df = DataFrame(get_data, index=[x[0] for x in get_data])
  33. df.columns = field_names
  34. df.sort_index(inplace=True)
  35. return df
  36.  
  37. def writeToFile(df, workbook, worksheet):
  38. # Method writes a DataFrame to a specified worksheet in a Excel-workbook
  39. try:
  40. book = load_workbook(workbook) # Try to open an existing file
  41. with pd.ExcelWriter(workbook, engine='openpyxl') as writer:
  42. writer.book = book
  43. writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
  44. df.to_excel(writer, worksheet) # Append the dataframe
  45. writer.save()
  46. except:
  47. df.to_excel(workbook, worksheet) # If the file does not exist a new file is created
  48.  
  49. def fcff(cfo, intexp, taxrate, capex):
  50. return cfo + intexp * (1.0 - taxrate) + capex
  51.  
  52. def wacc(de_ratio, k_debt=0.05, k_equity=0.12, tax_rate=0.3):
  53. weight_equity = 1.0 / (de_ratio + 1.0)
  54. return (1 - weight_equity) * k_debt * (1 - tax_rate) + weight_equity * k_equity
  55.  
  56. def dcf(cf, growth, dr):
  57. #Growth rate is a scalar
  58. if (np.size(growth) == 1):
  59. return (cf * (1 + growth) / (dr - growth)) / (1 + dr)
  60. # Growth rate is an array
  61. pvCF, term_value = 0, 0
  62. for i in range(0, len(growth)):
  63. cf = cf * (1 + growth[i])
  64. if (i == len(growth) - 1):
  65. term_value = cf * (1 + growth[i]) / (dr - growth[i])
  66. pvCF = pvCF + (cf + term_value) / (1.0 + dr)**(i+1)
  67. return pvCF
  68.  
  69. def getDfPrice(stocklist, start_date, end_date, type='Close'):
  70. df = pdr.get_data_yahoo(stocklist, start_date, end_date)
  71. #df = pdr.DataReader(stocklist, start=start_date, end=end_date)
  72. df = DataFrame(df[type].values, columns=[stocklist], index=df.index.date)
  73. #print("df.index:", df.index, ", df.columns: ", df.columns)
  74. return df
  75.  
  76. def prepareFundamentalStatistics(symbol, printFile=True):
  77.  
  78. dfFund = fetchFromDB(symbol, dbTable='fundamentaltbl') # Fetch fundamental data from database
  79. datesFund = dfFund.ix[:, 'DATE'].unique() # The unique dates
  80. dfPrice = getDfPrice(symbol, datesFund[0], datesFund[-1], type='Close') # Fetch the price data from Yahoo Finance
  81.  
  82. # Prepare fundamental data values for the analysis
  83. rev = dfFund[(dfFund['TYPE'] == 'REV')].ix[:, 'VALUE'].values
  84. ni = dfFund[(dfFund['TYPE'] == 'NI')].ix[:, 'VALUE'].values
  85. ebt = dfFund[(dfFund['TYPE'] == 'EBT')].ix[:, 'VALUE'].values
  86. cfo = dfFund[(dfFund['TYPE'] == 'CFO')].ix[:, 'VALUE'].values
  87. intexp = dfFund[(dfFund['TYPE'] == 'INTEX')].ix[:, 'VALUE'].values
  88. capex = dfFund[(dfFund['TYPE'] == 'CAPEX')].ix[:, 'VALUE'].values
  89. wash = dfFund[(dfFund['TYPE'] == 'WASH')].ix[:, 'VALUE'].values
  90.  
  91. # For every price-date sum over 4 previous quarters and calulate the key ratios based on the current price
  92. dfTemp = DataFrame(np.column_stack((rev, ni, fcff(np.array(cfo), np.array(intexp),
  93. np.divide(np.array(ebt) - np.array(ni), np.array(ebt)),
  94. np.array(capex)))), index=datesFund,
  95. columns=['REV', 'NI', 'FCFF'])
  96. # Annualize the fundamental data and remove the first 3 quarters
  97. dfTemp = DataFrame.rolling(dfTemp, window=4, min_periods=4, center=False).sum()[3:]
  98. dfTemp['WASH'] = wash[3:] # Add weighted average number of shares as the last column
  99.  
  100. # Calculate and save the fundamental
  101. count = 0
  102. output2 = np.array([])
  103. dfPriceGrowth = dfPrice.pct_change(periods=1)
  104.  
  105. price_start = 0
  106. for i in range(1, len(dfPrice.index)):
  107. output = np.array(dfTemp.iloc[dfTemp.index <= dfPrice.index[i]].iloc[-1:].values)
  108. if (output.size > 0):
  109. if (count==0):
  110. price_start = i
  111. count += 1
  112. price = dfPrice.iloc[i].values
  113. priceGrowth = round(dfPriceGrowth.iloc[i], 4)
  114. marketCap = price * output[-1][-1]
  115. output2 = np.append(output2, priceGrowth)
  116. output2 = np.append(output2, np.round(np.divide(marketCap, output[-1][:-1]), 2))
  117.  
  118. # Assign the Class variable
  119. dfData = DataFrame(np.reshape(output2, (count, 4)),
  120. index=dfPriceGrowth[dfPriceGrowth.index >= dfPriceGrowth.index[price_start]].index,
  121. columns=['Price_delta', 'P/S', 'P/E', 'P/FCFF'])
  122. if (printFile == True):
  123. writeToFile(dfData, symbol + '.xlsx', 'python_output')
  124.  
  125. return dfData
  126.  
  127. def trainAndTestData(dfData, trainShare=0.8):
  128. # Prepare Train and Test data by splitting the data set in two parts by trainShare variable
  129. n_train = round(dfData.shape[0] * trainShare) # The total number of rows multiplied by the training share
  130. X = dfData.iloc[:,:].values
  131.  
  132. # Normalize the dataset
  133. scaler = MinMaxScaler(feature_range=(0, 1))
  134. print("scaler: ", scaler)
  135. X = scaler.fit_transform(X)
  136.  
  137. X = X[0:-1,:] # Remove the last row
  138. y = dfData.iloc[:, 0].values
  139. y = y[1:] # Remove the first row, since the y variables should be forward looking
  140. X_train, y_train = X[:n_train,:], y[:n_train]
  141. X_test, y_test = X[n_train:,:], y[n_train:]
  142.  
  143. # Reshape input X to 3D format [samples, timesteps, features]
  144. X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
  145. X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
  146.  
  147. return X_train, y_train, X_test, y_test, scaler
Add Comment
Please, Sign In to add comment