Advertisement
Guest User

Untitled

a guest
Feb 22nd, 2017
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.15 KB | None | 0 0
  1. %matplotlib inline
  2. import os
  3. import numpy as np
  4. import matplotlib as mpl
  5. import matplotlib.pyplot as plt
  6. import pandas as pd
  7. import glob
  8. from joblib import Memory
  9. from pandas_datareader import wb
  10. import datetime as dt
  11. memory = Memory('/tmp/')
  12.  
  13. mpl.rcParams.update({'font.size': 10})
  14. DATA_FILE_PATH = './'
  15.  
  16. def load(file_path):
  17. df = None
  18. try:
  19. df = pd.read_csv(file_path)
  20. except:
  21. pass
  22. return df
  23.  
  24. def get_file_list(path):
  25. l = list()
  26. for d in path:
  27. p = os.path.join(d, '*.txt*')
  28. l += glob.glob(p)
  29. return l
  30.  
  31. @memory.cache
  32. def read_all(path):
  33. return [load(f) for f in get_file_list(path)]
  34.  
  35. def read_date(s):
  36. dts = dt.datetime.strptime(s, '%Y-%m-%d')
  37. return dt.date(dts.year, dts.month, dts.day)
  38.  
  39. def to_weekday(s):
  40. td = read_date(s)
  41. return td.weekday()
  42.  
  43. def find_business_day(s, lis):
  44. today = read_date(s)
  45. if today.weekday() != 4:
  46. next_day = today+dt.timedelta(days=1)
  47. else:
  48. next_day = today+dt.timedelta(days=3)
  49. nds = next_day.strftime('%Y-%m-%d')
  50. return nds if nds in lis else None
  51.  
  52. #@memory.cache
  53. def conv_all(path):
  54. time_list = [10,11,12,13,14,15,16]
  55. l = list()
  56. dfs = read_all(path)
  57. tt =set()
  58. for df in dfs:
  59. if df is not None:
  60. for date in set(df['Date'].values):
  61. dfd = df[df['Date']==date]
  62. next_date = find_business_day(date, df['Date'].values)
  63. # 始値(夏時間対応注意)
  64. flag_summer = '14:35:00' in dfd.columns
  65. df_open = dfd[dfd['Time'] == ('14:35:00' if flag_summer else '15:35:00')]
  66. dfa = df_open[['Date', 'Open']].set_index('Date').rename(columns={'Open':'p0930'})
  67. # 時刻別データの列名付け替え
  68. lis = [dfa]
  69. for time in time_list:
  70. rtime = str(time + ((14-9) if flag_summer else (15-9))) + ':00:00'
  71. ptime = 'p'+str(time)+ '00'
  72. #print(rtime)
  73. p = dfd[dfd['Time']==rtime][['Date', 'Close']].set_index('Date').rename(columns={'Close': ptime})
  74. lis.append(p)
  75. if next_date is not None:
  76. dfn = df[df['Date']==next_date] if next_date is not None else None
  77. df_next_open = dfn[dfn['Time'] == ('14:35:00' if flag_summer else '15:35:00')] # 夏時間と冬時間の境目は無視
  78. q = df_next_open[['Date', 'Open']].set_index('Date').rename(columns={'Open':'p3330'})
  79. q = q.values[0] if len(q) > 0 else np.nan
  80. # q.index = [date]
  81. #else:
  82. # q = pd.DataFrame([np.nan], index=[date], columns=['p0930n'])
  83. q = pd.DataFrame([np.nan if next_date is None else q], index=[date], columns=['p3330'])
  84. lis.append(q)
  85. #print(lis)
  86. df_prices = pd.concat(lis, axis=1)
  87. # 変化率
  88. open_price = df_prices['p0930']
  89. last_price = open_price
  90. for time in time_list:
  91. ptime = 'p'+str(time)+ '00'
  92. ctime = 'c'+str(time)+ '00'
  93. rtime = 'r'+str(time)+ '00'
  94. p = df_prices[ptime]
  95. df_prices[ctime] = (p / open_price).apply(np.log)
  96. df_prices[rtime] = (p / last_price).apply(np.log)
  97. del df_prices[ptime]
  98. last_price = p
  99. if True:
  100. ptime = 'p3330'
  101. ctime = 'c3330'
  102. rtime = 'r3330'
  103. p = df_prices[ptime]
  104. df_prices[ctime] = (p / open_price).apply(np.log)
  105. df_prices[rtime] = (p / last_price).apply(np.log)
  106. del df_prices[ptime]
  107. #print(df_prices)
  108. #del df_prices['p3330']
  109. df_prices = df_prices.dropna()
  110. if len(df_prices.index) > 0:
  111. l.append(df_prices)
  112. r = pd.concat(l, axis = 0).drop_duplicates()
  113. #print(sorted(list(tt)))
  114. return r
  115.  
  116. df_data = conv_all(DATA_FILE_PATH)
  117.  
  118. PERCENTILES = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
  119.  
  120. def get_cumulative(df):
  121. return df.describe(percentiles=PERCENTILES)[['c1000','c1100','c1200','c1300','c1400','c1500','c1600', 'c3330']]
  122.  
  123. def get_relative(df):
  124. return df.describe(percentiles=PERCENTILES)[['r1000','r1100','r1200','r1300','r1400','r1500','r1600', 'r3330']]
  125.  
  126. def explain(df_data, opt=""):
  127. df_cumulative = get_cumulative(df_data)
  128. df_relative = get_relative(df_data)
  129. fig = plt.figure(figsize=(12, 4))
  130. ax = fig.add_subplot(1,2,1)
  131. ax.set_ylim((-0.05, 0.05))
  132. draw(ax, df_cumulative, 'cumulative', opt)
  133. bx = fig.add_subplot(1,2,2)
  134. bx.set_ylim((-0.02, 0.02))
  135. draw(bx, df_relative, 'relative', opt)
  136.  
  137. def draw(ax, df_summary, title, opt):
  138. for c in df_summary.columns:
  139. df_summary.rename(columns={c: int(c[1:])}, inplace=True)
  140. ax.set_title(opt+title+' movement')
  141. ax.grid()
  142. if title != 'relative':
  143. ya = [0]
  144. ia = [930]
  145. else:
  146. ya = ia = []
  147. y = ya+df_summary.ix['mean',:].values.tolist()
  148. i = ia+df_summary.ix['mean',:].index.tolist()
  149. ax.plot(i, y, linewidth=4)
  150. #ax.plot(df_summary.ix['mean',:], linewidth=4)
  151. for p in PERCENTILES:
  152. y = ya+df_summary.ix['{}%'.format(int(p*100)),:].values.tolist()
  153. ax.plot(i, y)
  154.  
  155. explain(df_data)
  156.  
  157. def explain_by_weekday(df_data):
  158. df_wd = df_data.copy()
  159. df_wd['weekday'] = [to_weekday(t) for t in df_wd.index]
  160. for d in range(5):
  161. df = df_wd[df_wd['weekday'] == d]
  162. print(d, len(df))
  163. explain(df, ['mon','tue','wed','thr','fri'][d]+'day / ')
  164.  
  165. explain_by_weekday(df_data)
  166.  
  167. def to_day(s):
  168. tdt = dt.datetime.strptime(s, '%Y-%m-%d')
  169. return tdt.day
  170.  
  171. def explain_by_day(df_data):
  172. df_wd = df_data.copy()
  173. df_wd['day'] = [to_day(t) for t in df_wd.index]
  174. df = df_wd[df_wd['day']<=10]
  175. explain(df, 'early month / ')
  176. df = df_wd[df_wd['day']>10]
  177. df = df[df['day']<20]
  178. explain(df, 'mid month / ')
  179. df = df_wd[df_wd['day']>=20]
  180. explain(df, 'late month / ')
  181.  
  182. explain_by_day(df_data)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement