Advertisement
Guest User

Untitled

a guest
Sep 26th, 2017
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.78 KB | None | 0 0
  1. import os
  2.  
  3. import numpy as np
  4. import pandas as pd
  5. from pandas_datareader.data import DataReader
  6. from pandas_datareader._utils import RemoteDataError
  7. import requests
  8.  
  9. from zipline.utils.cli import maybe_show_progress
  10.  
  11.  
  12. def _cachpath(symbol, type_):
  13. return '-'.join((symbol.replace(os.path.sep, '_'), type_))
  14.  
  15.  
  16. def google_equities(symbols, start=None, end=None):
  17. symbols = tuple(symbols)
  18.  
  19. def ingest(environ,
  20. asset_db_writer,
  21. minute_bar_writer, # unused
  22. daily_bar_writer,
  23. adjustment_writer,
  24. calendar,
  25. start_session,
  26. end_session,
  27. cache,
  28. show_progress,
  29. output_dir,
  30. start=start,
  31. end=end):
  32. if start is None:
  33. start = start_session
  34. if end is None:
  35. end = None
  36.  
  37. metadata = pd.DataFrame(np.empty(len(symbols), dtype=[
  38. ('start_date', 'datetime64[ns]'),
  39. ('end_date', 'datetime64[ns]'),
  40. ('auto_close_date', 'datetime64[ns]'),
  41. ('symbol', 'object'),
  42. ]))
  43.  
  44. def _pricing_iter():
  45. sid = 0
  46. with maybe_show_progress(
  47. symbols,
  48. show_progress,
  49. label='Downloading Google pricing data: ') as it, \
  50. requests.Session() as session:
  51. for symbol in it:
  52. path = _cachpath(symbol, 'ohlcv')
  53. try:
  54. df = cache[path]
  55. except KeyError:
  56. df = cache[path] = DataReader(
  57. symbol,
  58. 'google',
  59. start,
  60. end,
  61. session=session,
  62. ).sort_index()
  63.  
  64. # the start date is the date of the first trade and
  65. # the end date is the date of the last trade
  66. start_date = df.index[0]
  67. end_date = df.index[-1]
  68. # The auto_close date is the day after the last trade.
  69. ac_date = end_date + pd.Timedelta(days=1)
  70. metadata.iloc[sid] = start_date, end_date, ac_date, symbol
  71.  
  72. df.rename(
  73. columns={
  74. 'Open': 'open',
  75. 'High': 'high',
  76. 'Low': 'low',
  77. 'Close': 'close',
  78. 'Volume': 'volume',
  79. },
  80. inplace=True,
  81. )
  82. yield sid, df
  83. sid += 1
  84.  
  85. daily_bar_writer.write(_pricing_iter(), show_progress=show_progress)
  86.  
  87. symbol_map = pd.Series(metadata.symbol.index, metadata.symbol)
  88.  
  89. metadata['exchange'] = "GOOGLE"
  90. asset_db_writer.write(equities=metadata)
  91.  
  92. adjustments = []
  93. with maybe_show_progress(
  94. symbols,
  95. show_progress,
  96. label='Downloading Yahoo adjustment data: ') as it, \
  97. requests.Session() as session:
  98. for symbol in it:
  99. path = _cachpath(symbol, 'adjustment')
  100. sid = symbol_map[symbol]
  101. try:
  102. df = cache[path]
  103. except KeyError:
  104. try:
  105. df = cache[path] = DataReader(
  106. symbol,
  107. 'yahoo-actions',
  108. metadata.ix[sid].start_date,
  109. metadata.ix[sid].end_date,
  110. session=session,
  111. ).sort_index()
  112. except RemoteDataError:
  113. print("No data returned from Yahoo for %s" % symbol)
  114. df = pd.DataFrame(columns=['value', 'action'])
  115.  
  116. df['sid'] = sid
  117. adjustments.append(df)
  118.  
  119. adj_df = pd.concat(adjustments)
  120. adj_df.index.name = 'date'
  121. adj_df.reset_index(inplace=True)
  122.  
  123. splits = adj_df[adj_df.action == 'SPLIT']
  124. splits = splits.rename(
  125. columns={'value': 'ratio', 'date': 'effective_date'},
  126. )
  127. splits.drop('action', axis=1, inplace=True)
  128.  
  129. dividends = adj_df[adj_df.action == 'DIVIDEND']
  130. dividends = dividends.rename(
  131. columns={'value': 'amount', 'date': 'ex_date'},
  132. )
  133. dividends.drop('action', axis=1, inplace=True)
  134. # we do not have this data in the yahoo dataset
  135. dividends['record_date'] = pd.NaT
  136. dividends['declared_date'] = pd.NaT
  137. dividends['pay_date'] = pd.NaT
  138.  
  139. adjustment_writer.write(splits=splits, dividends=dividends)
  140.  
  141. return ingest
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement