Advertisement
Guest User

Untitled

a guest
Jul 31st, 2017
214
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.49 KB | None | 0 0
  1. '''
  2. scrape all history stock Data
  3. '''
  4. import datetime
  5. import pandas_datareader.data as web
  6. from dateutil.relativedelta import relativedelta
  7. import mysql.connector
  8. import xml.etree.ElementTree as ET
  9. import pandas as pd
  10. import os
  11.  
  12.  
  13. def get_data(symbol):
  14. start = datetime.datetime(2000, 01, 01);
  15. end = datetime.datetime.now();
  16. print 'scraping [%s]' % symbol
  17. # read the stock information from Google finance
  18. if symbol[0] == '^' or "VIX" in symbol : # we have to call Stooq reader
  19. df = web.DataReader(symbol, 'stooq')
  20. else:
  21. df = web.DataReader(symbol, 'yahoo', start, end)
  22.  
  23. # for backward compatible we have to add column called adj. price
  24. return df
  25.  
  26.  
  27. '''Main Function'''
  28. def main():
  29. os.chdir("/home/ec2-user/data/stock_price")
  30. # init mysql connection
  31. # mysql -h sfdata.cf6ulue4mzq9.us-west-2.rds.amazonaws.com -P 3306 -u spr1ngf0rward -p
  32. cnx = mysql.connector.connect(user='spr1ngf0rward', password='DTXpecgdTQzijTMg2',
  33. host='sfdata.cf6ulue4mzq9.us-west-2.rds.amazonaws.com',
  34. database='sfdata')
  35. # queries
  36. get_symbols = ("SELECT symbol_id, symbol FROM symbol WHERE exchange_id in (1,2,24)")
  37. Insert_Price = ("INSERT INTO stock_prices"
  38. "(symbol, symbol_id, date, open, high, low, close, adj_close, volume, created_on)"
  39. "VALUES (%(symbol)s, %(symbol_id)s, %(date)s, %(open)s, %(high)s, %(low)s, %(close)s, %(adj_close)s, %(volume)s, %(created_on)s)")
  40. symbols = []
  41.  
  42. ## get symbol list
  43. cursor = cnx.cursor()
  44. cursor.execute(get_symbols)
  45. for (symbol_id, symbol) in cursor:
  46. symbols.append((symbol_id, symbol))
  47. cursor.close()
  48.  
  49. # insert stock price into database
  50. output = 'stock_prices.csv'
  51. # debug
  52. # symbols = [(1, 'AABA'), (2, 'YHOO'), (3, 'BABA')]
  53. for (symbol_id, symbol) in symbols:
  54. try:
  55. data=get_data(symbol)
  56. data["symbol"] = symbol
  57. data["symbol_id"] = symbol_id
  58. data["date"] =data.index
  59. data["created_on"]= datetime.datetime.now().date()
  60. order = ['symbol', 'symbol_id', 'date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'created_on']
  61. data = data.ix[:, order]
  62. data.to_csv(output, index = False, mode = 'a', header = False)
  63. print ("download %s success" % (symbol))
  64. except Exception as ex:
  65. print ("download %s fails - %s" % (symbol, ex))
  66.  
  67.  
  68. if __name__ == '__main__':
  69. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement