rs6000

crawler_twse

Jan 6th, 2018
334
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.36 KB | None | 0 0
  1. import requests
  2. import json,csv
  3. import os,time
  4. from bs4 import BeautifulSoup as bs
  5.  
# Get the current working directory; get_data() saves its CSV files beneath it.
workpath=os.getcwd()
  8.  
  9. #建立個股連結(含日期)&抓取資料
  10. def get_data(year, month, stock_id):
  11.    
  12.     date=str(year)+'{0:0=2d}'.format(month)+'01' #格式yyyymmdd
  13.     sid=str(stock_id)
  14.     url_twse='http://www.twse.com.tw/exchangeReport/STOCK_DAY?response=json&date='+str(date)+'&stockNo='+str(stock_id)
  15.     res=requests.post(url_twse,)
  16.     soup=bs(res.text,'lxml')
  17.     data=json.loads(soup.text)
  18.    
  19.     #存檔路徑
  20.     mydir=os.path.join(workpath,str(stock_id),str(year))
  21.     filename='Stock_'+sid+'_'+str(year)+'_'+'{0:0=2d}'.format(month)+'.csv'
  22.    
  23.     if not os.path.isdir(mydir):
  24.        
  25.         os.makedirs(mydir)
  26.  
  27.     #檢查檔案是否存在
  28.    
  29.     if not os.path.isfile(os.path.join(mydir,filename)):
  30.    
  31.         outputfile=open(os.path.join(mydir,filename),'w',newline='')
  32.         outputwriter=csv.writer(outputfile)
  33.         outputwriter.writerow(data['title'])
  34.         outputwriter.writerow(data['fields'])
  35.  
  36.         for data in(data['data']):
  37.            
  38.             outputwriter.writerow(data)
  39.  
  40.         outputfile.close()
  41.        
  42.     else:
  43.        
  44.         print('已有相同檔名的檔案存在!!!')
  45.  
  46.     return data
  47.  
# Call format: get_data(yyyy, mm, stock code)
# Test run
data=get_data(2017,10,2330)
Advertisement
Add Comment
Please, Sign In to add comment