Advertisement
rs6000

crawler_twseV2

Jan 8th, 2018
238
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.89 KB | None | 0 0
  1. import requests
  2. import json,csv
  3. import os,time,datetime
  4. from bs4 import BeautifulSoup as bs
  5.  
  6. #取得當前工作路徑加存檔路徑
  7. workpath=os.getcwd()+'\My_Stocks'
  8.  
  9. #股票代碼
  10. stock_list=[2330,3008,2317,6456,4943,2353,3406,6153,2891,3532]
  11.  
  12. #現在
  13. now=datetime.datetime.now()
  14.  
  15. #要抓取的日期範圍
  16. year_list=range(2017,now.year+1)
  17. month_list=range(1,13)
  18.  
  19. #建立個股連結(含日期)&抓取資料
  20. def get_data(year, month, stock_id):
  21.    
  22.     date=str(year)+'{0:0=2d}'.format(month)+'01' #格式yyyymmdd
  23.     sid=str(stock_id)
  24.     url_twse='http://www.twse.com.tw/exchangeReport/STOCK_DAY?response=json&date='+str(date)+'&stockNo='+str(stock_id)
  25.     res=requests.post(url_twse,)
  26.     soup=bs(res.text,'lxml')
  27.     data=json.loads(soup.text)
  28.    
  29.     #存檔路徑
  30.     mydir=os.path.join(workpath,str(stock_id),str(year))
  31.     filename='Stock_'+sid+'_'+str(year)+'_'+'{0:0=2d}'.format(month)+'.csv'
  32.    
  33.     if not os.path.isdir(mydir):
  34.        
  35.         os.makedirs(mydir)
  36.  
  37.     #檢查檔案是否存在
  38.    
  39.     if not os.path.isfile(os.path.join(mydir,filename)):
  40.    
  41.         outputfile=open(os.path.join(mydir,filename),'w',newline='')
  42.         outputwriter=csv.writer(outputfile)
  43.         outputwriter.writerow(data['title'])
  44.         outputwriter.writerow(data['fields'])
  45.  
  46.         for data in(data['data']):
  47.            
  48.             outputwriter.writerow(data)
  49.  
  50.         outputfile.close()
  51.        
  52.     else:
  53.        
  54.         print('已有相同檔名的檔案存在!!!')
  55.  
  56. time_start=time.time()
  57. for stocks in stock_list:
  58.     for year in year_list:
  59.         for month in month_list:
  60.             if (now.year == year and month > now.month) :break
  61.             data=get_data(year,month,stocks)
  62.             #print(year,month)
  63.             #時間間隔請設3秒以上,以免被twse封鎖
  64.             time.sleep(3)
  65.  
  66. time_end=time.time()
  67. print(time_end-time_start)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement