Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- import json,csv
- import os,time,datetime
- from bs4 import BeautifulSoup as bs
# Output root: a "My_Stocks" folder under the current working directory.
# Built with os.path.join rather than a hard-coded '\' so the path is a real
# subdirectory on every platform and no invalid '\M' escape is involved.
workpath = os.path.join(os.getcwd(), 'My_Stocks')
# Stock codes to download.
stock_list = [2330, 3008, 2317, 6456, 4943, 2353, 3406, 6153, 2891, 3532]
# Current local date and time (used to stop at the current month).
now = datetime.datetime.now()
# Date range to fetch: every year from 2017 through the current year,
# and all twelve months of each year.
year_list = range(2017, now.year + 1)
month_list = range(1, 13)
# Build the per-stock monthly URL (including the date), fetch the data
# and store it as a CSV file.
def get_data(year, month, stock_id):
    """Download one month of TWSE daily data for a stock and save it as CSV.

    The file is written to
    ``<workpath>/<stock_id>/<year>/Stock_<stock_id>_<year>_<mm>.csv`` and
    contains a title row, a header row (``fields``) and one row per entry
    of ``data`` from the JSON response.

    Args:
        year: Four-digit year of the month to fetch.
        month: Month number; formatted as two digits in the URL and filename.
        stock_id: Stock code (number or string) passed as ``stockNo``.

    Returns:
        None. If the target file already exists, or the response does not
        contain usable data, a message is printed and nothing is written.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
    """
    sid = str(stock_id)
    month_str = '{0:02d}'.format(month)
    date = str(year) + month_str + '01'  # format: yyyymmdd

    # Output location.
    mydir = os.path.join(workpath, sid, str(year))
    filename = 'Stock_' + sid + '_' + str(year) + '_' + month_str + '.csv'
    filepath = os.path.join(mydir, filename)

    # Check for an existing file first so no request is sent needlessly.
    if os.path.isfile(filepath):
        print('已有相同檔名的檔案存在!!!')
        return

    url_twse = ('http://www.twse.com.tw/exchangeReport/STOCK_DAY'
                '?response=json&date=' + date + '&stockNo=' + sid)
    # A timeout keeps one stalled connection from hanging the whole run.
    res = requests.post(url_twse, timeout=30)

    # Decode the JSON body directly; routing it through an HTML parser can
    # alter the payload when it contains markup-like characters.
    try:
        data = res.json()
    except ValueError:
        data = None

    # Validate before touching the disk: an empty or partial file would
    # otherwise block any later retry through the existence check above.
    if not isinstance(data, dict) or 'fields' not in data or 'data' not in data:
        # Presumably TWSE reports the reason in a 'stat' field; show it if present.
        stat = data.get('stat') if isinstance(data, dict) else None
        print('No usable data for stock {0} {1}-{2} (stat: {3})'.format(
            sid, year, month_str, stat))
        return

    # The title is a single string; wrap it so it is written as one cell
    # instead of one cell per character.
    title = data.get('title', '')
    title_row = [title] if isinstance(title, str) else title

    os.makedirs(mydir, exist_ok=True)
    # NOTE(review): the platform default encoding is kept, as in the original.
    with open(filepath, 'w', newline='') as outputfile:
        outputwriter = csv.writer(outputfile)
        outputwriter.writerow(title_row)
        outputwriter.writerow(data['fields'])
        outputwriter.writerows(data['data'])
# Download every (stock, year, month) combination up to the current month,
# then print the total elapsed time in seconds.
time_start = time.time()
for stock_id in stock_list:
    for year in year_list:
        for month in month_list:
            # Months after the current one have not happened yet, so stop
            # processing this year.
            is_future_month = year == now.year and month > now.month
            if is_future_month:
                break
            data = get_data(year, month, stock_id)
            # Keep the interval at 3 seconds or more to avoid being
            # blocked by TWSE.
            time.sleep(3)
time_end = time.time()
print(time_end - time_start)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement