Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- '''
- author:smilehsu
- blog:smilehsu.cc
- requirements:Windows7、python3.52
- date:2017/02/12
- '''
- import os, re, requests, shutil
- from bs4 import BeautifulSoup
# Browser-like User-Agent header sent with the image download requests.
headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"}

# Every album page lives at <base_url><number>.html.
base_url = 'http://meizitu.com/a/'

# Root folder that receives one sub-folder per album.
# Raw string: '\m' is not a recognised escape sequence and newer Python
# versions warn about it; the resulting value is unchanged.
dir_path = r'd:\meizitu'

# Number of album pages to list. Valid range is 1 <= num <= 5481;
# start with the first 50 pages as a trial.
num = 50

# Links to all album pages: 1.html, 2.html, ..., <num>.html.
all_page_link = [base_url + str(i) + '.html' for i in range(1, num + 1)]
# Album pages that could not be fetched or parsed; they are skipped.
error_page = []

# Seconds to wait on a stalled connection before giving up on a request.
REQUEST_TIMEOUT = 30

# Characters Windows does not allow in file or directory names.
WINDOWS_RESERVED_CHARS = '<>:"/\\|?*'


def sanitize_name(name):
    """Return *name* with every Windows-reserved character replaced by ``_``.

    The album title is used both as a folder name and as a file-name prefix,
    so a title containing e.g. ``?`` or ``/`` would otherwise make the folder
    creation or the file write fail.
    """
    return ''.join('_' if ch in WINDOWS_RESERVED_CHARS else ch for ch in name)


def build_pic_name(dir_name, pic_src):
    """Return the local file name for an image: ``<dir_name>_<last URL segment>``."""
    return dir_name + '_' + pic_src.split('/')[-1]


def parse_album(page_soup):
    """Extract the album title text and the ``<img>`` tags from a parsed page.

    Args:
        page_soup: parsed album page (a BeautifulSoup document).

    Returns:
        A ``(title_text, img_tags)`` tuple.

    Raises:
        AttributeError: if the page lacks the ``metaRight`` title link or the
            ``postContent`` container.
    """
    title = page_soup.find('div', {'class': 'metaRight'}).find('a')
    album_pics = page_soup.find('div', {'class': 'postContent'}).find_all('img')
    return title.text, album_pics


def download_pic(pic_src, target_path):
    """Stream the image at *pic_src* into the file *target_path*.

    Raises:
        requests.RequestException: on connection errors, timeouts or an HTTP
            error status.
        OSError: if the target file cannot be written.
    """
    get_pic = requests.get(pic_src, headers=headers, stream=True,
                           timeout=REQUEST_TIMEOUT)
    try:
        # Do not store an HTTP error page under an image file name.
        get_pic.raise_for_status()
        # Have the raw stream undo any gzip/deflate content encoding.
        get_pic.raw.decode_content = True
        with open(target_path, 'wb') as f:
            shutil.copyfileobj(get_pic.raw, f)
    finally:
        get_pic.close()


def process_album(get_album):
    """Download every image of one album page into its own folder.

    The folder is ``dir_path/<album title>`` and is created when missing.
    A page that cannot be fetched or parsed is printed, appended to
    ``error_page`` and skipped, so no download is attempted with missing or
    stale album data.

    Args:
        get_album: URL of the album page.
    """
    try:
        page_html = requests.get(get_album, timeout=REQUEST_TIMEOUT)
        page_html.raise_for_status()
        # Decode the page as GB2312 instead of relying on the detected encoding.
        page_html.encoding = 'gb2312'
        page_soup = BeautifulSoup(page_html.text, 'lxml')
        # Page title and the image tags found on the page.
        title_text, album_pics = parse_album(page_soup)
        print(get_album)
        print(title_text)
        dir_name = sanitize_name(title_text)
        mydir_path = os.path.join(dir_path, dir_name)
        if not os.path.exists(mydir_path):
            print('建立資料夾:'+mydir_path)
            os.makedirs(mydir_path)
        else:
            print('資料夾已存在'+mydir_path)
    except (requests.RequestException, AttributeError, OSError):
        print('error: {}'.format(get_album))
        error_page.append(get_album)
        return

    for pic in album_pics:
        # Image link as written in the page.
        pic_src = pic.get('src')
        if not pic_src:
            # An <img> without a src has nothing to download.
            continue
        print('要下載的圖檔連結'+pic_src)
        # Write with a full path rather than changing the working directory.
        target_path = os.path.join(mydir_path, build_pic_name(dir_name, pic_src))
        try:
            download_pic(pic_src, target_path)
        except (requests.RequestException, OSError) as err:
            # One broken image should not abort the rest of the album.
            print('error: {} ({})'.format(pic_src, err))


def main():
    """Process the trial slice of the album page list."""
    # all_page_link[30:35] covers 31.html .. 35.html, a small test run.
    for get_album in all_page_link[30:35]:
        process_album(get_album)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement