Advertisement
Guest User

Untitled

a guest
Oct 4th, 2015
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.00 KB | None | 0 0
  1. from selenium.webdriver.common.keys import Keys
  2. from selenium import webdriver
  3. import unicodedata
  4. from bs4 import BeautifulSoup
  5. import unicodedata
  6. import requests
  7. import shutil
  8. import time
  9. import os
  10. import re
  11.  
  12. re = re.compile(r'[\\\?\.\/\:]')
  13. site = 'https://annaabi.ee/'
  14. r = requests.get('%s' % site)
  15. soup = BeautifulSoup(r.content)
  16. div = soup.find('div', attrs = {'class' : 'leftwrap', 'id' : 'full2'})
  17. links = div.findAll('a', attrs={'class': 'no-underline'})
  18. for link in links:
  19.     print(unicodedata.normalize('NFKD', link['title']).encode('ascii','ignore'))
  20.     r = requests.get('{}{}'.format(site, link['href']))
  21.     soup = BeautifulSoup(r.content)
  22.     div = soup.find('div', attrs = {'class' : 'leftwrap', 'id' : 'full2'})
  23.     links1 = div.findAll('a', attrs={'class': 'no-underline'})
  24.     for link1 in links1:
  25.         print(unicodedata.normalize('NFKD', link1['title']).encode('ascii','ignore'))
  26.         os.makedirs(re.sub('', link['title'].split(' - ')[1].rstrip())+'\\'+re.sub('', link1['title'].rstrip()), exist_ok=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement