Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import re
import shutil
import time
import unicodedata
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Characters removed from titles before they are used as directory names:
# the original set (\ ? . / :) plus the remaining characters that Windows
# rejects in file names (* " < > |).  Bound to its own name so the ``re``
# module is no longer shadowed by the compiled pattern.
INVALID_PATH_CHARS = re.compile(r'[\\?./:*"<>|]')

SITE = 'https://annaabi.ee/'

# Selectors for the container <div> and the anchors read from each page.
CONTAINER_ATTRS = {'class': 'leftwrap', 'id': 'full2'}
LINK_ATTRS = {'class': 'no-underline'}

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30


def to_ascii(text):
    """Return *text* as a plain ASCII ``str``.

    Accented characters are decomposed (NFKD) and everything that is still
    non-ASCII afterwards is dropped.  The result is decoded back to ``str`` so
    that printing it shows the text itself instead of a ``b'...'`` repr.
    """
    return unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')


def sanitize_name(title):
    """Return *title* with path-hostile characters removed.

    Trailing whitespace is trimmed *after* the removal, so a title such as
    ``"Foo ."`` does not leave a trailing space behind.
    """
    return INVALID_PATH_CHARS.sub('', title).rstrip()


def category_name(title):
    """Return the second ``' - '``-separated field of *title*.

    Falls back to the whole title when it contains no ``' - '`` separator
    instead of raising ``IndexError``.
    """
    fields = title.split(' - ')
    return fields[1] if len(fields) > 1 else title


def target_dir(category_title, item_title):
    """Return the relative directory path for one second-level link.

    The parent directory comes from the first-level link title (see
    ``category_name``), the child from the second-level link title; both are
    sanitized and joined with the platform's path separator.
    """
    return os.path.join(
        sanitize_name(category_name(category_title)),
        sanitize_name(item_title),
    )


def fetch_links(url):
    """Download *url* and return its titled anchors from the main container.

    Returns an empty list when the page has no matching container.  Anchors
    without a ``title`` attribute are skipped because the title is what gets
    printed and turned into a directory name.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(response.content, 'html.parser')
    container = soup.find('div', attrs=CONTAINER_ATTRS)
    if container is None:
        return []
    return [a for a in container.find_all('a', attrs=LINK_ATTRS) if a.get('title')]


def main():
    """Create one directory per second-level link, nested under its parent.

    Walks the links on the start page, follows each one, and for every link
    found on the followed page creates ``<parent>/<child>`` in the current
    working directory.  Existing directories are left untouched.
    """
    for category in fetch_links(SITE):
        print(to_ascii(category['title']))
        href = category.get('href')
        if not href:
            continue
        try:
            # urljoin copes with relative, root-relative and absolute hrefs.
            items = fetch_links(urljoin(SITE, href))
        except requests.RequestException as exc:
            # One unreachable page should not abort the whole run.
            print('skipping {}: {}'.format(href, exc))
            continue
        for item in items:
            print(to_ascii(item['title']))
            path = target_dir(category['title'], item['title'])
            if path:  # both names may sanitize down to nothing
                os.makedirs(path, exist_ok=True)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement