import glob
import urllib.request

from bs4 import BeautifulSoup


def scrape_and_save_pages(folder_path, url_list):
    # Download each page and save it as <index>.html inside folder_path.
    # Note: the [:1] slice limits the run to the first URL (useful for testing).
    for i, url in enumerate(url_list[:1], start=0):
        print(i)
        with urllib.request.urlopen(url) as response:
            html = response.read().decode('utf-8', errors='replace')
        with open(f'{folder_path}/{i}.html', 'w') as f:
            f.write(html)


def parse_pages(folder_path):
    # Parse each saved page and collect per-person info keyed by file index.
    person_number_to_personal_info_dict = {}
    for i, filepath in enumerate(glob.glob(f'{folder_path}/[0-9]*.html'), start=0):
        person_number_to_personal_info_dict[i] = {}
        with open(filepath, 'r') as f:
            parsed_file = f.read()
        soup = BeautifulSoup(parsed_file, 'lxml')
        ...
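

# Minimal usage sketch (not part of the original paste): the folder name and
# URL below are hypothetical placeholders, and parse_pages() still needs the
# elided extraction logic above before it produces anything useful.
if __name__ == '__main__':
    import os
    pages_dir = 'scraped_pages'               # hypothetical output folder
    urls = ['https://example.com/people/1']   # hypothetical URL list
    os.makedirs(pages_dir, exist_ok=True)     # folder must exist before writing
    scrape_and_save_pages(pages_dir, urls)
    parse_pages(pages_dir)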