Guest User

Python scraping snippet

a guest
Oct 24th, 2017
442
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.64 KB | None | 0 0
  1. def scrape_and_save_pages(folder_path, url_list):
  2.     for i, url in enumerate(url_list[:1], start=0):
  3.         print(i)
  4.         with urllib.request.urlopen(url) as response:
  5.             html = str(response.read())
  6.         with open(f'{folder_path}/{i}.html','w') as f:
  7.             f.write(html)
  8.  
  9. def parse_pages(folder_path):
  10.     person_number_to_personal_info_dict = {}
  11.  
  12.     for i, filepath in enumerate(glob.glob(f'{folder_path}/[0-9]*.html'), start=0):
  13.         person_number_to_personal_info_dict[i] = {}
  14.  
  15.         with open(filepath,'r') as f:
  16.             parsed_file = f.read()
  17.  
  18.             soup = BeautifulSoup(html, 'lxml')
  19.  
  20.             ...
Advertisement
Add Comment
Please, Sign In to add comment