Scrape with list

import csv

import requests
from bs4 import BeautifulSoup

#Read the list of URLs (one per line) from list_url.txt
#--------------------------------------------------------
with open('list_url.txt') as infile:
    url_list = infile.read().splitlines()
#--------------------------------------------------------

#Open the CSV once, write the header a single time, then append one row per URL
#--------------------------------------------------------
fieldnames = ['manno', 'man', 'cont', 'dia', 'typ', 'colour']

with open('test.csv', 'a', newline='') as csvfile:
    thewriter = csv.DictWriter(csvfile, fieldnames=fieldnames)
    thewriter.writeheader()

    #Take each URL from the list in turn: first the 1st URL, then the 2nd, then the 3rd, ...
    for url in url_list:
        print(url)

        #Fetch the page and parse it into soup
        r = requests.get(url)
        soup = BeautifulSoup(r.text, 'html.parser')
        #league_table = soup.find('div', class_='advanced-properties js-advanced-properties')

        #Search soup for the td data that follows each label
        manno = soup.find(string="Manufacturer No.:").find_next('td').getText()[0:20]
        man = soup.find(string="Manufacturers:").find_next('td').getText()[0:10]
        cont = soup.find(string="Content:").find_next('td').getText()[0:20]
        dia = soup.find(string="Diameter:").find_next('td').getText()[0:11]
        typ = soup.find(string="Product types:").find_next('td').getText()[0:20]
        colour = soup.find(string="Filament colour:").find_next('td').getText()[0:20]

        #Print the td data
        print(manno)
        print(man)
        print(cont)
        print(dia)
        print(typ)
        print(colour)

        #Write the scraped values as one row in the CSV
        thewriter.writerow({'manno': manno, 'man': man, 'cont': cont,
                            'dia': dia, 'typ': typ, 'colour': colour})
#--------------------------------------------------------
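A possible hardening step, in case some product pages omit one of the labels: the chained soup.find(...).find_next('td') call raises an AttributeError when the label text is not found. A small helper like the hypothetical td_after() below (not part of the original paste) returns an empty string instead:

def td_after(soup, label, length):
    #Return the text of the <td> following the given label text, or '' if the label is missing
    node = soup.find(string=label)
    if node is None:
        return ''
    cell = node.find_next('td')
    return cell.getText()[0:length] if cell else ''

#e.g. manno = td_after(soup, "Manufacturer No.:", 20)

Note also that because test.csv is opened in append mode, every run of the script adds another header row; checking whether the file is empty before calling writeheader() is one way to avoid duplicate headers.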