Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Scrape labelled product details from a list of URLs into a CSV file.

Reads one URL per line from ``list_url.txt``, downloads each page, pulls six
labelled values out of the HTML and appends one row per page to ``test.csv``.
"""
import csv
import os

import requests
from bs4 import BeautifulSoup

URL_LIST_PATH = 'list_url.txt'
CSV_PATH = 'test.csv'

# requests waits indefinitely unless a timeout is given, so one stalled
# server would otherwise hang the whole run.
REQUEST_TIMEOUT = 30  # seconds

# (CSV column, label text searched for in the page, max characters kept)
FIELDS = (
    ('manno', 'Manufacturer No.:', 20),
    ('man', 'Manufacturers:', 10),
    ('cont', 'Content:', 20),
    ('dia', 'Diameter:', 11),
    ('typ', 'Product types:', 20),
    ('colour', 'Filament colour:', 20),
)
FIELDNAMES = [column for column, _label, _limit in FIELDS]


def _read_urls(path):
    """Return the non-blank lines of *path* with surrounding whitespace removed."""
    with open(path) as infile:
        stripped = (line.strip() for line in infile)
        return [line for line in stripped if line]


def _extract_row(soup):
    """Return a ``{column: value}`` dict for one parsed page.

    Each value is the text of the first ``<td>`` found after the string that
    exactly matches the field's label, cut to that field's character limit.
    A label that is not present on the page (or has no following ``<td>``)
    yields an empty string rather than raising ``AttributeError``.
    """
    row = {}
    for column, label, limit in FIELDS:
        label_node = soup.find(string=label)
        cell = label_node.find_next('td') if label_node is not None else None
        row[column] = cell.get_text()[:limit] if cell is not None else ''
    return row


def main():
    """Fetch every URL in the list file and append one CSV row per page.

    A URL that cannot be downloaded (connection error, timeout, HTTP error
    status) is reported and skipped so the remaining URLs are still processed.
    """
    urls = _read_urls(URL_LIST_PATH)

    # The file is opened in append mode, so only write the header when the
    # file is new or empty; otherwise every run would add another header row.
    needs_header = (
        not os.path.exists(CSV_PATH) or os.path.getsize(CSV_PATH) == 0
    )

    with open(CSV_PATH, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)
        if needs_header:
            writer.writeheader()

        for url in urls:
            print(url)
            try:
                response = requests.get(url, timeout=REQUEST_TIMEOUT)
                response.raise_for_status()
            except requests.RequestException as error:
                print('Skipping', url, '-', error)
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            row = _extract_row(soup)

            # Echo the scraped values in column order.
            for column in FIELDNAMES:
                print(row[column])

            writer.writerow(row)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment