Advertisement
gruntfutuk

bs4test

Jun 1st, 2018
187
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.80 KB | None | 0 0
  1. import csv
  2. from urllib.request import urlopen
  3. from bs4 import BeautifulSoup as soup
  4.  
  5. my_url = 'http://ravimaailma.fi/'
  6.  
  7. # Opening up connection and grabbing the page
  8. uClient = urlopen(my_url)
  9. page_html = uClient.read()
  10. uClient.close()
  11.  
  12. page_soup = soup(page_html, 'html.parser')
  13.  
  14. containers = page_soup.findAll('a')
  15.  
  16. Links = []
  17. Link_Text = []
  18.  
  19. filename = "Labels_And_Links.csv"
  20.  
  21. with open(filename, 'w', encoding='utf-8') as csvfile:
  22.     csv = csv.writer(csvfile, delimiter=',',
  23.                             quotechar='\'', quoting=csv.QUOTE_MINIMAL)
  24.     headers = ['Link_Text', 'Links']
  25.     csv.writerow(headers)
  26.     for container in containers:
  27.         href = container.get('href', None)
  28.         if href and href.startswith('http'):
  29.             csv.writerow([container['href'], container.text])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement