Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scrape the front page of ravimaailma.fi and export every absolute
# (http/https) hyperlink, together with its visible text, to a CSV file.
import csv
from urllib.request import urlopen

from bs4 import BeautifulSoup as soup

my_url = 'http://ravimaailma.fi/'
filename = "Labels_And_Links.csv"

# Open the connection and grab the page. The context manager closes the
# connection even if read() raises.
with urlopen(my_url) as response:
    page_html = response.read()

page_soup = soup(page_html, 'html.parser')
containers = page_soup.find_all('a')

# newline='' hands line-ending control to the csv module; without it,
# blank rows appear between records on Windows.
with open(filename, 'w', encoding='utf-8', newline='') as csvfile:
    # Named `writer` so the `csv` module is not shadowed.
    writer = csv.writer(csvfile, delimiter=',',
                        quotechar='\'', quoting=csv.QUOTE_MINIMAL)
    headers = ['Link_Text', 'Links']
    writer.writerow(headers)
    for container in containers:
        href = container.get('href')
        # Skip anchors without an href and relative/mailto/javascript links.
        if href and href.startswith('http'):
            # Column order must match `headers`: link text first, then URL.
            writer.writerow([container.text, href])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement