Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Scrape a heading and the value that follows it from each listed URL.

The script reads one URL per line from ``URL_LIST_PATH``, downloads each
page, and builds a ``heading,value`` record from the page's table cells.
Every record is appended to ``OUTPUT_PATH`` and echoed to stdout.  A URL
that cannot be fetched or parsed is reported on stderr and skipped, so one
bad page does not abort the whole run.
"""
import io
import sys
import urllib
from contextlib import closing

from bs4 import BeautifulSoup

try:
    from urllib.request import urlopen  # Python 3 location
except ImportError:
    urlopen = urllib.urlopen  # Python 2 location, as originally used

# Input list of URLs (one per line) and the file the records are written to.
URL_LIST_PATH = r'C:\bla.txt'
OUTPUT_PATH = "vis444.txt"


def _extract_record(html):
    """Return the ``heading,value`` record found in *html*.

    The heading is the text of the ``td.pageHeading`` cell nested inside the
    first ``td.main`` cell, with its line breaks collapsed to single spaces.
    The value is the node two steps (in document order) after the second
    ``td.main`` cell that follows the heading.

    Raises:
        AttributeError: if an expected cell is missing or the heading cell
            has no single string child.
        TypeError: if the located value node is not a string.
    """
    # NOTE(review): no parser is named, so bs4 picks the best one installed.
    # Left as in the original because the element stepping below depends on
    # the exact tree the parser produces -- confirm before pinning a parser.
    soup = BeautifulSoup(html)
    head = soup.find("td", class_="main").find("td", class_="pageHeading")
    text = ' '.join(head.string.splitlines())
    value = (
        head.find_next("td", class_="main")
        .find_next("td", class_="main")
        .next_element.next_element
    )
    return text + ',' + value


# The original bound the output file to ``vis`` but wrote to and closed
# ``vis444``; the resulting NameError was swallowed inside the loop (so the
# file stayed empty) and then crashed the script on the final close().
# Context managers now own both files, and the output is written as UTF-8 so
# non-ASCII page text does not fail on write.
with io.open(OUTPUT_PATH, "w", encoding="utf-8") as out:
    with open(URL_LIST_PATH, 'r') as fh:
        for line in fh:
            url = line.strip()
            if not url:
                # Blank lines in the list are not URLs; nothing to fetch.
                continue
            try:
                with closing(urlopen(url)) as response:
                    html = response.read()
                record = _extract_record(html)
                # Persist first so a console encoding error on the echo
                # below cannot lose the record.
                out.write(record + u"\n")
                print(record)
            except Exception as exc:
                # Best-effort scraping: report the failure and carry on with
                # the next URL instead of silently discarding the error.
                sys.stderr.write("skipping %s: %r\n" % (url, exc))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement