Advertisement
Guest User

Untitled

a guest
Nov 24th, 2014
150
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.58 KB | None | 0 0
  1. import urllib
  2. from bs4 import BeautifulSoup
  3.  
  4. vis = open("vis444.txt", "w")
  5. fh = open(r'C:\bla.txt', 'r')
  6. for line in fh.readlines():
  7.  
  8.     try:
  9.         url = line
  10.         html = urllib.urlopen(url).read()
  11.         soup = BeautifulSoup(html)
  12.  
  13.     # get text
  14.         head = soup.find("td", class_="main").find("td", class_="pageHeading")
  15.         text = ' '.join(head.string.splitlines())
  16.         print text+','+head.find_next("td", class_="main").find_next("td", class_="main").next.next
  17.         print >>vis444, text+','+head.find_next("td", class_="main").find_next("td", class_="main").next.next
  18.     except:
  19.         pass
  20. vis444.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement