Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Print every text node of an HTML document after removing the elements
# whose class is 'pno' or 'subhead'.
#
# Relies on `urllib2` and `BeautifulSoup` being imported elsewhere in the file.
inputfilename = 'file.html'

# urllib2.urlopen() only accepts real URLs; a bare path such as 'file.html'
# raises "ValueError: unknown url type". Open scheme-less inputs as local
# files and keep urlopen() for anything that actually carries a scheme.
if '://' in inputfilename:
    source = urllib2.urlopen(inputfilename)
else:
    source = open(inputfilename, 'rb')
try:
    soup = BeautifulSoup(source)
finally:
    # The original never closed its input handle.
    source.close()

# Re-parse the pretty-printed markup, exactly as the original script did.
# NOTE(review): presumably this normalises whitespace/nesting before the
# text walk -- confirm whether the round trip is still needed.
soup = BeautifulSoup(soup.prettify())

# Drop the unwanted elements from the tree, 'pno' first and then 'subhead'
# (same order as the original two loops).
for css_class in ('pno', 'subhead'):
    for tag in soup.findAll(attrs={'class': css_class}):
        tag.extract()

# Emit each remaining text node on its own line, trimmed and with tabs removed.
for s in soup(text=True):
    s = s.strip().replace('\t', '')
    # Parenthesised single-argument form prints identically on Python 2.
    print(s)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement