Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- __author__ = 'Irenicus09'
- __date__ = '2nd June 2011'
- ####################################################################################
- # #
- # This script has been released as an example for learning Beautiful Soup Module. #
- # #
- # The author takes no liability for the use of this script in any way, #
- # and it is to be used for educational purposes only. #
- # #
- ####################################################################################
- import urllib2
- from BeautifulSoup import BeautifulSoup
def scrapePythonSection():
    """
    Grab the first page of the python section @Intern0t forum and list its threads.

    The page is downloaded with a browser-like User-Agent header and parsed
    with BeautifulSoup. For every <div class="inner"> that contains both an
    <h3 class="threadtitle"> link and a <div class="author"> link, one line
    of the form "<index>. <title> >> <author link title attribute>" is
    printed. Entries that lack either piece are skipped.

    Returns nothing. If the page cannot be downloaded, an error message is
    printed and the process exits with status 1.
    """
    # Local import keeps this function self-contained; sys.exit is used
    # instead of the exit() helper, which is only injected by the site module.
    import sys

    ua = 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0.1) Gecko/20110506 Firefox/4.0.1'
    req = urllib2.Request('http://forum.intern0t.net/perl-python/')
    req.add_header('User-Agent', ua)

    try:
        response = urllib2.urlopen(req)
        try:
            html = response.read()
        finally:
            # Release the connection even if read() fails part-way.
            response.close()
    except (urllib2.URLError, IOError):
        # Only network/HTTP failures are reported as "offline"; Ctrl-C and
        # programming errors are no longer swallowed by a BaseException catch.
        print('[!] Error Occured. ')
        print('[?] Check whether system is Online.')
        sys.exit(1)

    soup = BeautifulSoup(html)
    search = soup.findAll('div', attrs={'class': 'inner'})

    index = 1
    for base in search:
        try:
            title = base.find('h3', attrs={'class': 'threadtitle'}).a.string
            details = base.find('div', attrs={'class': 'author'}).span.a['title']
        except (AttributeError, KeyError, TypeError):
            # A missing tag yields None (AttributeError/TypeError on the next
            # lookup) and a missing 'title' attribute raises KeyError.
            # Presumably such blocks are not thread rows, so skip them
            # rather than abort the whole listing.
            continue
        print('%d. %s >> %s' % (index, title, details))
        index += 1

    print('\n\n')
def printTitle():
    """Print the program banner, then a line holding a single newline."""
    banner_lines = (
        '\n\t<<< COOKING WITH BEAUTIFUL SOUP >>>',
        '\n',
    )
    for line in banner_lines:
        print(line)
# Script entry point: show the banner first, then fetch and list the threads.
if __name__ == '__main__':
    for step in (printTitle, scrapePythonSection):
        step()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement