# Beautiful Soup Example

#!/usr/bin/env python

__author__ = 'Irenicus09'
__date__ = '2nd June 2011'


####################################################################################
#                                                                                  #
# This script has been released as an example for learning Beautiful Soup Module.  #
#                                                                                  #
# The author takes no liability for the use of this script in any way,             #
# and it is to be used for educational purposes only.                              #
#                                                                                  #
####################################################################################


import urllib2
from BeautifulSoup import BeautifulSoup


def _extractThreadInfo(base):
    """
    Pull the thread title and the author details out of one 'inner' div.

    Returns a (title, details) tuple, or None when the div does not hold
    the expected h3.threadtitle / div.author > span > a[title] markup.
    """

    heading = base.find('h3', attrs={'class' : 'threadtitle'})
    author = base.find('div', attrs={'class' : 'author'})

    # find() returns None when nothing matches, so check before chaining.
    if heading is None or author is None:
        return None

    titleLink = heading.a
    authorSpan = author.span
    if titleLink is None or authorSpan is None or authorSpan.a is None:
        return None

    # get() instead of ['title'] so a missing attribute is not a KeyError.
    details = authorSpan.a.get('title')
    if details is None:
        return None

    return titleLink.string, details


def scrapePythonSection():
    """
    This function grabs the first page of the python section @Intern0t forum.
    The data is then parsed using BeautifulSoup to look for title and meta data.

    Every thread found is printed as '<number>. <title> >> <details>'.
    Divs of class 'inner' that do not contain thread markup are skipped and
    do not consume a number.

    Raises SystemExit(1), after printing an error message, when the page
    cannot be downloaded.
    """


    ua = 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0.1) Gecko/20110506 Firefox/4.0.1'
    req = urllib2.Request('http://forum.intern0t.net/perl-python/')
    req.add_header('User-Agent', ua)

    try:
        response = urllib2.urlopen(req)
        try:
            html = response.read()
        finally:
            # Release the connection even if read() fails half-way.
            response.close()
    except Exception:
        # Exception rather than BaseException: Ctrl-C (KeyboardInterrupt)
        # and SystemExit must propagate instead of being reported as a
        # connectivity problem.
        print('[!] Error Occured. ')
        print('[?] Check whether system is Online.')
        # exit() is only injected by the site module; SystemExit always exists.
        raise SystemExit(1)

    soup = BeautifulSoup(html)

    index = 0

    for base in soup.findAll('div', attrs={'class' : 'inner'}):
        info = _extractThreadInfo(base)
        if info is None:
            continue

        title, details = info
        index += 1
        print('%d. %s >> %s' % (index, title, details))

    print('\n\n')


def printTitle():
    """
    Print the banner that is shown when the script starts,
    followed by an extra blank line.
    """

    banner = '\n\t<<< COOKING WITH BEAUTIFUL SOUP >>>'
    spacer = '\n'

    for line in (banner, spacer):
        print(line)

if __name__ == '__main__':
    # Script entry point: show the banner first, then run the scraper.
    for step in (printTitle, scrapePythonSection):
        step()