Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import urllib2
- from HTMLParser import HTMLParser
def getgeners(page_in_html):
    """Return the genres found in the HTML source of a page.

    The page is fed through ``Parser`` and the collected text chunks are
    scanned for the label ``Genres``. For every occurrence of that label,
    the first non-blank chunk that follows it is taken as a genre.

    Args:
        page_in_html: HTML source of the page to scan.

    Returns:
        A list of whitespace-stripped strings, one per ``Genres`` label
        that is followed by non-blank text (empty when there is none), or
        ``["UnicodeDecodeError"]`` when the parser cannot decode the page.
    """
    parser = Parser()
    try:
        parser.feed(page_in_html)
    except UnicodeDecodeError:
        # Marker value for undecodable pages.  The scan below used to live in
        # a ``finally`` clause, whose ``return`` overrode this one and also
        # swallowed every other exception; other errors now propagate.
        return ["UnicodeDecodeError"]

    real_data = []
    expecting_genre = False
    # Single pass: once the label has been seen, the next non-blank chunk is
    # its value.  "Genres" is itself non-blank, so at most one label can be
    # pending at any time and the result matches a nested look-ahead scan.
    for chunk in parser.HTMLDATA:
        text = chunk.strip()
        if expecting_genre and text:
            real_data.append(text)
            expecting_genre = False
        if text == "Genres":
            expecting_genre = True
    return real_data
# Create a subclass and override the handler methods
class Parser(HTMLParser):
    """HTML parser that records the text content of a document.

    Every piece of text handed to ``handle_data`` is appended, in the order
    received, to the ``HTMLDATA`` list; opening tags are ignored.
    """

    def __init__(self):
        # Run the base-class initializer instead of only ``reset()``: on
        # Python 3 it also sets ``convert_charrefs``, which ``feed()`` reads.
        # It is called explicitly rather than through ``super()`` because the
        # Python 2 ``HTMLParser`` is an old-style class.
        HTMLParser.__init__(self)
        self.HTMLDATA = []

    def handle_starttag(self, tag, attrs):
        """Ignore opening tags; only text content is collected."""

    def handle_data(self, data):
        """Append one piece of document text to ``HTMLDATA``."""
        self.HTMLDATA.append(data)
def _fetch_genres(url):
    """Download the page at ``url`` and return what ``getgeners`` extracts.

    Raises:
        urllib2.URLError: when the page cannot be retrieved (HTTP errors such
            as 404 are reported through its ``HTTPError`` subclass).
    """
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        the_page = response.read()
    finally:
        # Python 2 responses are not context managers, so close explicitly.
        response.close()
    return getgeners(the_page)


def main():
    """Repeatedly ask for a band name and print the genres found for it.

    For each name the Wikipedia article ``<name>_(band)`` is tried first and
    the plain ``<name>`` article second.  The loop ends on end-of-input or
    Ctrl-C at the prompt.
    """
    # Imported locally so this block brings its own dependency into scope.
    try:  # Python 2
        from urllib import quote
    except ImportError:  # Python 3
        from urllib.parse import quote

    while True:
        # Get band's name
        try:
            band_name = raw_input("Enter the band's name: ")
        except (EOFError, KeyboardInterrupt):
            # Leave quietly instead of dying with a traceback.
            print("")
            break

        # Format it for Wikipedia
        formatted_name = band_name.strip().replace(" ", "_")
        if not formatted_name:
            # Nothing to look up; ask again rather than requesting a bogus URL.
            continue
        print(formatted_name)

        # Percent-encode the name so characters such as '?' or '#' cannot
        # change the meaning of the URL.
        article_url = 'http://en.wikipedia.org/wiki/' + quote(formatted_name)

        ## Handle the program if the link is not found
        try:
            geners = _fetch_genres(article_url + "_(band)")
        except urllib2.URLError:
            print("Could not find the link this way... Trying something else.")
            try:
                geners = _fetch_genres(article_url)
            except urllib2.URLError:
                print("Sorry. Could not find the band you are looking for.")
                continue
        print(geners)


if __name__ == "__main__":
    main()
- # SAMPLE EXECUTION:
- # Enter the band's name: Slipknot
- # Slipknot
- # ['Groove metal']
- # Enter the band's name: Nirvana
- # Nirvana
- # ['Alternative rock']
- # Enter the band's name: Iron Maiden
- # Iron_Maiden
- # ['Heavy metal']
- # Enter the band's name: Pantera
- # Pantera
- # ['Heavy metal']
- # Enter the band's name: Rage against the machine
- # Rage_against_the_machine
- # ['Rap metal']
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement