Advertisement
DoomProg

Wikipedia genre with Python

Nov 6th, 2014
151
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.13 KB | None | 0 0
  1. import urllib2
  2. from HTMLParser import HTMLParser
  3.  
  4. def getgeners(page_in_html):
  5.     parser = Parser()
  6.     try:
  7.         parser.feed(page_in_html)
  8.     except UnicodeDecodeError : return ["UnicodeDecodeError"]
  9.     finally :
  10.         data  = parser.HTMLDATA
  11.         real_data = []
  12.         for i in xrange(len(data)):
  13.             if data[i].strip() == "Genres" :
  14.                 for j in xrange(i+1,len(data)):
  15.                     if data[j].strip() != '' :
  16.                         real_data.append(data[j].strip())
  17.                         break
  18.         return real_data
  19.  
  20.  
  21. # create a subclass and override the handler methods
  22. class Parser(HTMLParser):
  23.     def __init__(self):
  24.         self.reset()
  25.         self.HTMLDATA = []
  26.     def handle_starttag(self, tag, attrs):
  27.         pass
  28.     def handle_data(self, data):
  29.         self.HTMLDATA.append(data)
  30.  
  31. while True:
  32.     # Get band's name
  33.     band_name = raw_input("Enter the band's name: ")
  34.     # Format it fot Wikipedia
  35.     formated_name = band_name.strip().replace(" " ,"_")
  36.     print formated_name
  37.     ## Handle the program if the link is not found
  38.     try :
  39.         # Request the band to Wikipedia
  40.         req = urllib2.Request('http://en.wikipedia.org/wiki/'+formated_name+"_(band)" )
  41.         # Get response
  42.         response = urllib2.urlopen(req)
  43.         # Get the page in html
  44.         the_page = response.read()
  45.         # Get the geners
  46.         geners = getgeners(the_page)
  47.         print geners
  48.     except urllib2.URLError:
  49.         print "Could not find the link this way... Trying something else."
  50.         try:
  51.             # Request the band to Wikipedia
  52.             req = urllib2.Request('http://en.wikipedia.org/wiki/'+formated_name)
  53.             # Get response
  54.             response = urllib2.urlopen(req)
  55.             # Get the page in html
  56.             the_page = response.read()
  57.             # Get the geners
  58.             geners = getgeners(the_page)
  59.             print geners
  60.         except urllib2.URLError:
  61.             print "Sorry. Could not find the band you are looking for."
  62.  
  63. # SAMPLE EXECUTION:
  64.  
  65. # Enter the band's name: Slipknot
  66. # Slipknot
  67. # ['Groove metal']
  68. # Enter the band's name: Nirvana
  69. # Nirvana
  70. # ['Alternative rock']
  71. # Enter the band's name: Iron Maiden
  72. # Iron_Maiden
  73. # ['Heavy metal']
  74. # Enter the band's name: Pantera
  75. # Pantera
  76. # ['Heavy metal']
  77. # Enter the band's name: Rage against the machine
  78. # Rage_against_the_machine
  79. # ['Rap metal']
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement