Guest User

bio_amg.py

a guest
Jun 21st, 2010
1,634
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # -*- coding: utf-8 -*-
  2. import encodings.utf_8
  3. import sys, urllib, re
  4.  
  5. class MyOpener(urllib.FancyURLopener):
  6.     version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
  7.  
  8. def strip_white(stp):
  9.     stp = stp.replace('amp;','')
  10.     stp = re.sub('\W+?','', stp.lower())
  11.     return stp
  12.  
  13. def levenshtein(s1, s2):
  14.   l1 = len(s1)
  15.   l2 = len(s2)
  16.   matrix = [list(range(l1 + 1))] * (l2 + 1)
  17.   for zz in range(l2 + 1):
  18.     matrix[zz] = list(range(zz,zz + l1 + 1))
  19.   for zz in range(0,l2):
  20.     for sz in range(0,l1):
  21.       if s1[sz] == s2[zz]:
  22.         matrix[zz+1][sz+1] = min(matrix[zz+1][sz] + 1, matrix[zz][sz+1] + 1, matrix[zz][sz])
  23.       else:
  24.         matrix[zz+1][sz+1] = min(matrix[zz+1][sz] + 1, matrix[zz][sz+1] + 1, matrix[zz][sz] + 1)
  25.   return matrix[l2][l1]
  26.  
  27. def re_parse(dt, rx):
  28.     match = re.search(rx, dt)
  29.     if match: return match.group(1)
  30.  
  31. def go(artist, album):
  32.     src = '/cg/amg.dll?p=amg&sql=%s&P=amg&opt1=2' % (urllib.quote(album))
  33.     rx_src = re.compile('(<tr class=\"visible\".*?<td class=\"cell\" style.*?>)(.*?)(<.*?;"><a href=\")(.*?)(\">)', re.M|re.S)
  34.     req = MyOpener().open(str(url + src)).read().decode('latin-1').encode('utf-8')
  35.     for match in rx_src.finditer(str(req)):
  36.         if strip_white(artist) == strip_white(match.group(2)) or levenshtein(strip_white(artist), strip_white(match.group(2))) <= 3:
  37.             global rel, link
  38.             link = match.group(4).replace('amp;','')
  39.             req = MyOpener().open(url + link).read().decode('latin-1').encode('utf-8')
  40.             rel = re.search('<!--Begin Content-->(.*?)<!--ADBANNER-->', req, re.S|re.M)
  41.             break
  42.  
  43. def get_review():
  44.     revw = re_parse(rel.group(1), '<td align=\"left\" class=\"title\">.*?\"author\">by\s.*?</td>.*?<p>(.*?)</p>')
  45.     if revw:
  46.         amg_review = re.sub('<.*?>', '', revw)
  47.         if amg_review.find('Read More...') > 0:
  48.             rx_more = re.compile('<td align="left" class="title">.*?"author">by\s.*?</td>.*?<p>(.*?)</p>', re.M|re.S)
  49.             more = re_parse(rel.group(1), '\.\.\. <a href="(.*?)\">Read More\.\.\.</a>')
  50.             more_review = MyOpener().open(url + more).read().decode('latin-1').encode('utf-8')
  51.             amg_review = re.sub('<.*?>', '', rx_more.search(more_review).group(1))
  52.         amg = amg_review + '\r\nAMG Review by ' + re_parse(rel.group(1), '<td align=\"left\" class=\"title\">.*?"author\">by\s.(.*?)<')
  53.         print amg
  54.  
  55. url = 'http://www.allmusic.com'
  56. rel = None;
  57.  
  58. try:
  59.     go(sys.argv[1], sys.argv[2])
  60.     if not rel == None: get_review()
  61. except Exception, e:
  62.     print e
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound or popup ads; we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×