Advertisement
Guest User

Untitled

a guest
Oct 9th, 2010
250
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.29 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. import encodings.utf_8
  3. import os, sys, subprocess, urllib, re, pickle, win32com.client
  4.  
  5. class MyOpener(urllib.FancyURLopener):
  6.     version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'
  7.  
  8. def usage(): print 'Usage: AMG file_path artist album [-track tracknumber title]'
  9.  
  10. def q(enq):
  11.     return '"' + enq + '"'
  12.  
  13. def last_arg(arg):
  14.     return re.search(arg, sys.argv[len(sys.argv) - 1])
  15.  
  16. def strip_white(stp):
  17.     stp = stp.replace('amp;','')
  18.     stp = re.sub('\W+?','', stp.lower())
  19.     return stp
  20.  
  21. def clean_up(cln):
  22.     i = ['\\','/','"','`',';',',']
  23.     for j in range(len(i)/2):
  24.         cln = cln.replace(i[2*j-2], i[2*j-1])
  25.     return cln
  26.  
  27. def re_parse(dx, dt, rx):
  28.     match = re.search(rx, dt)
  29.     if match:
  30.         if dx == 'r': return match.group(1)
  31.         else: amg[dx] = match.group(1)
  32.  
  33. def re_parse_multi(dx, dt, rx):
  34.     if dx <> None:
  35.         res = []
  36.         match = re.findall(rx, dx, re.M)
  37.         if match:
  38.             for i in range(len(match)): res.append(match[i])
  39.             res = '; '.join(res)
  40.             amg[dt] = res
  41.  
  42. def re_song_review(link):
  43.     req = MyOpener().open(link).read().decode('latin-1').encode('utf-8')
  44.     res = re.search('Song Review.*?by\s.(.*?)<.*?<p>(.*?)</p>', req, re.S)
  45.     if res: return re.sub('<.*?>', '', res.group(2).replace('  ',' ') + ' AMG Song Review by ' + res.group(1))
  46.  
  47. def go():
  48.     src = '/cg/amg.dll?p=amg&sql=%s&P=amg&opt1=2' % (urllib.quote(album))
  49.     rx_src = re.compile('(<tr class=\"visible\".*?<td class=\"cell\" style.*?>)(.*?)(<.*?;"><a href=\")(.*?)(\">)', re.M|re.S)
  50.  
  51.     req = MyOpener().open(url + src).read().decode('latin-1').encode('utf-8')
  52.     if last_arg('_dbg_') <> None: print '\n\r' + 'Search result: ' + url + src
  53.  
  54.     for match in rx_src.finditer(req):
  55.         if strip_white(artist).count(strip_white(match.group(2))) + strip_white(match.group(2)).count(strip_white(artist)) > 0:
  56.             global rel, trx
  57.             if last_arg('_dbg_') <> None:
  58.                 print 'AMG artist match: ' + match.group(2).replace('amp;','') + ', for artist: ' + artist + ' [' + album + ']'
  59.             link = match.group(4).replace('amp;','')
  60.             amg['AMG_Release_URL'] = url + link
  61.             req = MyOpener().open(url + link).read().decode('latin-1').encode('utf-8')
  62.             rel = re.search('<!--Begin Content-->(.*?)<!--End Center Content-->', req, re.S)
  63.             if chk_trx() == 1: trx = rel.group(1)
  64.             break
  65.  
  66. def re_parse_release():
  67.     if last_arg('_noc_') == None:
  68.         cover = re.search('http://image\.allmusic\.com.*?\.jpg', rel.group(1))
  69.         if cover and not os.path.isfile(out + 'AMG_' + album + '.jpg'):
  70.             urllib.urlretrieve(cover.group(0), out + 'AMG_' + album + '.jpg')
  71.  
  72.     re_parse('AMG_Rating', rel.group(1), 'rating-stars.*?title=\"(.*?) Star')
  73.     re_parse('AMG_Release_Date', rel.group(1), '<span>Release Date.*?(\d{4})')
  74.     re_parse('AMG_Label', rel.group(1), '<span>Label.*?>(\w.*?)</td></tr></table>')
  75.  
  76.     re_parse_multi(re_parse('r', rel.group(1), 'Genre Listing-->(.*)<!--Genre'), 'AMG_Genre', '\d">(.*?)</a>')
  77.     re_parse_multi(re_parse('r', rel.group(1), 'Styles Listing-->(.*)<!--Styles'), 'AMG_Style', '\d">(.*?)</a>')
  78.     re_parse_multi(re_parse('r', rel.group(1), 'Moods Listing-->(.*)<!--Moods'), 'AMG_Mood', '\d">(.*?)</a>')
  79.     re_parse_multi(re_parse('r', rel.group(1), 'Themes Listing-->(.*)<!--Themes'), 'AMG_Theme', '\d">(.*?)</a>')
  80.  
  81.     revw = re_parse('r', rel.group(1), '<td align=\"left\" class=\"title\">.*?\"author\">by\s.*?</td>.*?<p>(.*?)</p>')
  82.     if revw:
  83.         AMG_review = re.sub('<.*?>', '', revw)
  84.         if AMG_review.find('Read More...') > 0:
  85.             more = re_parse('r', '\.\.\. <a href="(.*?)\">Read More\.\.\.</a>')
  86.             more_review = MyOpener().open(url + more).read().decode('latin-1').encode('utf-8')
  87.             AMG_Review = re.sub('<.*?>', '', re_parse('r', more_review, '<td align=\"left\" class=\"title\">.*?\"author\">by\s.*?</td>.*?<p>(.*?)</p>'))
  88.  
  89.         amg['AMG_Review'] = AMG_review + ' AMG Review by ' + re_parse('r', rel.group(1), '<td align=\"left\" class=\"title\">.*?"author\">by\s.(.*?)<')
  90.  
  91.     amg['AMG_Artist_URL'] = url + re_parse('r', rel.group(1), '<span>Artist.*?<a href=\"(.*?)\">').replace('amp;','')
  92.     if last_arg('_dbg_') <> None: print 'Release parsed'
  93.  
  94. def re_parse_tracks():
  95.     for match in re.finditer(r'(^<a href="(.*?)"><.*?)?^<TD class=\"cell\">(\d.*?)</TD>.*?<a href=\".*?\">(\w.*?)</a>.*?^<TD class=\"cell\">\r\n(^(<a.*?>)?(\w.*?)?(</.*?)?\r\n)?', trx, re.S|re.M):
  96.         if match.group(3):
  97.             if last_arg('_dbg_') <> None: print 'Track ' + match.group(3) + ' regex succesful'
  98.             auth = None
  99.             perf = None
  100.             revw = None
  101.             #if strip_white(match.group(4)) == strip_white(title):
  102.             if match.group(6) is None: auth = match.group(7)
  103.             else:
  104.                 if match.group(6): perf = match.group(7)
  105.             if match.group(2): revw = re_song_review(url + match.group(2).replace('amp;', ''))
  106.             #else: name = 'No match'
  107.             amg_trx[match.group(3)] = {'Track' : match.group(3), 'Title' : match.group(4), 'Composer' : auth, 'Performer' : perf, 'Song_Review' : revw}
  108.         else: print 'Please report this release: ' + album + ' by ' + artist
  109.  
  110. def read_tempfile():
  111.     global amg
  112.     f = open(tmp)
  113.     amg = pickle.load(f)
  114.     f.close
  115.     if os.path.isfile(tmp.replace('.tmp','t.tmp')):
  116.         global amg_trx
  117.         g = open(tmp.replace('.tmp','t.tmp'))
  118.         amg_trx = pickle.load(g)
  119.         g.close
  120.     if last_arg('_dbg_') <> None: print 'Temp file read'
  121.  
  122. def write_tempfile():
  123.     if last_arg('_dbg_') <> None: print 'Writting temp file...'
  124.     if os.path.isfile(py):
  125.         h = open(py)
  126.         tpy = str(h.readline())
  127.         if not tpy == tmp and os.path.isfile(tpy):
  128.             os.remove(tpy)
  129.             if os.path.isfile(tpy.replace('.tmp','t.tmp')):
  130.                 os.remove(tpy.replace('.tmp','t.tmp'))
  131.         h.close
  132.     f = open(tmp, 'w')
  133.     pickle.dump(amg,f)
  134.     f.close
  135.     if chk_trx() == 1 :
  136.         g = open(tmp.replace('.tmp','t.tmp'), 'w')
  137.         pickle.dump(amg_trx,g)
  138.         g.close
  139.     h = open(py, 'w')
  140.     h.write(tmp)
  141.     h.close
  142.     if last_arg('_dbg_') <> None: print 'Writting temp file done'
  143.  
  144. def get_data():
  145.     global rel, amg, amg_trx
  146.     if last_arg('_dbg_') <> None: print 'Getting data...'
  147.     rel = None
  148.     go()
  149.     if rel <> None:
  150.         if last_arg('_dbg_') <> None: print 'Release found'
  151.         try: re_parse_release()
  152.         except: pass
  153.         if chk_trx() == 1:
  154.             try: re_parse_tracks()
  155.             except: pass
  156.     write_tempfile()
  157.  
  158. def process_data():
  159.     if last_arg('_dbg_') <> None: print 'Processing...'
  160.     arg = ' /tag:'
  161.     args = []
  162.     for key, value in dict.items(amg): args.append(key + ': ' + clean_up(value))
  163.     if os.path.isfile(foo) and last_arg('_nofoo_') == None:
  164.         if chk_trx() == 1:
  165.             for i in amg_trx.keys():
  166.                 if int(amg_trx[i]['Track']) == int(track):
  167.                     for j in ('Composer', 'Performer', 'Song_Review'):
  168.                         if amg_trx[i][j] <> None: arg += j + '=' + '"' + clean_up(amg_trx[i][j]) + '";'
  169.         arg += 'AMG="' + '\\'.join(args) + '" "'
  170.         if last_arg('_dbg_') <> None: print 'Starting subprocess...'
  171.         if args: subprocess.call(q(str(foo)) + arg + str(path) + '"')
  172.         #subprocess.Popen(args = arg + path, executable = foo, stdin=subprocess.PIPE)
  173.         if last_arg('_dbg_') <> None: print 'Subprocess ended \r\n'
  174.     else:
  175.         if last_arg('_dbg_') <> None: print 'Writing csv...'
  176.         if not os.path.isfile(out + album + '.amg_release.csv'):
  177.             f = open(out + album + '.amg_release.csv', 'w')
  178.             for key, value in dict.items(amg): f.write(key + '\t' + value + '\r')
  179.             f.close
  180.             if chk_trx() == 1:
  181.                 g = open(out + album + '.amg_tracks.csv', 'w')
  182.                 csvs = []
  183.                 for i in amg_trx.keys():
  184.                     csv = ''
  185.                     for j in ('Track', 'Title', 'Composer', 'Performer', 'Song_Review'):
  186.                         if not amg_trx[i][j] is None: csv += j + ':' + amg_trx[i][j] + '\t'
  187.                     csvs.append(csv.rstrip('\t'))
  188.                 csvs.sort()
  189.                 g.write('\r'.join(csvs))
  190.                 g.close
  191.             if last_arg('_dbg_') <> None: print 'Writing csv done /r/n'
  192.  
  193. def chk_trx():
  194.     if state == None:
  195.         if track <> '': return 1
  196.  
  197. def get_args():
  198.     if last_arg('_dbg_') <> None: print 'Getting tags'
  199.     try:
  200.         global path, artist, album
  201.         path = sys.argv[1]
  202.         artist = sys.argv[2]
  203.         album = sys.argv[3]
  204.         if sys.argv[4] == '-track':
  205.             global track, title
  206.             track = sys.argv[5]
  207.             title = sys.argv[6]
  208.         start()
  209.     except: usage()
  210.  
  211. def start():
  212.     global tmp, out, amg, amg_trx
  213.     tmp = os.environ['TEMP'] + '\\' + str(abs(hash(album))) + '.tmp'
  214.     out = re.search(r'.*\\', path).group(0)
  215.     amg = {}
  216.     amg_trx = {}
  217.     if last_arg('_dbg_') <> None:
  218.         for i in range(len(sys.argv)): print 'sys.argv[%d] = %s' % (i, sys.argv[i])
  219.         for j in py, tmp, tmp.replace('.tmp','t.tmp'):
  220.             if os.path.isfile(j): print j + ' [exist]'
  221.             else: print j + ' [not there]'
  222.     if os.path.isfile(tmp): read_tempfile()
  223.     else: get_data()
  224.     process_data()
  225.  
  226. py = os.environ['TEMP'] + '\\' + '_py_amg_'
  227. url = 'http://www.allmusic.com'
  228. state = None
  229. if last_arg('_nosr_') <> None: state = 0
  230.  
  231. try:
  232.     fb2k = win32com.client.Dispatch("Foobar2000.Application.0.7")
  233.     foo = fb2k.ApplicationPath
  234.     pls = fb2k.Playlists
  235.     for i in pls:
  236.         if i.Name.lower() == 'amg':
  237.             for j in i.GetTracks():
  238.                 path = j.Path.replace('file://','').encode('ascii','ignore')
  239.                 artist = j.FormatTitle("%album artist%").encode('ascii','ignore')
  240.                 album = j.FormatTitle("%album%").encode('ascii','ignore')
  241.                 track = j.FormatTitle("%tracknumber%")
  242.                 title = j.FormatTitle("%title%").replace(',','')
  243.                 if last_arg('_dbg_') <> None: print 'foo Starting ' + path
  244.                 if os.path.isfile(path): start()
  245. except: get_args()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement