Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- import encodings.utf_8
- import os, sys, subprocess, urllib, re, pickle, win32com.client
class MyOpener(urllib.FancyURLopener):
    """FancyURLopener subclass that overrides the ``version`` attribute.

    urllib's opener classes send ``version`` as the User-Agent header, so
    requests made through this class present themselves as Firefox 2.
    """

    version = (
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) '
        'Gecko/20071127 Firefox/2.0.0.11'
    )
def usage():
    """Print the command-line synopsis to standard output."""
    synopsis = 'Usage: AMG file_path artist album [-track tracknumber title]'
    print(synopsis)
def q(enq):
    """Return the string *enq* wrapped in a pair of double quotes."""
    quote = '"'
    return quote + enq + quote
def last_arg(arg):
    """Search the final command-line argument for the regex *arg*.

    Returns the ``re`` match object, or None when the pattern does not
    occur in the last element of ``sys.argv``.
    """
    final_argument = sys.argv[-1]
    return re.search(arg, final_argument)
def strip_white(stp):
    """Reduce *stp* to a form suitable for loose comparison.

    Every ``amp;`` fragment is dropped, the text is lower-cased and all
    non-word characters (as defined by the regex class ``\\W``) are removed.
    """
    without_entities = stp.replace('amp;', '')
    lowered = without_entities.lower()
    return re.sub(r'\W+?', '', lowered)
def clean_up(cln):
    """Return *cln* with characters that act as delimiters swapped out.

    Replaces ``;`` with ``,``, a backslash with ``/`` and ``"`` with a
    backtick. process_data() wraps values in double quotes, terminates
    fields with ``;`` and joins entries with a backslash, so those three
    characters must not appear inside a value.

    The pairs are listed explicitly; the substitutions are independent of
    one another, so their order does not affect the result.
    """
    replacements = (
        (';', ','),
        ('\\', '/'),
        ('"', '`'),
    )
    for old, new in replacements:
        cln = cln.replace(old, new)
    return cln
def re_parse(dx, dt, rx):
    """Search *dt* for the regex *rx* and deliver its first capture group.

    When *dx* equals 'r' the captured text is returned to the caller.
    For any other *dx* the captured text is stored in the global ``amg``
    dict under the key *dx* and None is returned. If *rx* does not match,
    nothing is stored and None is returned.
    """
    found = re.search(rx, dt)
    if not found:
        return None
    captured = found.group(1)
    if dx == 'r':
        return captured
    amg[dx] = captured
    return None
def re_parse_multi(dx, dt, rx):
    """Collect every match of *rx* in *dx* and store them in ``amg[dt]``.

    Note that the argument roles differ from re_parse(): here *dx* is the
    text to scan (None means "section not found" and is ignored), *dt* is
    the key written in the global ``amg`` dict, and *rx* is the pattern,
    applied with ``re.M``. The matches are joined with '; '.

    NOTE(review): the paste lost its indentation; this version stores a
    value only when at least one match was found -- confirm that an empty
    string was not meant to be stored for a section without matches.

    Always returns None.
    """
    if dx is None:
        return
    found = re.findall(rx, dx, re.M)
    if found:
        amg[dt] = '; '.join(found)
def re_song_review(link):
    """Download *link* and extract the song review it contains.

    The page is fetched through MyOpener, decoded as latin-1 and re-encoded
    as UTF-8. Returns the review paragraph followed by
    ' AMG Song Review by <author>' with HTML tags removed, or None when the
    review pattern is not found on the page.
    """
    page = MyOpener().open(link).read().decode('latin-1').encode('utf-8')
    found = re.search(r'Song Review.*?by\s.(.*?)<.*?<p>(.*?)</p>', page, re.S)
    if not found:
        return None
    author = found.group(1)
    # NOTE(review): as it reads here this swaps a space for a space; it may
    # have been an HTML entity or non-breaking space lost in the paste.
    paragraph = found.group(2).replace(' ',' ')
    credited = paragraph + ' AMG Song Review by ' + author
    return re.sub('<.*?>', '', credited)
def go():
    """Search the site for ``album`` and load the first matching release.

    Requests the album search page, walks the result rows and picks the
    first row whose (normalised) artist cell contains, or is contained in,
    the normalised global ``artist``. For that row it

    * stores the release URL in ``amg['AMG_Release_URL']``,
    * downloads the release page and stores the regex match of its content
      section in the global ``rel`` (None if the markers are missing),
    * copies the content into the global ``trx`` when chk_trx() reports
      that track data is wanted.

    When no row matches, ``rel`` is left as the caller set it.
    """
    global rel, trx

    debug = last_arg('_dbg_') is not None
    src = '/cg/amg.dll?p=amg&sql=%s&P=amg&opt1=2' % (urllib.quote(album))
    rx_src = re.compile(
        r'(<tr class="visible".*?<td class="cell" style.*?>)(.*?)(<.*?;"><a href=")(.*?)(">)',
        re.M | re.S)
    req = MyOpener().open(url + src).read().decode('latin-1').encode('utf-8')
    if debug:
        print('\n\r' + 'Search result: ' + url + src)

    # Normalise the wanted artist once instead of on every result row.
    wanted = strip_white(artist)
    for match in rx_src.finditer(req):
        candidate = strip_white(match.group(2))
        # Same test as counting occurrences both ways: either name may be
        # a substring of the other.
        if candidate not in wanted and wanted not in candidate:
            continue
        if debug:
            print('AMG artist match: ' + match.group(2).replace('amp;', '')
                  + ', for artist: ' + artist + ' [' + album + ']')
        link = match.group(4).replace('amp;', '')
        amg['AMG_Release_URL'] = url + link
        page = MyOpener().open(url + link).read().decode('latin-1').encode('utf-8')
        rel = re.search('<!--Begin Content-->(.*?)<!--End Center Content-->', page, re.S)
        # rel is None when the page lacks the content markers; do not
        # dereference it in that case.
        if rel is not None and chk_trx() == 1:
            trx = rel.group(1)
        break
def re_parse_release():
    """Fill the global ``amg`` dict from the release page held in ``rel``.

    Reads ``rel.group(1)`` and extracts rating, release date, label,
    genre/style/mood/theme listings, the review (following the
    'Read More...' link when the review is truncated) and the artist URL.
    Unless the last command-line argument contains '_noc_', the cover image
    is also downloaded next to the audio file as 'AMG_<album>.jpg' if that
    file does not exist yet.

    Fields whose pattern does not match are left out of ``amg``.
    """
    page = rel.group(1)
    review_rx = r'<td align="left" class="title">.*?"author">by\s.*?</td>.*?<p>(.*?)</p>'

    if last_arg('_noc_') is None:
        cover = re.search(r'http://image\.allmusic\.com.*?\.jpg', page)
        cover_file = out + 'AMG_' + album + '.jpg'
        if cover and not os.path.isfile(cover_file):
            urllib.urlretrieve(cover.group(0), cover_file)

    re_parse('AMG_Rating', page, r'rating-stars.*?title="(.*?) Star')
    re_parse('AMG_Release_Date', page, r'<span>Release Date.*?(\d{4})')
    re_parse('AMG_Label', page, r'<span>Label.*?>(\w.*?)</td></tr></table>')

    # Each listing sits between '<Name> Listing-->' and '<!--<Name>'.
    listings = (
        ('Genre', 'AMG_Genre'),
        ('Styles', 'AMG_Style'),
        ('Moods', 'AMG_Mood'),
        ('Themes', 'AMG_Theme'),
    )
    for section, key in listings:
        listing = re_parse('r', page, '%s Listing-->(.*)<!--%s' % (section, section))
        re_parse_multi(listing, key, r'\d">(.*?)</a>')

    revw = re_parse('r', page, review_rx)
    if revw:
        review = re.sub('<.*?>', '', revw)
        if 'Read More...' in review:
            # The link lives in the raw paragraph (tags are already
            # stripped from `review`), so search `revw`.
            more = re_parse('r', revw, r'\.\.\. <a href="(.*?)">Read More\.\.\.</a>')
            if more:
                more_url = url + more.replace('amp;', '')
                more_page = MyOpener().open(more_url).read().decode('latin-1').encode('utf-8')
                full_text = re_parse('r', more_page, review_rx)
                # Keep the truncated text if the full page cannot be parsed.
                if full_text:
                    review = re.sub('<.*?>', '', full_text)
        author = re_parse('r', page, r'<td align="left" class="title">.*?"author">by\s.(.*?)<')
        if author:
            review += ' AMG Review by ' + author
        amg['AMG_Review'] = review

    artist_link = re_parse('r', page, r'<span>Artist.*?<a href="(.*?)">')
    if artist_link:
        amg['AMG_Artist_URL'] = url + artist_link.replace('amp;', '')

    if last_arg('_dbg_') is not None:
        print('Release parsed')
def re_parse_tracks():
    """Populate the global ``amg_trx`` dict from the track table in ``trx``.

    Every regex hit yields one entry keyed by the captured track number,
    holding 'Track', 'Title', 'Composer', 'Performer' and 'Song_Review'.
    The credit text (group 7) is stored as composer when no anchor tag
    (group 6) precedes it and as performer when one does. A song review is
    fetched via re_song_review() when the row carries a review link.

    NOTE(review): indentation was lost in the paste; the 'Please report'
    message is treated as the else-branch of the track-number test.
    """
    track_rx = r'(^<a href="(.*?)"><.*?)?^<TD class=\"cell\">(\d.*?)</TD>.*?<a href=\".*?\">(\w.*?)</a>.*?^<TD class=\"cell\">\r\n(^(<a.*?>)?(\w.*?)?(</.*?)?\r\n)?'
    for hit in re.finditer(track_rx, trx, re.S | re.M):
        number = hit.group(3)
        if not number:
            print('Please report this release: ' + album + ' by ' + artist)
            continue

        if last_arg('_dbg_') is not None:
            print('Track ' + number + ' regex succesful')

        composer = None
        performer = None
        review = None

        anchor = hit.group(6)
        credit = hit.group(7)
        if anchor is None:
            composer = credit
        elif anchor:
            performer = credit

        review_link = hit.group(2)
        if review_link:
            review = re_song_review(url + review_link.replace('amp;', ''))

        amg_trx[number] = {
            'Track': number,
            'Title': hit.group(4),
            'Composer': composer,
            'Performer': performer,
            'Song_Review': review,
        }
def read_tempfile():
    """Restore cached data from the temp files into ``amg`` / ``amg_trx``.

    Loads the pickled release dict from the path in the global ``tmp``.
    If the companion track file (same name with '.tmp' replaced by
    't.tmp') exists, it is loaded into the global ``amg_trx`` as well.
    """
    global amg, amg_trx

    # NOTE: pickle.load will execute whatever a crafted file describes;
    # these files are expected to be the ones write_tempfile() produced.
    # try/finally (rather than `with`) keeps this usable on old Python 2
    # interpreters while still closing the handle.
    cache = open(tmp)
    try:
        amg = pickle.load(cache)
    finally:
        cache.close()

    track_cache_path = tmp.replace('.tmp', 't.tmp')
    if os.path.isfile(track_cache_path):
        track_cache = open(track_cache_path)
        try:
            amg_trx = pickle.load(track_cache)
        finally:
            track_cache.close()

    if last_arg('_dbg_') is not None:
        print('Temp file read')
def write_tempfile():
    """Persist ``amg`` (and ``amg_trx``) to the temp files for reuse.

    Steps:
    1. If the marker file ``py`` exists, read the temp-file path it names;
       when that differs from the current ``tmp`` and still exists, delete
       it together with its 't.tmp' companion.
    2. Pickle ``amg`` to ``tmp``; when chk_trx() reports track data,
       pickle ``amg_trx`` to the 't.tmp' companion.
    3. Record the current ``tmp`` path in the marker file.

    NOTE(review): indentation was lost in the paste; the companion file is
    removed only together with its stale main file here -- confirm.
    """
    debug = last_arg('_dbg_') is not None
    if debug:
        print('Writting temp file...')

    if os.path.isfile(py):
        marker = open(py)
        try:
            tpy = str(marker.readline())
        finally:
            marker.close()
        if tpy != tmp and os.path.isfile(tpy):
            os.remove(tpy)
            stale_tracks = tpy.replace('.tmp', 't.tmp')
            if os.path.isfile(stale_tracks):
                os.remove(stale_tracks)

    # Close every handle explicitly so the data is flushed before return.
    release_cache = open(tmp, 'w')
    try:
        pickle.dump(amg, release_cache)
    finally:
        release_cache.close()

    if chk_trx() == 1:
        track_cache = open(tmp.replace('.tmp', 't.tmp'), 'w')
        try:
            pickle.dump(amg_trx, track_cache)
        finally:
            track_cache.close()

    marker = open(py, 'w')
    try:
        marker.write(tmp)
    finally:
        marker.close()

    if debug:
        print('Writting temp file done')
def get_data():
    """Fetch release (and track) data from the site and cache it.

    Resets the global ``rel``, runs go() and, if a release page was found,
    parses it with re_parse_release(), parses the tracks with
    re_parse_tracks() when chk_trx() asks for them, and finally calls
    write_tempfile().

    Parsing is best-effort: a failure in either parser is reported in
    debug mode and otherwise ignored, so whatever was collected so far is
    still written. Only ``Exception`` is caught, so KeyboardInterrupt and
    SystemExit propagate.

    NOTE(review): indentation was lost in the paste; write_tempfile() is
    called only when a release was found -- confirm that caching an empty
    result was not intended.
    """
    global rel, amg, amg_trx

    debug = last_arg('_dbg_') is not None
    if debug:
        print('Getting data...')

    rel = None
    go()
    if rel is None:
        return

    if debug:
        print('Release found')

    try:
        re_parse_release()
    except Exception:
        if debug:
            print('Release parsing failed: ' + str(sys.exc_info()[1]))

    if chk_trx() == 1:
        try:
            re_parse_tracks()
        except Exception:
            if debug:
                print('Track parsing failed: ' + str(sys.exc_info()[1]))

    write_tempfile()
def process_data():
    """Deliver the collected data: tag via foobar2000 or write CSV files.

    If the foobar2000 executable path (global ``foo``) points to an
    existing file and the last command-line argument does not contain
    '_nofoo_', a '/tag:' command line is built and run with
    subprocess.call (only when there is release data). The command holds
    the per-track Composer/Performer/Song_Review fields for the entry
    whose number equals the global ``track``, followed by an 'AMG' field
    with all release entries joined by a backslash.

    Otherwise '<album>.amg_release.csv' is written next to the audio file
    (unless it already exists) and, when chk_trx() asks for track data,
    '<album>.amg_tracks.csv' is (re)written.

    NOTE(review): indentation was lost in the paste; the track CSV is
    written independently of whether the release CSV already existed.
    """
    debug = last_arg('_dbg_') is not None
    if debug:
        print('Processing...')

    # `foo` is only assigned when the foobar2000 COM object was reachable;
    # treat a missing name as "no executable" so the CSV fallback can run.
    try:
        foo_path = foo
    except NameError:
        foo_path = ''

    args = [key + ': ' + clean_up(value) for key, value in amg.items()]
    track_fields = ('Composer', 'Performer', 'Song_Review')

    if foo_path and os.path.isfile(foo_path) and last_arg('_nofoo_') is None:
        arg = ' /tag:'
        if chk_trx() == 1:
            for entry in amg_trx.values():
                if int(entry['Track']) == int(track):
                    for field in track_fields:
                        if entry[field] is not None:
                            arg += field + '=' + '"' + clean_up(entry[field]) + '";'
        arg += 'AMG="' + '\\'.join(args) + '" "'
        if debug:
            print('Starting subprocess...')
        if args:
            # clean_up() has replaced double quotes in every value, so the
            # quoting of this command string stays balanced.
            subprocess.call(q(str(foo_path)) + arg + str(path) + '"')
        if debug:
            print('Subprocess ended \r\n')
        return

    if debug:
        print('Writing csv...')

    release_csv = out + album + '.amg_release.csv'
    if not os.path.isfile(release_csv):
        handle = open(release_csv, 'w')
        try:
            for key, value in amg.items():
                handle.write(key + '\t' + value + '\r')
        finally:
            handle.close()

    if chk_trx() == 1:
        rows = []
        for entry in amg_trx.values():
            cells = [field + ':' + entry[field]
                     for field in ('Track', 'Title') + track_fields
                     if entry[field] is not None]
            rows.append('\t'.join(cells))
        rows.sort()
        handle = open(out + album + '.amg_tracks.csv', 'w')
        try:
            handle.write('\r'.join(rows))
        finally:
            handle.close()

    if debug:
        print('Writing csv done \r\n')
def chk_trx():
    """Tell whether per-track data should be handled.

    Returns 1 when the global ``state`` is None and the global ``track``
    is not the empty string; returns None in every other case.
    """
    if state is None and track != '':
        return 1
    return None
def get_args():
    """Read the job from the command line and run it.

    Expected form: ``AMG file_path artist album [-track tracknumber title]``.
    Sets the globals ``path``, ``artist``, ``album``, ``track`` and
    ``title`` (the last two are '' when '-track' is not given, so that
    chk_trx() can always read ``track``), then calls start().

    The usage text is printed only for malformed arguments. A failure
    inside start() is reported with its error message instead of being
    disguised as a usage error.
    """
    global path, artist, album, track, title

    if last_arg('_dbg_') is not None:
        print('Getting tags')

    argv = sys.argv
    if len(argv) < 4:
        usage()
        return

    path, artist, album = argv[1], argv[2], argv[3]

    if len(argv) > 4 and argv[4] == '-track':
        # '-track' needs both a number and a title after it.
        if len(argv) < 7:
            usage()
            return
        track, title = argv[5], argv[6]
    else:
        track, title = '', ''

    try:
        start()
    except Exception:
        print('AMG lookup failed: ' + str(sys.exc_info()[1]))
def start():
    """Process one file: load cached data or fetch it, then deliver it.

    Derives the globals ``tmp`` (cache file in %TEMP%, named after the
    hash of ``album``) and ``out`` (the folder part of ``path`` including
    the trailing backslash), resets ``amg`` / ``amg_trx``, then either
    reads the cache with read_tempfile() or fetches fresh data with
    get_data(), and finally calls process_data().

    NOTE(review): the cache name relies on hash() giving the same value
    for the same album on every run -- confirm for the interpreter in use.
    """
    global tmp, out, amg, amg_trx

    tmp = os.environ['TEMP'] + '\\' + str(abs(hash(album))) + '.tmp'

    # A bare file name has no backslash; fall back to the current
    # directory instead of failing on a None match.
    folder = re.search(r'.*\\', path)
    if folder is not None:
        out = folder.group(0)
    else:
        out = ''

    amg = {}
    amg_trx = {}

    if last_arg('_dbg_') is not None:
        for index, value in enumerate(sys.argv):
            print('sys.argv[%d] = %s' % (index, value))
        for name in (py, tmp, tmp.replace('.tmp', 't.tmp')):
            if os.path.isfile(name):
                print(name + ' [exist]')
            else:
                print(name + ' [not there]')

    if os.path.isfile(tmp):
        read_tempfile()
    else:
        get_data()
    process_data()
# --- script entry -----------------------------------------------------------
# Marker file that stores the path of the most recently written temp file.
py = os.environ['TEMP'] + '\\' + '_py_amg_'
url = 'http://www.allmusic.com'

# state stays None unless the last argument contains '_nosr_';
# chk_trx() only reports track handling while state is None.
state = None
if last_arg('_nosr_') is not None:
    state = 0

# Path of the foobar2000 executable; stays empty when foobar2000 cannot be
# reached, so process_data() can test it without hitting an undefined name.
foo = ''

try:
    # Preferred mode: take the jobs from the foobar2000 playlist named 'amg'.
    fb2k = win32com.client.Dispatch("Foobar2000.Application.0.7")
    foo = fb2k.ApplicationPath
    pls = fb2k.Playlists
    for i in pls:
        if i.Name.lower() == 'amg':
            for j in i.GetTracks():
                path = j.Path.replace('file://', '').encode('ascii', 'ignore')
                artist = j.FormatTitle("%album artist%").encode('ascii', 'ignore')
                album = j.FormatTitle("%album%").encode('ascii', 'ignore')
                track = j.FormatTitle("%tracknumber%")
                title = j.FormatTitle("%title%").replace(',', '')
                if last_arg('_dbg_') is not None:
                    print('foo Starting ' + path)
                if os.path.isfile(path):
                    start()
except Exception:
    # Any failure above falls back to the command-line arguments.
    # Catching Exception (not a bare except) lets Ctrl-C and SystemExit
    # through.
    get_args()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement