# -*- coding: utf-8 -*-
import encodings.utf_8
import os, sys, subprocess, urllib, re, pickle, win32com.client
class MyOpener(urllib.FancyURLopener):
    """URL opener that announces itself with a desktop-browser User-Agent.

    ``urllib``'s opener classes use the class attribute ``version`` as the
    User-Agent string, so overriding it replaces the default
    ``Python-urllib/x.y`` identification on every request made through
    this class.
    """

    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'
def usage():
    """Print the command-line synopsis to standard output."""
    # A parenthesised single argument is valid both as a Python 2 print
    # statement and as a Python 3 print() call.
    print('Usage: AMG file_path artist album [-track tracknumber title]')
def q(enq):
    """Return the string *enq* wrapped in double quotes.

    No escaping is performed: a double quote already inside *enq* is
    passed through unchanged.
    """
    return '"' + enq + '"'
def last_arg(arg):
    """Search the final command-line argument for the regex *arg*.

    The script's option flags (``_dbg_``, ``_noc_``, ``_nofoo_``,
    ``_nosr_``) are all looked up this way, so several flags can be
    packed into one trailing argument.

    Returns the ``re`` match object, or ``None`` when *arg* does not occur.
    """
    return re.search(arg, sys.argv[-1])
def strip_white(stp):
    """Normalise *stp* for loose, case-insensitive name comparison.

    The literal text ``amp;`` (the tail of an HTML ``&amp;`` entity) is
    dropped, the string is lower-cased, and every non-word character is
    removed, leaving only letters, digits and underscores.
    """
    stp = stp.replace('amp;', '')
    # Raw string avoids the invalid-escape warning for "\W"; the greedy
    # form removes the same characters as the original lazy "\W+?".
    return re.sub(r'\W+', '', stp.lower())
def clean_up(cln):
    """Replace characters that would break the generated tag strings.

    Substitutions applied to *cln*: backslash -> ``/``, double quote ->
    backtick, semicolon -> comma.  The result is returned.
    """
    # Explicit (old, new) pairs.  The original derived these from a flat
    # list with ``i[2*j-2], i[2*j-1]``, which only covered all three pairs
    # thanks to negative-index wraparound and needed ``/`` to be integer
    # division.
    replacements = (
        ('\\', '/'),
        ('"', '`'),
        (';', ','),
    )
    for old, new in replacements:
        cln = cln.replace(old, new)
    return cln
def re_parse(dx, dt, rx):
    """Extract the first capture group of *rx* from the text *dt*.

    dx -- ``'r'`` to return the captured text to the caller; any other
          value is used as the key under which the text is stored in the
          module-level ``amg`` dict.
    dt -- text to search.
    rx -- regular expression with at least one capture group.

    Returns the captured string when ``dx == 'r'`` and the pattern
    matches; ``None`` in every other case.
    """
    match = re.search(rx, dt)
    if match is None:
        return None
    if dx == 'r':
        return match.group(1)
    amg[dx] = match.group(1)
    return None
def re_parse_multi(dx, dt, rx):
    """Collect every match of *rx* in *dx* into one ``'; '``-joined value.

    dx -- text to scan, or ``None`` (nothing is done in that case).
    dt -- key under which the joined result is stored in the module-level
          ``amg`` dict.
    rx -- regular expression with one capture group; applied with
          ``re.M``.

    NOTE(review): the source's indentation was lost; this assumes the
    store happens only when at least one match is found -- confirm.
    """
    if dx is None:
        return
    found = re.findall(rx, dx, re.M)
    if found:
        amg[dt] = '; '.join(found)
def re_song_review(link):
    """Download a song page and return its review as plain text.

    link -- absolute URL of the song page.

    The page is read as Latin-1 and re-encoded as UTF-8.  When a
    "Song Review ... by <author>" section followed by a ``<p>`` block is
    found, the paragraph text plus `` AMG Song Review by <author>`` is
    returned with all HTML tags removed.  Returns ``None`` when the page
    has no such section.
    """
    req = MyOpener().open(link).read().decode('latin-1').encode('utf-8')
    res = re.search(r'Song Review.*?by\s.(.*?)<.*?<p>(.*?)</p>', req, re.S)
    if res is None:
        return None
    # NOTE(review): the original replaced a whitespace-only literal with a
    # plain space, which only makes sense if that literal was a
    # non-breaking space.  It is spelled out here as the UTF-8 byte pair
    # so it survives editors and copy/paste -- confirm against the
    # original file's bytes.
    body = res.group(2).replace('\xc2\xa0', ' ')
    return re.sub(r'<.*?>', '', body + ' AMG Song Review by ' + res.group(1))
def go():
    """Search allmusic.com for the current album and load its release page.

    Reads the module-level ``album``, ``artist`` and ``url``.  The search
    result rows are scanned in order; the first row whose artist name
    contains, or is contained in, the wanted artist (after
    ``strip_white`` normalisation) is taken.  For that row:

    * ``amg['AMG_Release_URL']`` is set to the release link,
    * the release page is downloaded and the global ``rel`` is set to the
      match of its main content section (``None`` if the markers are
      missing),
    * when ``chk_trx()`` reports a track lookup, the global ``trx`` is
      set to that content text.

    Nothing is assigned when no row matches.
    """
    global rel, trx
    debug = last_arg('_dbg_') is not None
    src = '/cg/amg.dll?p=amg&sql=%s&P=amg&opt1=2' % (urllib.quote(album))
    rx_src = re.compile(
        r'(<tr class="visible".*?<td class="cell" style.*?>)(.*?)(<.*?;"><a href=")(.*?)(">)',
        re.M | re.S)
    req = MyOpener().open(url + src).read().decode('latin-1').encode('utf-8')
    if debug:
        print('\n\r' + 'Search result: ' + url + src)
    # Normalise the wanted artist once instead of once per result row.
    wanted = strip_white(artist)
    for match in rx_src.finditer(req):
        candidate = strip_white(match.group(2))
        if candidate not in wanted and wanted not in candidate:
            continue
        if debug:
            print('AMG artist match: ' + match.group(2).replace('amp;', '')
                  + ', for artist: ' + artist + ' [' + album + ']')
        link = match.group(4).replace('amp;', '')
        amg['AMG_Release_URL'] = url + link
        page = MyOpener().open(url + link).read().decode('latin-1').encode('utf-8')
        rel = re.search(r'<!--Begin Content-->(.*?)<!--End Center Content-->', page, re.S)
        # rel is None when the content markers are absent; the original
        # dereferenced it unconditionally and raised AttributeError.
        if rel is not None and chk_trx() == 1:
            trx = rel.group(1)
        break
def re_parse_release():
    """Extract release metadata from the page held in the global ``rel``.

    Fills the module-level ``amg`` dict with whichever of these the page
    provides: ``AMG_Rating``, ``AMG_Release_Date``, ``AMG_Label``,
    ``AMG_Genre``, ``AMG_Style``, ``AMG_Mood``, ``AMG_Theme``,
    ``AMG_Review`` and ``AMG_Artist_URL``.  Unless the ``_noc_`` flag is
    given, the cover image is also downloaded next to the audio file as
    ``AMG_<album>.jpg`` if that file does not exist yet.
    """
    content = rel.group(1)

    if last_arg('_noc_') is None:
        cover = re.search(r'http://image\.allmusic\.com.*?\.jpg', content)
        cover_file = out + 'AMG_' + album + '.jpg'
        if cover and not os.path.isfile(cover_file):
            urllib.urlretrieve(cover.group(0), cover_file)

    re_parse('AMG_Rating', content, r'rating-stars.*?title="(.*?) Star')
    re_parse('AMG_Release_Date', content, r'<span>Release Date.*?(\d{4})')
    re_parse('AMG_Label', content, r'<span>Label.*?>(\w.*?)</td></tr></table>')

    # Each listing is delimited by "<name> Listing-->" ... "<!--<name>".
    for section, key in (('Genre', 'AMG_Genre'), ('Styles', 'AMG_Style'),
                         ('Moods', 'AMG_Mood'), ('Themes', 'AMG_Theme')):
        listing = re_parse('r', content, section + ' Listing-->(.*)<!--' + section)
        re_parse_multi(listing, key, r'\d">(.*?)</a>')

    review_rx = r'<td align="left" class="title">.*?"author">by\s.*?</td>.*?<p>(.*?)</p>'
    revw = re_parse('r', content, review_rx)
    if revw:
        review = re.sub(r'<.*?>', '', revw)
        if 'Read More...' in review:
            # The original call omitted the text argument (TypeError) and
            # then stored the full review under a differently-cased name,
            # so the truncated text was always used.  The link is looked
            # up in the raw review HTML, where the anchor still exists.
            more = re_parse('r', revw, r'\.\.\. <a href="(.*?)">Read More\.\.\.</a>')
            if more:
                # 'amp;' is stripped as for every other link in this file.
                more_url = url + more.replace('amp;', '')
                more_review = MyOpener().open(more_url).read().decode('latin-1').encode('utf-8')
                full = re_parse('r', more_review, review_rx)
                if full:
                    review = re.sub(r'<.*?>', '', full)
        author = re_parse('r', content, r'<td align="left" class="title">.*?"author">by\s.(.*?)<')
        if author:
            review += ' AMG Review by ' + author
        amg['AMG_Review'] = review

    artist_link = re_parse('r', content, r'<span>Artist.*?<a href="(.*?)">')
    if artist_link:
        amg['AMG_Artist_URL'] = url + artist_link.replace('amp;', '')

    if last_arg('_dbg_') is not None:
        print('Release parsed')
def re_parse_tracks():
    """Parse the track table in the global ``trx`` into ``amg_trx``.

    For every track row matched, ``amg_trx[<track number>]`` is set to a
    dict with the keys ``Track``, ``Title``, ``Composer``, ``Performer``
    and ``Song_Review``.  The name in the last cell is stored as
    ``Performer`` when it is wrapped in a link and as ``Composer``
    otherwise.  ``Song_Review`` is fetched through ``re_song_review``
    when the row starts with a review link.

    NOTE(review): in the original the "Please report this release"
    message hung off a test of the mandatory track-number group and could
    never fire.  It is printed here when no track row matches at all,
    which is the presumed intent -- confirm.
    """
    debug = last_arg('_dbg_') is not None
    found = False
    for match in re.finditer(r'(^<a href="(.*?)"><.*?)?^<TD class=\"cell\">(\d.*?)</TD>.*?<a href=\".*?\">(\w.*?)</a>.*?^<TD class=\"cell\">\r\n(^(<a.*?>)?(\w.*?)?(</.*?)?\r\n)?', trx, re.S|re.M):
        found = True
        number = match.group(3)
        if debug:
            print('Track ' + number + ' regex succesful')
        auth = None
        perf = None
        revw = None
        # Group 6 is the optional opening <a ...> around the name.
        if match.group(6) is None:
            auth = match.group(7)
        else:
            perf = match.group(7)
        if match.group(2):
            revw = re_song_review(url + match.group(2).replace('amp;', ''))
        amg_trx[number] = {
            'Track': number,
            'Title': match.group(4),
            'Composer': auth,
            'Performer': perf,
            'Song_Review': revw,
        }
    if not found:
        print('Please report this release: ' + album + ' by ' + artist)
def read_tempfile():
    """Load cached lookup results from the temp files into the globals.

    ``amg`` is unpickled from the file named by the global ``tmp``.  If
    the companion track file (``tmp`` with ``.tmp`` replaced by
    ``t.tmp``) exists, ``amg_trx`` is unpickled from it as well.
    """
    global amg, amg_trx
    # NOTE: pickle.load can execute arbitrary code from the file.  This is
    # only acceptable because the file sits in the user's own TEMP folder
    # and is written by write_tempfile().
    # The files are opened in text mode to match how write_tempfile()
    # writes them.
    with open(tmp) as f:
        amg = pickle.load(f)
    tracks_tmp = tmp.replace('.tmp', 't.tmp')
    if os.path.isfile(tracks_tmp):
        with open(tracks_tmp) as g:
            amg_trx = pickle.load(g)
    if last_arg('_dbg_') is not None:
        print('Temp file read')
def write_tempfile():
    """Cache the current lookup results in temp files.

    Steps:

    1. If the marker file ``py`` exists, read the cache path recorded in
       it.  When that path differs from the current ``tmp``, delete that
       cache file and its ``t.tmp`` companion where they exist.
    2. Pickle ``amg`` to ``tmp``.
    3. When ``chk_trx()`` reports a track lookup, pickle ``amg_trx`` to
       the ``t.tmp`` companion file.
    4. Record ``tmp`` in the marker file.

    Every file is closed deterministically; the original referenced
    ``.close`` without calling it.
    """
    debug = last_arg('_dbg_') is not None
    if debug:
        print('Writting temp file...')

    if os.path.isfile(py):
        with open(py) as h:
            tpy = str(h.readline())
        # NOTE(review): the original nesting was lost; here both stale
        # files are removed whenever the recorded path is not the current
        # one -- confirm.
        if tpy != tmp:
            for stale in (tpy, tpy.replace('.tmp', 't.tmp')):
                if os.path.isfile(stale):
                    os.remove(stale)

    # Text mode is kept so the files stay readable by read_tempfile().
    with open(tmp, 'w') as f:
        pickle.dump(amg, f)
    if chk_trx() == 1:
        with open(tmp.replace('.tmp', 't.tmp'), 'w') as g:
            pickle.dump(amg_trx, g)
    with open(py, 'w') as h:
        h.write(tmp)

    if debug:
        print('Writting temp file done')
def get_data():
    """Fetch release (and optionally track) data, then cache it.

    Resets the global ``rel`` and runs ``go()``.  When a release page was
    found, ``re_parse_release()`` runs, followed by ``re_parse_tracks()``
    if ``chk_trx()`` reports a track lookup; the results are then written
    with ``write_tempfile()``.

    Parsing stays best-effort: an error in either parser does not abort
    the run, but it is reported when the ``_dbg_`` flag is set instead of
    being silently discarded.

    NOTE(review): the original nesting was lost; this assumes the temp
    file is written only when a release was found -- confirm.
    """
    global rel
    debug = last_arg('_dbg_') is not None
    if debug:
        print('Getting data...')
    rel = None
    go()
    if rel is None:
        return
    if debug:
        print('Release found')
    try:
        re_parse_release()
    except Exception as err:
        if debug:
            print('Release parsing failed: %r' % (err,))
    if chk_trx() == 1:
        try:
            re_parse_tracks()
        except Exception as err:
            if debug:
                print('Track parsing failed: %r' % (err,))
    write_tempfile()
def _track_tag_args():
    """Return the ``Field="value";`` tag fragments for the current track."""
    parts = []
    for entry in amg_trx.values():
        try:
            same_track = int(entry['Track']) == int(track)
        except ValueError:
            # A non-numeric track number cannot be compared; skip it
            # rather than abort the whole tagging run.
            continue
        if not same_track:
            continue
        for field in ('Composer', 'Performer', 'Song_Review'):
            if entry[field] is not None:
                parts.append(field + '=' + '"' + clean_up(entry[field]) + '";')
    return ''.join(parts)


def _write_csv():
    """Write the release data, and track data if requested, as CSV files."""
    release_csv = out + album + '.amg_release.csv'
    if os.path.isfile(release_csv):
        return
    with open(release_csv, 'w') as f:
        for key, value in amg.items():
            f.write(key + '\t' + value + '\r')
    # NOTE(review): the original nesting was lost; this assumes the track
    # file is written only together with a new release file -- confirm.
    if chk_trx() != 1:
        return
    rows = []
    for entry in amg_trx.values():
        cells = [field + ':' + entry[field]
                 for field in ('Track', 'Title', 'Composer', 'Performer', 'Song_Review')
                 if entry[field] is not None]
        rows.append('\t'.join(cells))
    rows.sort()
    with open(out + album + '.amg_tracks.csv', 'w') as g:
        g.write('\r'.join(rows))


def process_data():
    """Hand the collected data to foobar2000, or write it to CSV files.

    When the foobar2000 executable path (global ``foo``) points to an
    existing file and the ``_nofoo_`` flag is absent, the executable is
    run with a ``/tag:`` argument carrying the track fields and an
    ``AMG="..."`` field that joins all release entries with backslashes.
    Nothing is run when there are no release entries.

    Otherwise ``<album>.amg_release.csv`` (and ``<album>.amg_tracks.csv``
    for track lookups) are written next to the audio file, unless the
    release file already exists.
    """
    debug = last_arg('_dbg_') is not None
    if debug:
        print('Processing...')
    args = [key + ': ' + clean_up(value) for key, value in amg.items()]

    # ``foo`` is only bound when the foobar2000 COM object was reachable.
    # Referencing it directly raised NameError in command-line mode, which
    # made the CSV branch below unreachable.
    foo_path = globals().get('foo')
    if foo_path and os.path.isfile(foo_path) and last_arg('_nofoo_') is None:
        arg = ' /tag:'
        if chk_trx() == 1:
            arg += _track_tag_args()
        arg += 'AMG="' + '\\'.join(args) + '" "'
        if debug:
            print('Starting subprocess...')
        if args:
            # Passed as one command-line string on purpose: the /tag:
            # syntax carries its own quoting, which clean_up() protects
            # by replacing double quotes in the values.
            subprocess.call(q(str(foo_path)) + arg + str(path) + '"')
        if debug:
            print('Subprocess ended \r\n')
    else:
        if debug:
            print('Writing csv...')
        _write_csv()
        if debug:
            print('Writing csv done /r/n')
def chk_trx():
    """Tell whether track-level data should be looked up.

    Returns ``1`` when the global ``state`` is ``None`` and the global
    ``track`` is set to something other than the empty string; returns
    ``None`` otherwise.  Callers compare the result with ``== 1``.
    """
    # ``track`` is only bound when a track number was supplied; treat a
    # missing binding like an empty track instead of raising NameError.
    current_track = globals().get('track', '')
    if state is None and current_track != '':
        return 1
    return None
def get_args():
    """Run a lookup from command-line arguments.

    Expected arguments: ``file_path artist album [-track tracknumber
    title]``.  They are stored in the globals ``path``, ``artist``,
    ``album``, ``track`` and ``title`` before ``start()`` is called.
    ``track`` and ``title`` are set to ``''`` when ``-track`` is not
    given.

    The usage text is printed when required arguments are missing.  If
    ``start()`` fails, the error is written to standard error and the
    usage text is printed as before.
    """
    global path, artist, album, track, title
    if last_arg('_dbg_') is not None:
        print('Getting tags')

    argv = sys.argv
    if len(argv) < 4:
        usage()
        return
    path, artist, album = argv[1], argv[2], argv[3]
    # The original indexed argv[4] unconditionally, so omitting the
    # optional -track part raised IndexError and only printed the usage.
    track = ''
    title = ''
    if len(argv) > 4 and argv[4] == '-track':
        if len(argv) < 7:
            usage()
            return
        track = argv[5]
        title = argv[6]

    try:
        start()
    except Exception as err:
        # Show the real failure instead of hiding it behind the usage text.
        sys.stderr.write('AMG: %r\n' % (err,))
        usage()
def start():
    """Look up (or load from cache) the data for the current file and emit it.

    Sets these globals:

    * ``tmp`` -- cache file path in ``%TEMP%``, named after the hash of
      the album title,
    * ``out`` -- directory part of ``path`` including the trailing
      backslash, or ``''`` when ``path`` contains no backslash,
    * ``amg`` / ``amg_trx`` -- reset to empty dicts.

    Cached data is read when ``tmp`` exists; otherwise it is fetched with
    ``get_data()``.  ``process_data()`` then writes the result out.
    """
    global tmp, out, amg, amg_trx
    # NOTE(review): the cache key uses the album title only, so two
    # artists' albums with the same title would share one cache file.
    tmp = os.environ['TEMP'] + '\\' + str(abs(hash(album))) + '.tmp'
    folder = re.search(r'.*\\', path)
    # A bare file name has no directory part; the original raised
    # AttributeError on the failed match.
    out = folder.group(0) if folder else ''
    amg = {}
    amg_trx = {}

    if last_arg('_dbg_') is not None:
        for index, value in enumerate(sys.argv):
            print('sys.argv[%d] = %s' % (index, value))
        for name in (py, tmp, tmp.replace('.tmp', 't.tmp')):
            if os.path.isfile(name):
                print(name + ' [exist]')
            else:
                print(name + ' [not there]')

    if os.path.isfile(tmp):
        read_tempfile()
    else:
        get_data()
    process_data()
# Marker file in %TEMP%; write_tempfile() stores the path of the most
# recently written cache file in it.
py = os.environ['TEMP'] + '\\' + '_py_amg_'
url = 'http://www.allmusic.com'
# chk_trx() only enables track lookups while ``state`` is None; the
# ``_nosr_`` flag switches them off by setting it to 0.
state = None
if last_arg('_nosr_') is not None:
    state = 0

# Preferred mode: drive foobar2000 through COM and process every track of
# the playlist named "amg".  If that fails for any reason, fall back to
# the command-line arguments.
try:
    fb2k = win32com.client.Dispatch("Foobar2000.Application.0.7")
    foo = fb2k.ApplicationPath
    pls = fb2k.Playlists
    for i in pls:
        if i.Name.lower() == 'amg':
            for j in i.GetTracks():
                path = j.Path.replace('file://', '').encode('ascii', 'ignore')
                artist = j.FormatTitle("%album artist%").encode('ascii', 'ignore')
                album = j.FormatTitle("%album%").encode('ascii', 'ignore')
                track = j.FormatTitle("%tracknumber%")
                title = j.FormatTitle("%title%").replace(',', '')
                if last_arg('_dbg_') is not None:
                    print('foo Starting ' + path)
                if os.path.isfile(path):
                    start()
# ``Exception`` rather than a bare except, so Ctrl-C and SystemExit are
# not mistaken for "foobar2000 is unavailable".
except Exception:
    get_args()