Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from mutagen.easyid3 import EasyID3
- import os
- import re
- from lxml import html
- import csv
- import glob
- import time
- from fuzzywuzzy import fuzz
- import sys
- from pathlib import Path
def return_tag_data(mp3file):
    """Read the first artist and first title value from an MP3's EasyID3 tags.

    Args:
        mp3file: Path of the MP3 file, passed straight to ``EasyID3``.

    Returns:
        A ``(artist, title)`` tuple holding the first entry of each tag.
    """
    tags = EasyID3(mp3file)
    first_artist = tags['artist'][0]
    first_title = tags['title'][0]
    return first_artist, first_title
def write_tag_data(mp3file, year=None, style=None):
    """Write the release year and/or the style into an MP3's EasyID3 tags.

    This single definition replaces two same-named functions: the later one
    (which set ``date``) shadowed the earlier one (which set ``genre``), so
    the genre could never be written. The second positional argument keeps
    the meaning it effectively had before (the year), and the style is now
    reachable through the ``style`` keyword.

    Args:
        mp3file: Path of the MP3 file, passed straight to ``EasyID3``.
        year: Value stored under the ``date`` tag; skipped when ``None``.
        style: Value stored under the ``genre`` tag; skipped when ``None``.
    """
    if year is None and style is None:
        # Nothing to write: avoid opening and re-saving the file for no reason.
        return
    audio = EasyID3(mp3file)
    if style is not None:
        audio['genre'] = style
    if year is not None:
        audio['date'] = year
    audio.save()
def get_style(artist_c, title_c, artist, title, timeout=30):
    """Scrape Discogs for the "Style" of a track's master release.

    Runs an advanced master-release search with the cleaned artist/title,
    walks the result cards in order, and for the first card whose title and
    artist both fuzzy-match the original tags (token_sort_ratio > 50) opens
    the release page and reads the links listed after the "Style:" label.

    Args:
        artist_c: Cleaned artist name used in the search query.
        title_c: Cleaned title used in the search query.
        artist: Original artist tag, compared against each search result.
        title: Original title tag, compared against each search result.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The style names joined with ", " (possibly an empty string when the
        page lists none), or ``None`` when the search failed twice, returned
        no results, or no result card matched.
    """
    # Function-scope import so this block does not rely on a file-level import.
    from urllib.parse import quote_plus

    # quote_plus still turns spaces into '+', and additionally escapes
    # characters such as '#', '?' or '=' that would otherwise corrupt the query.
    search_url = (
        'https://www.discogs.com/search/?type=master&title=' + quote_plus(title_c)
        + '&artist=' + quote_plus(artist_c)
        + '&label=&track=&catno=&barcode=&anv=&format=&credit=&genre=&style=&country=&year=&submitter=&contributor=&matrix=&advanced=1'
    )
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'Host': 'www.discogs.com',
        'Referer': 'https://www.discogs.com/search/advanced',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    }

    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as s:
        # One retry after a 2-second pause. Only network errors are caught,
        # so Ctrl-C and programming errors still propagate.
        r = None
        for attempt in range(2):
            try:
                r = s.get(search_url, headers=headers, timeout=timeout)
                break
            except requests.RequestException:
                if attempt == 0:
                    time.sleep(2)
        if r is None:
            return None
        if 'find anything in the Discogs database matching your search criteria' in r.text:
            return None

        # The release page is requested as if navigated to from the search page.
        headers['Referer'] = search_url
        wanted_title = title.lower().strip()
        wanted_artist = artist.lower().strip()

        sections = html.fromstring(r.text).xpath('//div[contains(@class,"card card_large float_fix")]')
        for section in sections:
            # Re-parse the card on its own so the absolute '//' XPaths below
            # only see this card and not the whole results page.
            card_markup = html.tostring(section)
            card = html.fromstring(card_markup)
            titles = card.xpath('//a[@class="search_result_title"]//@title')
            # NOTE(review): 'spanitemprop' is kept exactly as found; it may be
            # a garbled 'span itemprop=...' pattern - confirm against the live markup.
            artists = re.findall('spanitemprop title="(.+?)"', str(card_markup), re.DOTALL)
            hrefs = card.xpath('//a[@class="search_result_title"]//@href')
            if not (titles and artists and hrefs):
                # Card lacks the expected fields; skip it rather than raise IndexError.
                continue

            new_title = titles[0].strip()
            new_artist = artists[0].strip()
            if fuzz.token_sort_ratio(wanted_title, new_title.lower().strip()) <= 50:
                continue
            if fuzz.token_sort_ratio(wanted_artist, new_artist.lower().strip()) <= 50:
                continue

            link = "https://www.discogs.com" + hrefs[0].strip()
            try:
                r1 = s.get(link, headers=headers, timeout=timeout)
            except requests.RequestException:
                # Treat an unreachable release page like a non-match and try the next card.
                continue
            if '404! Oh no!' not in r1.text:
                style = ", ".join(html.fromstring(r1.text).xpath('//div[contains(text(),"Style:")]/following::div[@class="content"][1]//a//text()'))
                return style
    return None
def clean(string):
    """Reduce an artist/title tag to its leading, search-friendly part.

    Parenthesised groups are removed first; the text is then cut at the
    first ',' and the remainder at the first '&'. Surrounding whitespace is
    trimmed from the result.
    """
    trimmed = re.sub(r'\([^)]*\)', '', string)
    for separator in (',', '&'):
        # partition() keeps everything before the first separator, or the
        # whole text when the separator is absent.
        trimmed = trimmed.partition(separator)[0]
    return trimmed.strip()
def get_year(artist_c, title_c, artist, title, timeout=30):
    """Scrape Discogs for the "Year" of a track's master release.

    Runs an advanced master-release search with the cleaned artist/title,
    walks the result cards in order, and for the first card whose title and
    artist both fuzzy-match the original tags (token_sort_ratio > 50) opens
    the release page and reads the links listed after the "Year:" label.

    Args:
        artist_c: Cleaned artist name used in the search query.
        title_c: Cleaned title used in the search query.
        artist: Original artist tag, compared against each search result.
        title: Original title tag, compared against each search result.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The year text joined with ", " (possibly an empty string when the
        page lists none), or ``None`` when the search failed twice, returned
        no results, or no result card matched.
    """
    # Function-scope import so this block does not rely on a file-level import.
    from urllib.parse import quote_plus

    # quote_plus still turns spaces into '+', and additionally escapes
    # characters such as '#', '?' or '=' that would otherwise corrupt the query.
    search_url = (
        'https://www.discogs.com/search/?type=master&title=' + quote_plus(title_c)
        + '&artist=' + quote_plus(artist_c)
        + '&label=&track=&catno=&barcode=&anv=&format=&credit=&genre=&style=&country=&year=&submitter=&contributor=&matrix=&advanced=1'
    )
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'Host': 'www.discogs.com',
        'Referer': 'https://www.discogs.com/search/advanced',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    }

    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as s:
        # One retry after a 2-second pause. Only network errors are caught,
        # so Ctrl-C and programming errors still propagate.
        r = None
        for attempt in range(2):
            try:
                r = s.get(search_url, headers=headers, timeout=timeout)
                break
            except requests.RequestException:
                if attempt == 0:
                    time.sleep(2)
        if r is None:
            return None
        if 'find anything in the Discogs database matching your search criteria' in r.text:
            return None

        # The release page is requested as if navigated to from the search page.
        headers['Referer'] = search_url
        wanted_title = title.lower().strip()
        wanted_artist = artist.lower().strip()

        sections = html.fromstring(r.text).xpath('//div[contains(@class,"card card_large float_fix")]')
        for section in sections:
            # Re-parse the card on its own so the absolute '//' XPaths below
            # only see this card and not the whole results page.
            card_markup = html.tostring(section)
            card = html.fromstring(card_markup)
            titles = card.xpath('//a[@class="search_result_title"]//@title')
            # NOTE(review): 'spanitemprop' is kept exactly as found; it may be
            # a garbled 'span itemprop=...' pattern - confirm against the live markup.
            artists = re.findall('spanitemprop title="(.+?)"', str(card_markup), re.DOTALL)
            hrefs = card.xpath('//a[@class="search_result_title"]//@href')
            if not (titles and artists and hrefs):
                # Card lacks the expected fields; skip it rather than raise IndexError.
                continue

            new_title = titles[0].strip()
            new_artist = artists[0].strip()
            if fuzz.token_sort_ratio(wanted_title, new_title.lower().strip()) <= 50:
                continue
            if fuzz.token_sort_ratio(wanted_artist, new_artist.lower().strip()) <= 50:
                continue

            link = "https://www.discogs.com" + hrefs[0].strip()
            try:
                r1 = s.get(link, headers=headers, timeout=timeout)
            except requests.RequestException:
                # Treat an unreachable release page like a non-match and try the next card.
                continue
            if '404! Oh no!' not in r1.text:
                year = ", ".join(html.fromstring(r1.text).xpath('//div[contains(text(),"Year:")]/following::div[@class="content"][1]//a//text()'))
                return year
    return None
def clean(string):
    """Strip a tag value down to the part before any ',' or '&'.

    Text in parentheses is dropped first, then everything from the earliest
    ',' or '&' onwards is discarded and the remainder is whitespace-trimmed.
    """
    no_parens = re.sub(r'\([^)]*\)', '', string)
    # Cutting at the first ',' and then at the first '&' of what is left is
    # the same as cutting at whichever of the two characters appears first.
    head = re.split(r'[,&]', no_parens, maxsplit=1)[0]
    return head.strip()
# Script body: look up style and year on Discogs for every MP3 found
# (recursively) under the directory given as the first command-line argument,
# and store them in the file's tags.
if len(sys.argv) < 2:
    # Exit with a readable message instead of an IndexError traceback.
    sys.exit("usage: python <script> <music_directory>")

for mp3file in Path(sys.argv[1]).glob('**/*.mp3'):
    print(mp3file)
    try:
        artist, title = return_tag_data(mp3file)
    except (KeyError, IndexError):
        # No usable artist/title tag to search with: skip this file rather
        # than abort the whole batch.
        print("Skipping {}: missing artist or title tag\n".format(mp3file))
        continue

    artist_c = clean(artist)
    title_c = clean(title)
    style = get_style(artist_c, title_c, artist, title)
    year = get_year(artist_c, title_c, artist, title)
    if style is None:
        # No matching release was found; leave the file untouched.
        continue

    print("Artist : {}\nTitle : {}\nStyle : {}\nYear : {}\n".format(artist, title, style, year))

    # Write both tags in one pass so the style is stored under 'genre' and
    # the year under 'date', and the file is saved only once.
    tags = EasyID3(mp3file)
    tags['genre'] = style
    if year:
        # Skip an empty or missing year instead of writing a blank date tag.
        tags['date'] = year
    tags.save()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement