Advertisement
Guest User

style-mp3

a guest
Jul 22nd, 2018
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.84 KB | None | 0 0
import csv
import glob
import os
import re
import sys
import time
from pathlib import Path

import requests
from fuzzywuzzy import fuzz
from lxml import html
from mutagen import MutagenError
from mutagen.easyid3 import EasyID3

  13. def return_tag_data(mp3file):
  14. audio = EasyID3(mp3file)
  15. return audio['artist'][0], audio['title'][0]
  16.  
  17. def write_tag_data(mp3file,style):
  18. audio = EasyID3(mp3file)
  19. audio['genre'] = style
  20. audio.save()
  21.  
  22. def get_style(artist_c, title_c, artist, title):
  23. s = requests.Session()
  24. headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
  25. 'Accept-Encoding': 'gzip, deflate, br',
  26. 'Accept-Language': 'en-US,en;q=0.9',
  27. 'Connection': 'keep-alive',
  28. 'Host': 'www.discogs.com',
  29. 'Referer': 'https://www.discogs.com/search/advanced',
  30. 'Upgrade-Insecure-Requests': '1',
  31. 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'}
  32. try:
  33. r = s.get('https://www.discogs.com/search/?type=master&title='+title_c.replace(' ','+')+'&artist='+artist_c.replace(' ','+')+\
  34. '&label=&track=&catno=&barcode=&anv=&format=&credit=&genre=&style=&country=&year=&submitter=&contributor=&matrix=&advanced=1',headers=headers)
  35. except:
  36. time.sleep(2)
  37. try:
  38. r = s.get('https://www.discogs.com/search/?type=master&title='+title_c.replace(' ','+')+'&artist='+artist_c.replace(' ','+')+\
  39. '&label=&track=&catno=&barcode=&anv=&format=&credit=&genre=&style=&country=&year=&submitter=&contributor=&matrix=&advanced=1',headers=headers)
  40. except:
  41. r = []
  42. if r != []:
  43. if 'find anything in the Discogs database matching your search criteria' not in r.text:
  44. sections = html.fromstring(r.text).xpath('//div[contains(@class,"card card_large float_fix")]')
  45. for section in sections:
  46. x = lambda x: html.fromstring(html.tostring(section)).xpath(x)
  47. y = lambda y: re.findall(y,str(html.tostring(section)),re.DOTALL)
  48. new_title = x('//a[@class="search_result_title"]//@title')[0].strip()
  49. new_artist = y('spanitemprop title="(.+?)"')[0].strip()
  50. if (fuzz.token_sort_ratio(title.lower().strip(),new_title.lower().strip()) > 50) and (fuzz.token_sort_ratio(artist.lower().strip(),new_artist.lower().strip()) > 50):
  51. link = "https://www.discogs.com"+x('//a[@class="search_result_title"]//@href')[0].strip()
  52. headers['Referer'] = 'https://www.discogs.com/search/?type=master&title='+title_c.replace(' ','+')+'&artist='+artist_c.replace(' ','+')+\
  53. '&label=&track=&catno=&barcode=&anv=&format=&credit=&genre=&style=&country=&year=&submitter=&contributor=&matrix=&advanced=1'
  54. r1 = s.get(link,headers=headers)
  55. if '404! Oh no!' not in r1.text:
  56. style = ", ".join(html.fromstring(r1.text).xpath('//div[contains(text(),"Style:")]/following::div[@class="content"][1]//a//text()'))
  57. return style
  58.  
  59. def clean(string):
  60. string = re.sub(r'\([^)]*\)', '', string).strip()
  61. if ',' in string:
  62. string = string.split(',')[0].strip()
  63. if '&' in string:
  64. string = string.split('&')[0].strip()
  65. return string
  66.  
  67. for mp3file in Path(sys.argv[1]).glob('**/*.mp3'):
  68. print (mp3file)
  69. artist, title = return_tag_data(mp3file)
  70. artist_c = clean(artist)
  71. title_c = clean(title)
  72. style = get_style(artist_c, title_c, artist, title)
  73. if style != None:
  74. print ("Artist : {}\nTitle : {}\nStyle : {}\n".format(artist, title, style))
  75. write_tag_data(mp3file, style)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement