Advertisement
Guest User

styleyear-mp3

a guest
Jul 22nd, 2018
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.10 KB | None | 0 0
  1. import requests
  2. from mutagen.easyid3 import EasyID3
  3. import os
  4. import re
  5. from lxml import html
  6. import csv
  7. import glob
  8. import time
  9. from fuzzywuzzy import fuzz
  10. import sys
  11. from pathlib import Path
  12.  
  13. def return_tag_data(mp3file):
  14. audio = EasyID3(mp3file)
  15. return audio['artist'][0], audio['title'][0]
  16.  
  17. def write_tag_data(mp3file,style):
  18. audio = EasyID3(mp3file)
  19. audio['genre'] = style
  20. audio.save()
  21.  
  22. def write_tag_data(mp3file,year):
  23. audio = EasyID3(mp3file)
  24. audio['date'] = year
  25. audio.save()
  26.  
  27. def get_style(artist_c, title_c, artist, title):
  28. s = requests.Session()
  29. headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
  30. 'Accept-Encoding': 'gzip, deflate, br',
  31. 'Accept-Language': 'en-US,en;q=0.9',
  32. 'Connection': 'keep-alive',
  33. 'Host': 'www.discogs.com',
  34. 'Referer': 'https://www.discogs.com/search/advanced',
  35. 'Upgrade-Insecure-Requests': '1',
  36. 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'}
  37. try:
  38. r = s.get('https://www.discogs.com/search/?type=master&title='+title_c.replace(' ','+')+'&artist='+artist_c.replace(' ','+')+\
  39. '&label=&track=&catno=&barcode=&anv=&format=&credit=&genre=&style=&country=&year=&submitter=&contributor=&matrix=&advanced=1',headers=headers)
  40. except:
  41. time.sleep(2)
  42. try:
  43. r = s.get('https://www.discogs.com/search/?type=master&title='+title_c.replace(' ','+')+'&artist='+artist_c.replace(' ','+')+\
  44. '&label=&track=&catno=&barcode=&anv=&format=&credit=&genre=&style=&country=&year=&submitter=&contributor=&matrix=&advanced=1',headers=headers)
  45. except:
  46. r = []
  47. if r != []:
  48. if 'find anything in the Discogs database matching your search criteria' not in r.text:
  49. sections = html.fromstring(r.text).xpath('//div[contains(@class,"card card_large float_fix")]')
  50. for section in sections:
  51. x = lambda x: html.fromstring(html.tostring(section)).xpath(x)
  52. y = lambda y: re.findall(y,str(html.tostring(section)),re.DOTALL)
  53. new_title = x('//a[@class="search_result_title"]//@title')[0].strip()
  54. new_artist = y('spanitemprop title="(.+?)"')[0].strip()
  55. if (fuzz.token_sort_ratio(title.lower().strip(),new_title.lower().strip()) > 50) and (fuzz.token_sort_ratio(artist.lower().strip(),new_artist.lower().strip()) > 50):
  56. link = "https://www.discogs.com"+x('//a[@class="search_result_title"]//@href')[0].strip()
  57. headers['Referer'] = 'https://www.discogs.com/search/?type=master&title='+title_c.replace(' ','+')+'&artist='+artist_c.replace(' ','+')+\
  58. '&label=&track=&catno=&barcode=&anv=&format=&credit=&genre=&style=&country=&year=&submitter=&contributor=&matrix=&advanced=1'
  59. r1 = s.get(link,headers=headers)
  60. if '404! Oh no!' not in r1.text:
  61. style = ", ".join(html.fromstring(r1.text).xpath('//div[contains(text(),"Style:")]/following::div[@class="content"][1]//a//text()'))
  62. return style
  63.  
  64. def clean(string):
  65. string = re.sub(r'\([^)]*\)', '', string).strip()
  66. if ',' in string:
  67. string = string.split(',')[0].strip()
  68. if '&' in string:
  69. string = string.split('&')[0].strip()
  70. return string
  71.  
  72. def get_year(artist_c, title_c, artist, title):
  73. s = requests.Session()
  74. headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
  75. 'Accept-Encoding': 'gzip, deflate, br',
  76. 'Accept-Language': 'en-US,en;q=0.9',
  77. 'Connection': 'keep-alive',
  78. 'Host': 'www.discogs.com',
  79. 'Referer': 'https://www.discogs.com/search/advanced',
  80. 'Upgrade-Insecure-Requests': '1',
  81. 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'}
  82. try:
  83. r = s.get('https://www.discogs.com/search/?type=master&title='+title_c.replace(' ','+')+'&artist='+artist_c.replace(' ','+')+\
  84. '&label=&track=&catno=&barcode=&anv=&format=&credit=&genre=&style=&country=&year=&submitter=&contributor=&matrix=&advanced=1',headers=headers)
  85. except:
  86. time.sleep(2)
  87. try:
  88. r = s.get('https://www.discogs.com/search/?type=master&title='+title_c.replace(' ','+')+'&artist='+artist_c.replace(' ','+')+\
  89. '&label=&track=&catno=&barcode=&anv=&format=&credit=&genre=&style=&country=&year=&submitter=&contributor=&matrix=&advanced=1',headers=headers)
  90. except:
  91. r = []
  92. if r != []:
  93. if 'find anything in the Discogs database matching your search criteria' not in r.text:
  94. sections = html.fromstring(r.text).xpath('//div[contains(@class,"card card_large float_fix")]')
  95. for section in sections:
  96. x = lambda x: html.fromstring(html.tostring(section)).xpath(x)
  97. y = lambda y: re.findall(y,str(html.tostring(section)),re.DOTALL)
  98. new_title = x('//a[@class="search_result_title"]//@title')[0].strip()
  99. new_artist = y('spanitemprop title="(.+?)"')[0].strip()
  100. if (fuzz.token_sort_ratio(title.lower().strip(),new_title.lower().strip()) > 50) and (fuzz.token_sort_ratio(artist.lower().strip(),new_artist.lower().strip()) > 50):
  101. link = "https://www.discogs.com"+x('//a[@class="search_result_title"]//@href')[0].strip()
  102. headers['Referer'] = 'https://www.discogs.com/search/?type=master&title='+title_c.replace(' ','+')+'&artist='+artist_c.replace(' ','+')+\
  103. '&label=&track=&catno=&barcode=&anv=&format=&credit=&genre=&style=&country=&year=&submitter=&contributor=&matrix=&advanced=1'
  104. r1 = s.get(link,headers=headers)
  105. if '404! Oh no!' not in r1.text:
  106. year = ", ".join(html.fromstring(r1.text).xpath('//div[contains(text(),"Year:")]/following::div[@class="content"][1]//a//text()'))
  107. return year
  108.  
  109.  
  110. def clean(string):
  111. string = re.sub(r'\([^)]*\)', '', string).strip()
  112. if ',' in string:
  113. string = string.split(',')[0].strip()
  114. if '&' in string:
  115. string = string.split('&')[0].strip()
  116. return string
  117.  
  118. for mp3file in Path(sys.argv[1]).glob('**/*.mp3'):
  119. print (mp3file)
  120. artist, title = return_tag_data(mp3file)
  121. artist_c = clean(artist)
  122. title_c = clean(title)
  123. style = get_style(artist_c, title_c, artist, title)
  124. year = get_year(artist_c, title_c, artist, title)
  125. if style != None:
  126. print ("Artist : {}\nTitle : {}\nStyle : {}\nYear : {}\n".format(artist, title, style, year))
  127. write_tag_data(mp3file, style)
  128. write_tag_data(mp3file, year)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement