SHARE
TWEET

Untitled

a guest Aug 25th, 2019 83 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from urllib.request import urlopen
  2. import csv
  3. import pickle
  4. from PyLyrics import *
  5. import bs4 as bs
  6.  
  7. year = 1957
  8.  
  9. def splitSong(song):
  10.     if " / " in song:
  11.         splitSong = song.split(" / ",1)[0]
  12.         #print("Song split went from", song, "to", splitSong)
  13.         return splitSong
  14.     #elif "(" in song:
  15.     #    splitSong = song.split("(",1)[0]
  16.     #    print("Song split went from", song, "to", splitSong)
  17.     #    return splitSong
  18.     else:
  19.         return song
  20.    
  21. def splitArtist(artist):
  22.     if "/" in artist:
  23.         splitArtist = artist.split("/",1)[0]
  24.         print("Artist split went from", artist, "to", splitArtist)
  25.         artist = splitArtist
  26.     if " & " in artist:
  27.         splitArtist = artist.split(" & ",1)[0]
  28.         print("Artist split went from", artist, "to", splitArtist)
  29.         artist = splitArtist
  30.     if " feat " in artist:
  31.         splitArtist = artist.split(" feat ",1)[0]
  32.         print("Artist split went from", artist, "to", splitArtist)
  33.         return splitArtist
  34.     elif " ft " in artist:
  35.         splitArtist = artist.split(" ft ",1)[0]
  36.         print("Artist split went from", artist, "to", splitArtist)
  37.         return splitArtist
  38.     elif " featuring " in artist:
  39.         splitArtist = artist.split(" featuring ",1)[0]
  40.         print("Artist split went from", artist, "to", splitArtist)
  41.         return splitArtist
  42.     elif " ft. " in artist:
  43.         splitArtist = artist.split(" ft. ",1)[0]
  44.         print("Artist split went from", artist, "to", splitArtist)
  45.         return splitArtist
  46.     elif " feat. " in artist:
  47.         splitArtist = artist.split(" feat. ",1)[0]
  48.         print("Artist split went from", artist, "to", splitArtist)
  49.         return splitArtist
  50.     elif " Feat " in artist:
  51.         splitArtist = artist.split(" Feat ",1)[0]
  52.         print("Artist split went from", artist, "to", splitArtist)
  53.         return splitArtist
  54.     elif " Ft " in artist:
  55.         splitArtist = artist.split(" Ft ",1)[0]
  56.         print("Artist split went from", artist, "to", splitArtist)
  57.         return splitArtist
  58.     elif " Featuring " in artist:
  59.         splitArtist = artist.split(" Featuring ",1)[0]
  60.         print("Artist split went from", artist, "to", splitArtist)
  61.         return splitArtist
  62.     elif " Ft. " in artist:
  63.         splitArtist = artist.split(" Ft. ",1)[0]
  64.         print("Artist split went from", artist, "to", splitArtist)
  65.         return splitArtist
  66.     elif " Feat. " in artist:
  67.         splitArtist = artist.split(" Feat. ",1)[0]
  68.         print("Artist split went from", artist, "to", splitArtist)
  69.         return splitArtist
  70.     else:
  71.         return artist
  72. nr_of_songs = 0
  73. while year < 1961:
  74.     src = urlopen('http://www.uk-charts.top-source.info/top-100-' + str(year) + '.shtml').read()
  75.     soup = bs.BeautifulSoup(src, 'lxml')
  76.     table = soup.table
  77.     data = []
  78.     for row in soup.findAll('table')[0].tbody.findAll('tr'):
  79.         artist = row.findAll('td')[1].text
  80.         song = row.findAll('td')[2].text
  81.         if(year == 2009):
  82.             data.append((splitArtist(artist.lower()), splitSong(song.lower())))
  83.         else:
  84.             data.append((splitArtist(artist), splitSong(song)))
  85.                  
  86.     mylist = []
  87.     myfailedlist = []
  88.     failedArtist = ""
  89.     i = 0;
  90.     for d in data:
  91.         try:
  92.             music_info = PyLyrics.getLyrics(d[0], d[1])
  93.             music_info = music_info.replace('\n','. ')
  94.             mylist.append(music_info)
  95.         except:
  96.             failedArtist = d[0] + d[1]
  97.             print(failedArtist)
  98.             myfailedlist.append(d)
  99.  
  100.     file_name = "songs" + str(year) + ".pickle"
  101.     file = open(file_name,"wb+")
  102.     pickle.dump(mylist, file)
  103.     file.close()
  104.    
  105.     print(str(year) + " is finished")
  106.     print(len(myfailedlist))
  107.     if len(myfailedlist) > nr_of_songs:
  108.         nr_of_songs = len(myfailedlist)
  109.     filename = "fail" + str(year) + ".csv"
  110.     with open(filename, 'w+') as csvFile:
  111.         writer = csv.writer(csvFile)
  112.         artist_and_song = failedArtist
  113.         writer.writerows(myfailedlist)
  114.     csvFile.close()
  115.    
  116.    
  117.     year += 1
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top