Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import csv
import pickle
from urllib.request import urlopen

import bs4 as bs
from PyLyrics import *
# First chart year to process. The scraping loop further down starts at this
# value and increments `year` after each chart until it reaches 1961.
year = 1957
def splitSong(song):
    """Return only the first title of a multi-title chart entry.

    Entries that list several titles separate them with " / " (spaces on
    both sides).  Only that exact separator is recognised, so a title that
    merely contains a slash (e.g. "AC/DC") is returned untouched.

    Args:
        song: Song title as it appears in the chart table.

    Returns:
        The text before the first " / ", or ``song`` unchanged when the
        separator does not occur.
    """
    if " / " in song:
        return song.split(" / ", 1)[0]
    return song
def splitArtist(artist):
    """Reduce a chart artist credit to its first-named artist.

    The credit is cut down in three steps, each one logged with ``print``:

    1. everything from the first "/" onwards is dropped;
    2. everything from the first " & " onwards is dropped;
    3. everything from the first "featuring" marker onwards is dropped.
       Markers are matched case-sensitively and tried in a fixed order;
       only the first marker that matches is applied.

    Args:
        artist: Artist credit as it appears in the chart table.

    Returns:
        The leading artist name.  When at least one cut was made, trailing
        whitespace left behind by the cut is removed (so "A / B" yields
        "A", not "A ").  When nothing matched, ``artist`` is returned
        unchanged.
    """
    # Order matters: it mirrors the precedence in which the markers are
    # tested, and only the first hit is used.
    feature_markers = (
        " feat ", " ft ", " featuring ", " ft. ", " feat. ",
        " Feat ", " Ft ", " Featuring ", " Ft. ", " Feat. ",
    )

    primary = artist
    was_split = False

    # Both of these separators are applied, one after the other.
    for separator in ("/", " & "):
        if separator in primary:
            head = primary.split(separator, 1)[0]
            print("Artist split went from", primary, "to", head)
            primary = head
            was_split = True

    for marker in feature_markers:
        if marker in primary:
            head = primary.split(marker, 1)[0]
            print("Artist split went from", primary, "to", head)
            primary = head
            was_split = True
            break

    # Trimming is deferred to the very end so that the intermediate matching
    # above sees exactly the text each cut produced; an untouched credit is
    # handed back as-is.
    return primary.rstrip() if was_split else primary
def _fetch_chart_entries(chart_year):
    """Scrape one year's top-100 page into a list of (artist, song) tuples."""
    url = ('http://www.uk-charts.top-source.info/top-100-'
           + str(chart_year) + '.shtml')
    with urlopen(url) as response:
        src = response.read()
    soup = bs.BeautifulSoup(src, 'lxml')

    entries = []
    for row in soup.findAll('table')[0].tbody.findAll('tr'):
        cells = row.findAll('td')
        if len(cells) < 3:
            # Rows without an artist and a song column carry no chart entry.
            continue
        artist = cells[1].text
        song = cells[2].text
        if chart_year == 2009:
            # The 2009 chart is lower-cased before clean-up.
            # NOTE(review): the reason is not visible in this file;
            # presumably that page uses different capitalisation -- confirm.
            artist = artist.lower()
            song = song.lower()
        entries.append((splitArtist(artist), splitSong(song)))
    return entries


def _fetch_lyrics(entries):
    """Look up lyrics for every entry; return (lyrics, failed_entries)."""
    lyrics = []
    failed = []
    for entry in entries:
        artist, song = entry
        try:
            text = PyLyrics.getLyrics(artist, song)
            # Flatten the lyrics onto one line, one "sentence" per line.
            lyrics.append(text.replace('\n', '. '))
        except Exception:
            # Best effort: any failed lookup is logged and recorded, and the
            # run continues.  `Exception` (rather than a bare except) keeps
            # Ctrl-C and interpreter shutdown working during a long scrape.
            print(artist + song)
            failed.append(entry)
    return lyrics, failed


def _dump_lyrics(chart_year, lyrics):
    """Pickle the collected lyrics of one year to songs<year>.pickle."""
    file_name = "songs" + str(chart_year) + ".pickle"
    with open(file_name, "wb+") as pickle_file:
        pickle.dump(lyrics, pickle_file)


def _write_failures(chart_year, failed):
    """Write the (artist, song) pairs that had no lyrics to fail<year>.csv."""
    filename = "fail" + str(chart_year) + ".csv"
    # newline='' lets the csv module control line endings itself; without it
    # blank rows appear between records on platforms that translate "\n".
    with open(filename, 'w+', newline='') as csv_file:
        csv.writer(csv_file).writerows(failed)


# Largest number of failed lyric lookups seen for any single year so far.
nr_of_songs = 0

# Process one chart per iteration, starting from the module-level `year`.
while year < 1961:
    data = _fetch_chart_entries(year)
    mylist, myfailedlist = _fetch_lyrics(data)

    _dump_lyrics(year, mylist)
    print(str(year) + " is finished")
    print(len(myfailedlist))

    nr_of_songs = max(nr_of_songs, len(myfailedlist))

    # NOTE(review): the original indentation was lost, so it is assumed that
    # the failure CSV is written for every year, not only when a new maximum
    # of failures is reached -- confirm.
    _write_failures(year, myfailedlist)
    year += 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement