Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- import pylast
- import numpy as np
- import pandas as pd
- from pandas.io.sql import read_sql
- from sklearn import preprocessing
- import scipy
- import psycopg2 as psy
- import sys
- import operator
def Connect():
    """Create and return an authenticated pylast ``LastFMNetwork`` handle.

    The API key, API secret, user name and password are hard-coded in this
    function (the values look like placeholders -- replace before use).
    The password is passed to pylast as an MD5 hash, never in plain text.
    """
    credentials = {
        'api_key': "myApiKey",
        'api_secret': "myApiSecret",
        'username': "myUserName",
        # pylast expects the MD5 digest of the password.
        'password_hash': pylast.md5("myPass"),
    }
    return pylast.LastFMNetwork(**credentials)
def dbCon():
    """Open a new PostgreSQL connection and return a cursor on it.

    Only the cursor is returned; the connection object itself is not
    handed back to the caller.
    """
    connection = psy.connect(
        dbname='myDbName',
        user='postgres',
        host='localhost',
        password='myDbPass',
    )
    return connection.cursor()
def getTags(artist1, artist2, artist3, artist4, artist5):
    """Fetch the top five Last.fm tags for each of the five given artists.

    Args:
        artist1..artist5: Artist names as entered by the user (web form).

    Returns:
        A flat list alternating tag name and tag weight, artist by artist:
        ``[name, weight, name, weight, ...]``. Names are converted with
        ``str()``; weights are appended exactly as the API returned them.
    """
    network = Connect()

    # Resolve every artist first, then request the tags, so the sequence of
    # API calls stays the same as in the hand-unrolled version.
    artists = [
        network.get_artist(name)
        for name in (artist1, artist2, artist3, artist4, artist5)
    ]
    tags_per_artist = [artist.get_top_tags(limit=5) for artist in artists]

    tag_list = []
    for artist_tags in tags_per_artist:
        for tag in artist_tags:
            # tag[0] is the tag item, tag[1] its weight.
            tag_list.append(str(tag[0]))
            tag_list.append(tag[1])
    return tag_list
def Convert(artist1, artist2, artist3, artist4, artist5):
    """Aggregate the artists' tags into a DataFrame of summed weights.

    The flat ``[name, weight, ...]`` list from ``getTags`` is split into
    names and integer weights; weights of tags that occur for more than one
    artist are added together.

    Returns:
        A ``pandas.DataFrame`` with one row per distinct tag and the columns
        ``'tag'`` and ``'weight'``.
    """
    flat = getTags(artist1, artist2, artist3, artist4, artist5)

    # Even positions hold tag names, odd positions hold their weights.
    names = flat[0::2]
    weights = [int(raw) for raw in flat[1::2]]

    totals = {}
    for name, weight in zip(names, weights):
        totals[name] = totals.get(name, 0) + weight

    distinct_tags = list(totals)
    summed_weights = [totals[name] for name in distinct_tags]
    return pd.DataFrame({'tag': distinct_tags, 'weight': summed_weights})
# Distance charged for every (user tag, stored tag slot) pair that does not match.
_NO_MATCH_PENALTY = 100

# Number of best-scoring rows examined before the input artists are removed.
# NOTE(review): 55 rather than 50 presumably leaves room for dropping up to
# five input artists -- confirm.
_CANDIDATE_COUNT = 55


def _strip_text(value):
    """Return *value* without surrounding whitespace; pass non-strings (e.g. NULL) through."""
    return value.strip() if isinstance(value, str) else value


def _scaled_user_weights(tag_frame):
    """Map each user tag to the list of its weights rescaled to the 0-100 range."""
    # MinMaxScaler requires a 2-D (n_samples, n_features) float array.
    raw = tag_frame['weight'].values.astype(float).reshape(-1, 1)
    scaled = preprocessing.MinMaxScaler().fit_transform(raw).ravel() * 100

    weights_by_tag = {}
    for tag, weight in zip(tag_frame['tag'], scaled):
        weights_by_tag.setdefault(tag, []).append(weight)
    return weights_by_tag


def _fetch_artist_rows():
    """Read every row of the "Artists" table, then close cursor and connection."""
    cur = dbCon()
    # dbCon() only returns the cursor; the connection it belongs to is
    # reachable through the cursor and must be closed as well.
    connection = cur.connection
    try:
        cur.execute('SELECT * FROM "Artists"')
        return cur.fetchall()
    finally:
        cur.close()
        connection.close()


def _row_distance(slots, weights_by_tag, user_tag_count):
    """Sum the tag distances between one stored artist and all user tags."""
    total = 0
    for tag, weight in slots:
        matches = weights_by_tag.get(_strip_text(tag), ())
        if matches:
            # The stored weight is only converted when its tag matches.
            stored = int(weight)
            total += sum(abs(int(user - stored)) for user in matches)
        # Every user tag that differs from this slot costs the full penalty.
        total += _NO_MATCH_PENALTY * (user_tag_count - len(matches))
    return total


def Recommend(artist1, artist2, artist3, artist4, artist5):
    """Recommend stored artists whose tags resemble those of the given artists.

    The combined tag weights of the five input artists are rescaled to
    0-100 and compared against every row of the "Artists" table, which is
    read as (artist, tag1, weight1, ..., tag5, weight5). For each pair of
    user tag and stored tag slot the distance is the truncated absolute
    weight difference when the tags are equal and 100 otherwise; an
    artist's score is the sum over all pairs.

    Args:
        artist1..artist5: Artist names supplied by the user.

    Returns:
        Names of the 55 lowest-scoring stored artists, best first, with the
        five input artists removed. Equal scores keep the order in which
        the rows were returned by the database. An empty list is returned
        when the input artists yield no tags or the table is empty.

    Raises:
        ValueError: If a database row does not have exactly eleven columns.
    """
    tag_frame = Convert(artist1, artist2, artist3, artist4, artist5)
    if tag_frame.empty:
        # Nothing to compare against; scaling an empty column would fail.
        return []

    weights_by_tag = _scaled_user_weights(tag_frame)
    user_tag_count = len(tag_frame)

    names = []
    distances = []
    for row in _fetch_artist_rows():
        name, tag1, weight1, tag2, weight2, tag3, weight3, \
            tag4, weight4, tag5, weight5 = row
        slots = (
            (tag1, weight1),
            (tag2, weight2),
            (tag3, weight3),
            (tag4, weight4),
            (tag5, weight5),
        )
        # Character columns are fixed-width, so values arrive space-padded.
        names.append(_strip_text(name))
        distances.append(_row_distance(slots, weights_by_tag, user_tag_count))

    # sorted() is stable, so ties are resolved deterministically by row order.
    ranked = sorted(range(len(distances)), key=distances.__getitem__)
    requested = (artist1, artist2, artist3, artist4, artist5)
    return [
        names[position]
        for position in ranked[:_CANDIDATE_COUNT]
        if names[position] not in requested
    ]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement