Advertisement
Guest User

Untitled

a guest
Feb 17th, 2016
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.14 KB | None | 0 0
  1. import requests
  2. import pylast
  3. import numpy as np
  4. import pandas as pd
  5. from pandas.io.sql import read_sql
  6. from sklearn import preprocessing
  7. import scipy
  8. import psycopg2 as psy
  9. import sys
  10. import operator
  11.  
  12. def Connect():
  13.  
  14.     API_KEY = "myApiKey"
  15.     API_SECRET = "myApiSecret"
  16.  
  17.     # Authenticate
  18.     username = "myUserName"
  19.     password_hash = pylast.md5("myPass")
  20.  
  21.     network = pylast.LastFMNetwork(api_key = API_KEY, api_secret =
  22.         API_SECRET, username = username, password_hash = password_hash)
  23.  
  24.     return network
  25.  
  26. def dbCon():
  27.     con = psy.connect(dbname='myDbName', user='postgres', host='localhost', password='myDbPass')
  28.     cur = con.cursor()
  29.     return cur
  30.  
  31. def getTags(artist1, artist2, artist3, artist4, artist5):
  32.  
  33.  
  34.     # User given artists from web form
  35.     user_artist1 = artist1
  36.     user_artist2 = artist2
  37.     user_artist3 = artist3
  38.     user_artist4 = artist4
  39.     user_artist5 = artist5
  40.  
  41.     # Connect to API
  42.     network = Connect()
  43.  
  44.     # Find artists
  45.     get_artist1 = network.get_artist(user_artist1)
  46.     get_artist2 = network.get_artist(user_artist2)
  47.     get_artist3 = network.get_artist(user_artist3)
  48.     get_artist4 = network.get_artist(user_artist4)
  49.     get_artist5 = network.get_artist(user_artist5)
  50.  
  51.     # Get artist tags
  52.     info1 = get_artist1.get_top_tags(limit=5)
  53.     info2 = get_artist2.get_top_tags(limit=5)
  54.     info3 = get_artist3.get_top_tags(limit=5)
  55.     info4 = get_artist4.get_top_tags(limit=5)
  56.     info5 = get_artist5.get_top_tags(limit=5)
  57.  
  58.     # Initialize empty tag list
  59.     tag_list = []  
  60.     tag_weight = []
  61.  
  62.     # For each artist, add in top 5 tags and weights of those tags
  63.     for tag in info1:
  64.         tag_list.append(str(tag[0]))
  65.         tag_list.append(tag[1])
  66.  
  67.     for tag2 in info2:
  68.         tag_list.append(str(tag2[0]))
  69.         tag_list.append(tag2[1])
  70.  
  71.     for tag3 in info3:
  72.         tag_list.append(str(tag3[0]))
  73.         tag_list.append(tag3[1])
  74.  
  75.     for tag4 in info4:
  76.         tag_list.append(str(tag4[0]))
  77.         tag_list.append(tag4[1])
  78.  
  79.     for tag5 in info5:
  80.         tag_list.append(str(tag5[0]))
  81.         tag_list.append(tag5[1])
  82.  
  83.     return tag_list
  84.  
  85.  
  86.  
  87. def Convert(artist1, artist2, artist3, artist4, artist5):
  88.  
  89.     tag_list = getTags(artist1, artist2, artist3, artist4, artist5)
  90.     tag_name = []
  91.     tag_weight = []
  92.     d = {}
  93.  
  94.     # Separate the tag list into two lists: names and weights
  95.     i = 2
  96.     for tag in tag_list:
  97.         if i % 2 == 0:
  98.             tag_name.append(tag)
  99.         else:
  100.             tag_weight.append(int(tag))
  101.         i+=1
  102.  
  103.     for tag2, weight in zip(tag_name, tag_weight):
  104.         if tag2 in d:
  105.             d[tag2] += weight
  106.         else:
  107.             d[tag2] = weight
  108.  
  109.     new_tags = [tag for tag in d]
  110.     new_weights = [d[tag] for tag in new_tags]
  111.  
  112.     # Combine the two lists into a single DF
  113.     tag_frame = pd.DataFrame({'tag' : new_tags,
  114.         'weight' : new_weights})
  115.    
  116.     return tag_frame
  117.  
  118.  
  119. def Recommend(artist1, artist2, artist3, artist4, artist5):
  120.  
  121.     tag_frame = Convert(artist1, artist2, artist3, artist4, artist5)
  122.  
  123.     # Scale tag weights to 100
  124.     x = tag_frame['weight']
  125.     min_max_scaler = preprocessing.MinMaxScaler()
  126.     x_scaled = min_max_scaler.fit_transform(x)
  127.     df = pd.DataFrame(x_scaled) * 100
  128.     tag_frame['weight'] = df
  129.  
  130.     # When calling API, it would always bring the results in different orders
  131.     # So, I sort the DF and save it as new DF, so it's always in same order
  132.     userFrame = tag_frame.sort('weight', ascending=False)
  133.     userFrame.index = range(1, len(userFrame) + 1)
  134.  
  135.     # Connect to DB and get all of the data stored in a DF
  136.     cur = dbCon()
  137.     cur.execute('SELECT * FROM "Artists"')
  138.     results = cur.fetchall()   
  139.     cur.close()
  140.  
  141.     columns = ['artist', 'tag1', 'weight1', 'tag2', 'weight2',
  142.             'tag3', 'weight3', 'tag4', 'weight4', 'tag5', 'weight5']
  143.  
  144.     # Put DB call into a DF    
  145.     dbFrame = pd.DataFrame(results, columns=columns)
  146.  
  147.     # I strip all whitespace because character fields are set at length of 50
  148.     # So there's a lot of whitespace to be trimmed for comparisons later
  149.     dbFrame["artist"] = dbFrame["artist"].map(str.strip)
  150.     dbFrame["tag1"] = dbFrame["tag1"].map(str.strip)
  151.     dbFrame["tag2"] = dbFrame["tag2"].map(str.strip)
  152.     dbFrame["tag3"] = dbFrame["tag3"].map(str.strip)
  153.     dbFrame["tag4"] = dbFrame["tag4"].map(str.strip)
  154.     dbFrame["tag5"] = dbFrame["tag5"].map(str.strip)
  155.  
  156.     # empty lists to append the tag weight differences in
  157.     # one list for each tag weight in the db
  158.     dif1 = []
  159.     dif2 = []
  160.     dif3 = []
  161.     dif4 = []
  162.     dif5 = []
  163.  
  164.     # Begin loop through both DF's to find tag matches
  165.     i = 1
  166.     j = 0
  167.     for u in userFrame.iterrows():
  168.         for d in dbFrame.iterrows():
  169.             if userFrame['tag'][i] == dbFrame['tag1'][j]:
  170.                 dif1.append(abs(int(userFrame['weight'][i] - int(dbFrame['weight1'][j]))))
  171.             else:
  172.                 dif1.append(100)
  173.             if userFrame['tag'][i] == dbFrame['tag2'][j]:
  174.                 dif2.append(abs(int(userFrame['weight'][i] - int(dbFrame['weight2'][j]))))
  175.             else:
  176.                 dif2.append(100)
  177.             if userFrame['tag'][i] == dbFrame['tag3'][j]:
  178.                 dif3.append(abs(int(userFrame['weight'][i] - int(dbFrame['weight3'][j]))))
  179.             else:
  180.                 dif3.append(100)
  181.             if userFrame['tag'][i] == dbFrame['tag4'][j]:
  182.                 dif4.append(abs(int(userFrame['weight'][i] - int(dbFrame['weight4'][j]))))
  183.             else:
  184.                 dif4.append(100)
  185.             if userFrame['tag'][i] == dbFrame['tag5'][j]:
  186.                 dif5.append(abs(int(userFrame['weight'][i] - int(dbFrame['weight5'][j]))))
  187.             else:
  188.                 dif5.append(100)
  189.             j += 1
  190.         j = 0
  191.         i += 1
  192.  
  193.     # add together weights for tags
  194.     totalDif = [dif1[k]+dif2[k]+dif3[k]+dif4[k]+dif5[k] for k in range(len(dif1))]
  195.     # add together all weights for artists
  196.     finalDif = [sum(totalDif[k::len(dbFrame)]) for k in range(len(dbFrame))]
  197.    
  198.     # put the differences in a data frame and sort it
  199.     d2 = {'weights': finalDif}
  200.     totalDF = pd.DataFrame(d2)
  201.     totalDF_sort =  totalDF.sort('weights', ascending=True)
  202.    
  203.     # get the indexes of the top artists so I can index the dbFrame to get the actual artist names
  204.     top50 = totalDF_sort.head(55)
  205.     topIndex = top50.index.values
  206.  
  207.  
  208.     recArtists = []
  209.     # get recommended artists
  210.    
  211.     for result in topIndex:
  212.         if dbFrame['artist'][result] == artist1:
  213.             pass       
  214.         elif dbFrame['artist'][result] == artist2:
  215.             pass
  216.         elif dbFrame['artist'][result] == artist3:
  217.             pass
  218.         elif dbFrame['artist'][result] == artist4:
  219.             pass
  220.         elif dbFrame['artist'][result] == artist5:
  221.             pass
  222.         else:
  223.             recArtists.append(dbFrame['artist'][result])
  224.  
  225.    
  226.     return recArtists
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement