Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- from scipy.sparse import csr_matrix
- import pymysql
- from fuzzywuzzy import fuzz
# Show floats with three decimal places instead of scientific notation.
pd.set_option('display.float_format', lambda x: '%.3f' % x)

conn = pymysql.connect(user='', password='', host='localhost', database='boardgamecollection')

# Alternative raw query, currently unused:
# query = 'select ur.userid, ur.bggid, ur.rating, ur.ratingmonth from userratings as ur'

# The inner select keeps only games that have more than 50 ratings; the joins
# then return one (username, game name, rating) row per rating of those games.
query = '''
select u.username, gs.name, ur.rating
from (
select ur.bggid, count(*) as cntRatings
from userratings as ur
group by ur.bggid
having count(*) > 50
) as g
join userratings as ur on ur.bggid = g.bggid
join users as u on u.userid = ur.userid
join games as gs on gs.bggid = g.bggid
'''

# The connection is only needed for this one read, so release it as soon as
# the result is in memory, even if the query raises.
try:
    df = pd.read_sql(query, conn)
finally:
    conn.close()

# One row per game name, one column per username; a game a user has not rated
# is stored as 0. pivot_table averages duplicate (name, username) pairs.
# NOTE(review): games that share a name but have different bggid values would
# be merged into one row here -- confirm names are unique.
wideGameData = df.pivot_table(index='name', columns='username', values='rating').fillna(0)

# Sparse copy of the same matrix, used for saving to disk and for fitting.
wideGameDataSparse = csr_matrix(wideGameData.values)
- from scipy.sparse import csr_matrix
def save_sparse_csr(filename, array):
    """Write the components of a CSR matrix to an ``.npz`` archive.

    Inputs:
    filename: target path, passed straight to ``np.savez``
    array: CSR matrix exposing ``data``, ``indices``, ``indptr`` and ``shape``
    Returns: None

    The archive holds four entries named ``data``, ``indices``, ``indptr``
    and ``shape``.
    """
    components = {
        'data': array.data,
        'indices': array.indices,
        'indptr': array.indptr,
        'shape': array.shape,
    }
    np.savez(filename, **components)
def load_sparse_csr(filename):
    """Rebuild a CSR matrix from an ``.npz`` archive.

    Inputs:
    filename: path of an archive containing the entries ``data``,
    ``indices``, ``indptr`` and ``shape`` (the layout ``save_sparse_csr``
    writes)
    Returns: a ``csr_matrix`` built from those four entries
    """
    # np.load keeps the archive's file handle open until it is closed; the
    # context manager releases it once the arrays have been read.
    with np.load(filename) as loader:
        return csr_matrix(
            (loader['data'], loader['indices'], loader['indptr']),
            # The shape is stored as an array; pass it on as a plain tuple.
            shape=tuple(loader['shape']),
        )
# Write the sparse rating matrix to disk as an .npz archive.
save_sparse_csr(
    filename=r'C:\Users\Allison\Downloads\bggWideGameDataSparcse.npz',
    array=wideGameDataSparse,
)

from sklearn.neighbors import NearestNeighbors

# Brute-force nearest-neighbour search using cosine distance between the rows
# of the sparse rating matrix; fit() returns the estimator itself.
model_knn = NearestNeighbors(algorithm='brute', metric='cosine').fit(wideGameDataSparse)
def getRecommendationFromGame(queryGame, gameRatingMatrix, knn_model, k):
    """
    Print the k nearest neighbours of the game that best matches a name.

    Inputs:
    queryGame: query game name; compared case-insensitively with every
        entry of the matrix index using fuzz.ratio
    gameRatingMatrix: game rating dataframe indexed by game name (not the
        sparse one, the pandas dataframe)
    knn_model: our previously fitted sklearn knn model; presumably fitted
        on the same rows, in the same order, as gameRatingMatrix
    k: the number of nearest neighbors to print
    Prints: the candidate matches (ratio >= 75), then up to k game
        recommendations for the best match with their distances
    Returns: None
    """
    lowered_query = queryGame.lower()

    # Collect (name, ratio, row position) for every sufficiently similar name.
    # enumerate supplies the position directly, avoiding a list search per hit.
    ratio_tuples = []
    for position, name in enumerate(gameRatingMatrix.index):
        ratio = fuzz.ratio(name.lower(), lowered_query)
        if ratio >= 75:
            ratio_tuples.append((name, ratio, position))
    print('Possible matches: {0}\n'.format([(x[0], x[1]) for x in ratio_tuples]))

    if not ratio_tuples:
        print("Your game didn't match any games in the data. Try again")
        return None

    # Row position of the best match; max() keeps the first one on a tie.
    query_index = max(ratio_tuples, key=lambda x: x[1])[2]

    query_vector = gameRatingMatrix.iloc[query_index, :].values.reshape(1, -1)
    # Ask for one extra neighbour because the query game is normally returned
    # as its own nearest neighbour.
    distances, indices = knn_model.kneighbors(query_vector, n_neighbors=k + 1)

    print('Recommendations for {0}:\n'.format(gameRatingMatrix.index[query_index]))

    # Drop the query game by row position rather than assuming it is first:
    # a game with an identical rating vector can tie with it on distance.
    neighbours = [
        (distance, neighbour_index)
        for distance, neighbour_index in zip(distances.flatten(), indices.flatten())
        if neighbour_index != query_index
    ][:k]
    for rank, (distance, neighbour_index) in enumerate(neighbours, start=1):
        print('{0}: {1}, with distance of {2}:'.format(
            rank, gameRatingMatrix.index[neighbour_index], distance))
    return None
# Example queries: print ten recommendations for each of these game names.
for example_game in ('catan', '5-Minute Dungeon'):
    getRecommendationFromGame(example_game, wideGameData, model_knn, k=10)
Add Comment
Please, Sign In to add comment