# Untitled

import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
import pymysql
from fuzzywuzzy import fuzz


# display results to 3 decimal points, not in scientific notation
pd.set_option('display.float_format', lambda x: '%.3f' % x)

# Open a connection to the local MySQL database 'boardgamecollection'.
# user and password are empty strings here.
# NOTE(review): conn is never closed in the visible code.
conn = pymysql.connect(user='', password='', host='localhost', database='boardgamecollection')

# Earlier query, kept for reference: raw rating rows keyed by ids.
#query = 'select ur.userid, ur.bggid, ur.rating, ur.ratingmonth from userratings as ur'
# Current query: one (username, game name, rating) row per rating, restricted
# to games with more than 50 ratings (the subquery aliased as g).
query = '''
select u.username, gs.name, ur.rating
from (
select ur.bggid, count(*) as cntRatings
from userratings as ur
group by ur.bggid
having count(*) > 50
) as g
join userratings as ur on ur.bggid = g.bggid
join users as u on u.userid = ur.userid
join games as gs on gs.bggid = g.bggid
'''
df = pd.read_sql(query, conn)

# Reshape to a wide table: one row per game name, one column per username,
# cells holding the rating (pivot_table's default aggregation is the mean, so
# repeated name/username pairs are averaged). Missing ratings become 0.
wideGameData = df.pivot_table(index='name', columns='username', values = 'rating').fillna(0)
# Compressed-sparse-row copy of the same values; this is what gets saved to
# disk and what the KNN model is fitted on.
wideGameDataSparse = csr_matrix(wideGameData.values)

from scipy.sparse import csr_matrix

def save_sparse_csr(filename, array):
    """Write the CSR components of ``array`` to ``filename`` with ``np.savez``.

    The archive holds four named entries -- ``data``, ``indices``, ``indptr``
    and ``shape`` -- which is the layout ``load_sparse_csr`` reads back.

    filename: path or file object handed straight to ``np.savez``
    array: sparse matrix exposing ``data``, ``indices``, ``indptr``, ``shape``
    Returns: None
    """
    components = {
        'data': array.data,
        'indices': array.indices,
        'indptr': array.indptr,
        'shape': array.shape,
    }
    np.savez(filename, **components)

def load_sparse_csr(filename):
    """Rebuild a CSR matrix from an archive written by ``save_sparse_csr``.

    filename: path or file object accepted by ``np.load``; the archive must
        contain ``data``, ``indices``, ``indptr`` and ``shape`` entries
    Returns: a ``scipy.sparse.csr_matrix`` built from those four entries
    """
    # np.load returns an NpzFile that keeps the archive open until it is
    # closed; the with-block releases the handle once the arrays are read.
    with np.load(filename) as loader:
        # 'shape' is stored as an array; pass it on as a plain tuple of ints.
        shape = tuple(int(dim) for dim in loader['shape'])
        return csr_matrix((loader['data'], loader['indices'], loader['indptr']),
                          shape=shape)

# Persist the sparse game-by-user matrix so it can be read back later with
# load_sparse_csr.
# NOTE(review): the destination is a hard-coded absolute Windows path.
save_sparse_csr(r'C:\Users\Allison\Downloads\bggWideGameDataSparcse.npz', wideGameDataSparse)


from sklearn.neighbors import NearestNeighbors

# Brute-force nearest-neighbour search over the game rows, using cosine
# distance; fit() returns the estimator itself, so the calls can be chained.
model_knn = NearestNeighbors(algorithm='brute', metric='cosine').fit(wideGameDataSparse)


def getRecommendationFromGame(queryGame, gameRatingMatrix, knn_model, k, min_ratio=75):
    """
    Print the k games nearest to the best fuzzy match for a query game name.

    Inputs:
    queryGame: query game name; compared case-insensitively against every
        name in the index of gameRatingMatrix using fuzz.ratio
    gameRatingMatrix: game rating dataframe (not the sparse one, the pandas
        dataframe), indexed by game name
    knn_model: our previously fitted sklearn knn model; the row indices it
        returns are used to look up names in gameRatingMatrix.index
    k: the number of nearest neighbors to report
    min_ratio: minimum fuzz.ratio score for a name to count as a possible
        match (defaults to 75)

    Prints: the possible name matches, then up to k game recommendations for
        the best match, each with its distance
    Returns: None
    """
    query_lower = queryGame.lower()
    ratio_tuples = []

    # enumerate yields each name's row position directly, avoiding a list
    # search over the whole index for every match.
    for position, name in enumerate(gameRatingMatrix.index):
        ratio = fuzz.ratio(name.lower(), query_lower)
        if ratio >= min_ratio:
            ratio_tuples.append((name, ratio, position))

    print('Possible matches: {0}\n'.format([(x[0], x[1]) for x in ratio_tuples]))

    if not ratio_tuples:
        print('Your game didn\'t match any games in the data. Try again')
        return None

    # row position of the best-scoring game name (first one wins on ties)
    query_index = max(ratio_tuples, key=lambda x: x[1])[2]

    # Ask for one extra neighbor because the query row itself is normally
    # among the results, but never for more rows than the matrix has.
    n_games = len(gameRatingMatrix.index)
    query_vector = gameRatingMatrix.iloc[query_index, :].values.reshape(1, -1)
    distances, indices = knn_model.kneighbors(query_vector,
                                              n_neighbors=min(k + 1, n_games))

    # Drop the query game by row index rather than assuming it is the first
    # result, then keep at most k neighbors.
    neighbors = [(idx, dist)
                 for idx, dist in zip(indices.flatten(), distances.flatten())
                 if idx != query_index][:k]

    print('Recommendations for {0}:\n'.format(gameRatingMatrix.index[query_index]))
    for rank, (idx, dist) in enumerate(neighbors, start=1):
        print('{0}: {1}, with distance of {2}:'.format(rank, gameRatingMatrix.index[idx], dist))

    return None


# Example runs: print 10 recommendations for each of two query names, using
# the wide rating table and the KNN model fitted above.
getRecommendationFromGame('catan', wideGameData, model_knn, k=10)
getRecommendationFromGame('5-Minute Dungeon', wideGameData, model_knn, k=10)