Guest User

Untitled

a guest
Feb 25th, 2018
123
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.17 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. from scipy.sparse import csr_matrix
  4. import pymysql
  5. from fuzzywuzzy import fuzz
  6.  
  7.  
  8. # display results to 3 decimal points, not in scientific notation
  9. pd.set_option('display.float_format', lambda x: '%.3f' % x)
  10.  
  11. conn = pymysql.connect(user='', password='', host='localhost', database='boardgamecollection')
  12.  
  13. #query = 'select ur.userid, ur.bggid, ur.rating, ur.ratingmonth from userratings as ur'
  14. query = '''
  15. select u.username, gs.name, ur.rating
  16. from (
  17. select ur.bggid, count(*) as cntRatings
  18. from userratings as ur
  19. group by ur.bggid
  20. having count(*) > 50
  21. ) as g
  22. join userratings as ur on ur.bggid = g.bggid
  23. join users as u on u.userid = ur.userid
  24. join games as gs on gs.bggid = g.bggid
  25. '''
  26. df = pd.read_sql(query, conn)
  27.  
  28. wideGameData = df.pivot_table(index='name', columns='username', values = 'rating').fillna(0)
  29. wideGameDataSparse = csr_matrix(wideGameData.values)
  30.  
  31. from scipy.sparse import csr_matrix
  32.  
  33. def save_sparse_csr(filename,array):
  34. np.savez(filename,data = array.data ,indices=array.indices,
  35. indptr =array.indptr, shape=array.shape )
  36.  
  37. def load_sparse_csr(filename):
  38. loader = np.load(filename)
  39. return csr_matrix((loader['data'], loader['indices'], loader['indptr']),
  40. shape = loader['shape'])
  41.  
  42. save_sparse_csr(r'C:\Users\Allison\Downloads\bggWideGameDataSparcse.npz', wideGameDataSparse)
  43.  
  44.  
  45. from sklearn.neighbors import NearestNeighbors
  46.  
  47. model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
  48. model_knn.fit(wideGameDataSparse)
  49.  
  50.  
  51. def getRecommendationFromGame(queryGame, gameRatingMatrix, knn_model, k):
  52. """
  53. Inputs:
  54. queryGame: query game name
  55. gameRatingMatrix: game rating dataframe (not the sparse one, the pandas dataframe)
  56. knn_model: our previously fitted sklearn knn model
  57. k: the number of nearest neighbors.
  58.  
  59. Prints: Artist recommendations for the query artist
  60. Returns: None
  61. """
  62. query_index = None
  63. ratio_tuples = []
  64.  
  65. for i in gameRatingMatrix.index:
  66. ratio = fuzz.ratio(i.lower(), queryGame.lower())
  67. if ratio >= 75:
  68. current_query_index = gameRatingMatrix.index.tolist().index(i)
  69. ratio_tuples.append((i, ratio, current_query_index))
  70.  
  71. print('Possible matches: {0}\n'.format([(x[0], x[1]) for x in ratio_tuples]))
  72.  
  73. try:
  74. query_index = max(ratio_tuples, key=lambda x: x[1])[2] # get the index of the best artist match in the data
  75. except:
  76. print('Your artist didn\'t match any artists in the data. Try again')
  77. return None
  78.  
  79. distances, indices = knn_model.kneighbors(gameRatingMatrix.iloc[query_index, :].values.reshape(1, -1),
  80. n_neighbors=k + 1)
  81.  
  82. for i in range(0, len(distances.flatten())):
  83. if i == 0:
  84. print('Recommendations for {0}:\n'.format(gameRatingMatrix.index[query_index]))
  85. else:
  86. print('{0}: {1}, with distance of {2}:'.format(i, gameRatingMatrix.index[indices.flatten()[i]],
  87. distances.flatten()[i]))
  88.  
  89. return None
  90.  
  91.  
  92. getRecommendationFromGame('catan', wideGameData, model_knn, k=10)
  93. getRecommendationFromGame('5-Minute Dungeon', wideGameData, model_knn, k=10)
Add Comment
Please, Sign In to add comment