Advertisement
Guest User

Untitled

a guest
Sep 28th, 2016
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.06 KB | None | 0 0
  1. import pandas
  2. import operator
  3. import math
  4. import sys
  5.  
  6.  
  7. dataframe = None
  8.  
  9.  
  10. def __avg__(lst):
  11. return sum(lst)/len(lst)
  12.  
  13.  
  14. def define_dataframe(csv_file):
  15. try:
  16. global dataframe
  17. dataframe = pandas.read_csv(csv_file)
  18. except pandas.io.common.EmptyDataError:
  19. raise Exception('Not a valid file.')
  20.  
  21.  
  22. def print_dataframe():
  23. try:
  24. assert dataframe is not None, 'Dataframe was not created.'
  25. print('\n')
  26. print(dataframe)
  27. print('\n')
  28. except AssertionError, e:
  29. print(e.args[0])
  30.  
  31.  
  32. def rate(user, item):
  33. try:
  34. return float(dataframe.get_value(user, item))
  35. except ValueError:
  36. return None
  37. except KeyError:
  38. raise Exception('Not a valid user or item.')
  39.  
  40.  
  41. def item_rates(item, **kwargs):
  42. rates = []
  43. for rate in dataframe[item]:
  44. try:
  45. rates.append(float(rate))
  46. except ValueError:
  47. if kwargs.get('complete', False):
  48. rates.append(None)
  49. return rates
  50.  
  51.  
  52. def user_rates(user, **kwargs):
  53. rates = []
  54. for rate in dataframe.ix[user]:
  55. try:
  56. rates.append(float(rate))
  57. except ValueError:
  58. if kwargs.get('complete', False):
  59. rates.append(None)
  60. return rates
  61.  
  62.  
  63. def sim(user_a, user_b):
  64. user_a_rates = user_rates(user_a, complete=True)
  65. user_b_rates = user_rates(user_b, complete=True)
  66. user_a_avg = __avg__(user_rates(user_a))
  67. user_b_avg = __avg__(user_rates(user_b))
  68.  
  69. numerator = 0
  70. denominator_1 = 0
  71. denominator_2 = 0
  72.  
  73. for element_a, element_b in zip(user_a_rates, user_b_rates):
  74. if element_a is not None and element_b is not None:
  75. numerator += (element_a - user_a_avg)*(element_b - user_b_avg)
  76. denominator_1 += math.pow(element_a - user_a_avg, 2)
  77. denominator_2 += math.pow(element_b - user_b_avg, 2)
  78.  
  79. return numerator/(math.sqrt(denominator_1)*math.sqrt(denominator_2))
  80.  
  81.  
  82. def __user_pred__(similarities, curr_user, curr_item):
  83. numerator = 0
  84. denominator = 0
  85.  
  86. for data_user, sim_ratio in similarities.items():
  87. numerator += sim_ratio * \
  88. (rate(data_user, curr_item) -
  89. __avg__(user_rates(data_user)))
  90. denominator += sim_ratio
  91.  
  92. return __avg__(user_rates(curr_user)) + (numerator / denominator)
  93.  
  94.  
  95. def __item_pred__(similarities, curr_user, curr_item):
  96. return -1.0
  97.  
  98.  
  99. def pred(user, item, **kwargs):
  100. similarities = {}
  101.  
  102. for data_user in dataframe.index.values:
  103. if data_user != user:
  104. similarities[data_user] = sim(user, data_user)
  105.  
  106. if 'N' in kwargs:
  107. try:
  108. assert kwargs['N'] <= len(similarities), \
  109. 'N cannot be larger than the number of rows.'
  110. except AssertionError, e:
  111. print(e.args[0])
  112. sys.exit(0)
  113.  
  114. similarities = dict(sorted(similarities.items(),
  115. key=operator.itemgetter(1),
  116. reverse=True)[:kwargs.get('N', 2)])
  117.  
  118. return (__item_pred__(similarities, user, item),
  119. __user_pred__(similarities, user, item))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement