Advertisement
Guest User

Dinara koten

a guest
Nov 20th, 2017
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.44 KB | None | 0 0
  1. import pydelicious
  2. from math import sqrt
  3.  
  4. critics={'Lisa Rose': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5,
  5. 'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5,
  6. 'The Night Listener': 3.0},
  7. 'Gene Seymour': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5,
  8. 'Just My Luck': 1.5, 'Superman Returns': 5.0, 'The Night Listener': 3.0,
  9. 'You, Michael and Dupree': 3.5},
  10. 'Michael Phillips': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0,
  11. 'Superman Returns': 3.5, 'The Night Listener': 4.0},
  12. 'Claudia Puig': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0,
  13. 'The Night Listener': 4.5, 'Superman Returns': 4.0,
  14. 'You, Me and Dupree': 2.5},
  15. 'Mick LaSalle': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
  16. 'Just My Luck': 2.0, 'Superman Returns': 3.0, 'The Night Listener': 3.0,
  17. 'You, Me and Dupree': 2.0},
  18. 'Jack Matthews': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
  19. 'The Night Listener': 3.0, 'Superman Returns': 5.0, 'You, Me and Dupree': 3.5},
  20. 'Toby': {'Snakes on a Plane':4.5,'You, Me and Dupree':1.0,'Superman Returns':4.0}}
  21.  
  22. def sim_distance(prefs,person1,person2):
  23. si={}
  24. for item in prefs[person1]:
  25. if item in prefs[person2]:
  26. si[item]=1
  27. if len(si)==0: return 0
  28. sum_of_squares=sum([pow(prefs[person1][item]-prefs[person2][item],2)
  29. for item in prefs[person1] if item in prefs[person2]])
  30.  
  31. return 1/(1+sum_of_squares)
  32.  
  33.  
  34.  
  35. def sim_pearson(prefs,p1,p2):
  36. si={}
  37. for item in prefs[p1]:
  38. if item in prefs[p2]: si[item]=1
  39.  
  40. n=len(si)
  41. if n==0: return 0
  42.  
  43. sum1=sum([prefs[p1][it] for it in si])
  44. sum2=sum([prefs[p2][it] for it in si])
  45.  
  46. sum1Sq=sum([pow(prefs[p1][it],2) for it in si])
  47. sum2Sq=sum([pow(prefs[p2][it],2) for it in si])
  48.  
  49.  
  50. pSum=sum([prefs[p1][it]*prefs[p2][it] for it in si])
  51.  
  52. num=pSum-(sum1*sum2/n)
  53. den=sqrt((sum1Sq-pow(sum1,2)/n)*(sum2Sq-pow(sum2,2)/n))
  54. if den==0: return 0
  55. r=num/den
  56. return r
  57.  
  58.  
  59. def topMatches(prefs,person,n=5,similarity=sim_pearson):
  60. scores=[(similarity(prefs,person,other),other)
  61. for other in prefs if other!=person]
  62. scores.sort( )
  63. scores.reverse( )
  64. return scores[0:n]
  65.  
  66. # Gets recommendations for a person by using a weighted average
  67. # of every other user's rankings
  68. def getRecommendations(prefs,person,similarity=sim_pearson):
  69. totals={}
  70. simSums={}
  71. for other in prefs:
  72. if other==person: continue
  73. sim=similarity(prefs,person,other)
  74.  
  75. if sim<=0: continue
  76. for item in prefs[other]:
  77. if item not in prefs[person] or prefs[person][item]==0:
  78. totals.setdefault(item,0)
  79. totals[item]+=prefs[other][item]*sim
  80. simSums.setdefault(item,0)
  81. simSums[item]+=sim
  82.  
  83. rankings=[(total/simSums[item],item) for item,total in totals.items( )]
  84. rankings.sort( )
  85. rankings.reverse( )
  86. return rankings
  87.  
  88.  
  89. def transformPrefs(prefs):
  90. result={}
  91. for person in prefs:
  92. for item in prefs[person]:
  93. result.setdefault(item,{})
  94. result[item][person]=prefs[person][item]
  95. return result
  96.  
  97. def calculateSimilarItems(prefs,n=10):
  98. result={}
  99. itemPrefs=transformPrefs(prefs)
  100. c=0
  101. for item in itemPrefs:
  102. c+=1
  103. if c%100==0: print "%d / %d" % (c,len(itemPrefs))
  104. scores=topMatches(itemPrefs,item,n=n,similarity=sim_distance)
  105. result[item]=scores
  106. return result
  107.  
  108. def getRecommendedItems(prefs,itemMatch,user):
  109. userRatings=prefs[user]
  110. scores={}
  111. totalSim={}
  112. for (item,rating) in userRatings.items( ):
  113. for (similarity,item2) in itemMatch[item]:
  114. if item2 in userRatings: continue
  115. scores.setdefault(item2,0)
  116. scores[item2]+=similarity*rating
  117.  
  118. totalSim.setdefault(item2,0)
  119. totalSim[item2]+=similarity
  120. rankings=[(score/totalSim[item],item) for item,score in scores.items( )]
  121. rankings.sort( )
  122. rankings.reverse( )
  123. return rankings
  124.  
  125.  
  126. def loadMovieLens(path='ml-100k'):
  127. movies={}
  128. for line in open(path+'/u.item'):
  129. (id,title)=line.split('|')[0:2]
  130. movies[id]=title
  131. prefs={}
  132. for line in open(path+'/u.data'):
  133. (user,movieid,rating,ts)=line.split('\t')
  134. prefs.setdefault(user,{})
  135. prefs[user][movies[movieid]]=float(rating)
  136. return prefs
  137.  
  138.  
  139.  
  140.  
  141.  
  142. '''example for testing code'''
  143.  
  144. # itemsim=calculateSimilarItems(critics)
  145. # print(getRecommendedItems(critics,itemsim,'Toby'))
  146. # print(itemsim)
  147.  
  148. # print(sim_distance(critics,'Lisa Rose','Gene Seymour'))
  149. # print(sim_pearson(critics,'Lisa Rose','Gene Seymour'))
  150.  
  151. prefs=loadMovieLens()
  152. print(prefs['87'])
  153. print(getRecommendations(prefs,'87')[0:30])
  154. itemsim=calculateSimilarItems(prefs,n=50)
  155. print(getRecommendedItems(prefs,itemsim,'87')[0:30])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement