Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Sample data set: reviewer name -> {movie title: rating given by that reviewer}.
oceniPoKorisnici = {
    'Lisa Rose': {'Catch Me If You Can': 3.0, 'Snakes on a Plane': 3.5, 'Superman Returns': 3.5,
                  'You, Me and Dupree': 2.5, 'The Night Listener': 3.0, 'Snitch': 3.0},
    'Gene Seymour': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5, 'Just My Luck': 1.5,
                     'The Night Listener': 3.0, 'You, Me and Dupree': 3.5},
    'Michael Phillips': {'Catch Me If You Can': 2.5, 'Lady in the Water': 2.5, 'Superman Returns': 3.5,
                         'The Night Listener': 4.0, 'Snitch': 2.0},
    'Claudia Puig': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, 'The Night Listener': 4.5,
                     'Superman Returns': 4.0, 'You, Me and Dupree': 2.5},
    'Mick LaSalle': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, 'Just My Luck': 2.0,
                     'Superman Returns': 3.0, 'You, Me and Dupree': 2.0},
    'Jack Matthews': {'Catch Me If You Can': 4.5, 'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
                      'The Night Listener': 3.0, 'Superman Returns': 5.0, 'You, Me and Dupree': 3.5,
                      'Snitch': 4.5},
    'Toby': {'Snakes on a Plane': 4.5, 'Snitch': 5.0},
    'Michelle Nichols': {'Just My Luck': 1.0, 'The Night Listener': 4.5, 'You, Me and Dupree': 3.5,
                         'Catch Me If You Can': 2.5, 'Snakes on a Plane': 3.0},
    'Gary Coleman': {'Lady in the Water': 1.0, 'Catch Me If You Can': 1.5, 'Superman Returns': 1.5,
                     'You, Me and Dupree': 2.0},
    'Larry': {'Lady in the Water': 3.0, 'Just My Luck': 3.5, 'Snitch': 1.5, 'The Night Listener': 3.5},
}
import math
from math import *
def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for person1 and person2.

    The score is ``1 / (1 + d)`` where ``d`` is the Euclidean distance between
    the two entries' ratings over the items they both rated.

    Args:
        prefs: Mapping of name -> {item: rating}.
        person1: Key of the first entry in ``prefs``.
        person2: Key of the second entry in ``prefs``.

    Returns:
        The score rounded to two decimals, or 0 when the two entries have no
        rated item in common.

    Raises:
        KeyError: If either key is missing from ``prefs``.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]
    shared = [item for item in ratings1 if item in ratings2]
    if not shared:
        return 0

    sum_of_squares = sum((ratings1[item] - ratings2[item]) ** 2 for item in shared)
    return round(1 / (1 + math.sqrt(sum_of_squares)), 2)
def sim_pearson(prefs, person1, person2):
    """Return the Pearson correlation of two entries over their shared items.

    Args:
        prefs: Mapping of name -> {item: rating}.
        person1: Key of the first entry in ``prefs``.
        person2: Key of the second entry in ``prefs``.

    Returns:
        The correlation coefficient rounded to two decimals, or 0 when the
        entries share no items or the denominator of the coefficient is zero
        (for example when one side gave every shared item the same rating).

    Raises:
        KeyError: If either key is missing from ``prefs``.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]
    shared = [item for item in ratings1 if item in ratings2]
    if not shared:
        return 0

    # Float count so integer ratings are not truncated by Python 2 division.
    n = float(len(shared))

    sum1 = sum(ratings1[item] for item in shared)
    sum2 = sum(ratings2[item] for item in shared)
    sum1_sq = sum(ratings1[item] ** 2 for item in shared)
    sum2_sq = sum(ratings2[item] ** 2 for item in shared)
    p_sum = sum(ratings1[item] * ratings2[item] for item in shared)

    num = p_sum - (sum1 * sum2 / n)
    # Floating-point rounding can leave a zero variance term slightly
    # negative; clamp so math.sqrt never sees a negative product.
    var1 = max(sum1_sq - sum1 ** 2 / n, 0.0)
    var2 = max(sum2_sq - sum2 ** 2 / n, 0.0)
    den = math.sqrt(var1 * var2)
    if den == 0:
        return 0

    return round(num / den, 2)
def transformPrefs(prefs):
    """Invert a two-level ratings mapping.

    Args:
        prefs: Mapping of person -> {item: rating}.

    Returns:
        A new mapping of item -> {person: rating} holding the same ratings.
        The input mapping is not modified.
    """
    result = {}
    for person, ratings in prefs.items():
        for item, rating in ratings.items():
            result.setdefault(item, {})[person] = rating
    return result
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the entries of ``prefs`` most similar to ``person``.

    Args:
        prefs: Mapping of name -> {item: rating}.
        person: Key in ``prefs`` to compare every other key against.
        n: Maximum number of matches to return.
        similarity: Callable ``(prefs, key1, key2) -> score``.

    Returns:
        Up to ``n`` ``(score, name)`` tuples, highest score first.
    """
    scores = [(similarity(prefs, person, other), other)
              for other in prefs if other != person]
    scores.sort(reverse=True)
    return scores[:n]
def getRecommendations(prefs, person, similarity=sim_pearson, n=3):
    """Recommend items for ``person`` using a similarity-weighted average.

    Every other entry with a strictly positive similarity to ``person``
    contributes its ratings, weighted by that similarity, for items that
    ``person`` has not rated (or has rated 0).

    Args:
        prefs: Mapping of name -> {item: rating}.
        person: Key in ``prefs`` to produce recommendations for.
        similarity: Callable ``(prefs, key1, key2) -> score``.
        n: Maximum number of recommendations to return.

    Returns:
        Up to ``n`` ``(predicted_rating, item)`` tuples, highest rating first.
    """
    totals = {}     # item -> sum of (rating * similarity)
    sim_sums = {}   # item -> sum of the similarities that contributed

    for other in prefs:
        if other == person:
            continue
        sim = similarity(prefs, person, other)
        # Non-positive similarity carries no useful weight.
        if sim <= 0:
            continue
        for item, rating in prefs[other].items():
            if item not in prefs[person] or prefs[person][item] == 0:
                totals[item] = totals.get(item, 0) + rating * sim
                sim_sums[item] = sim_sums.get(item, 0) + sim

    # Normalise by the total weight so heavily reviewed items are not favoured.
    rankings = [(total / sim_sums[item], item) for item, total in totals.items()]
    rankings.sort(reverse=True)
    return rankings[:n]
def getUserBasedRecomendations(oceni, korisnik, similarity=sim_pearson):
    """Return up to three user-based recommendations for ``korisnik``.

    Args:
        oceni: Mapping of user -> {item: rating}.
        korisnik: Key in ``oceni`` to produce recommendations for.
        similarity: Callable ``(prefs, key1, key2) -> score`` used to compare
            users; it is forwarded to ``getRecommendations``.

    Returns:
        The item names of the top three recommendations, sorted
        alphabetically (not by predicted rating).
    """
    # Forward the caller's similarity measure instead of silently using the default.
    recommended = getRecommendations(oceni, korisnik, similarity=similarity)
    titles = [item for _, item in recommended][:3]
    titles.sort()
    return titles
def getItemBasedRecomendations(oceni, korisnik, similarity=sim_pearson):
    """Return up to three item-based recommendations for ``korisnik``.

    For every item the user has rated, the most similar items (per
    ``topMatches`` on the inverted ratings) are collected; items the user has
    already rated and items with a negative similarity are discarded.

    Args:
        oceni: Mapping of user -> {item: rating}.
        korisnik: Key in ``oceni`` to produce recommendations for.
        similarity: Callable ``(prefs, key1, key2) -> score`` used to compare
            items; it is forwarded to ``topMatches``.

    Returns:
        The names of the three best-scoring distinct candidate items, sorted
        alphabetically (not by score).
    """
    by_item = transformPrefs(oceni)
    # Read the user's ratings from the argument, not from the module-level data.
    rated = oceni[korisnik]

    candidates = []
    for movie in rated:
        for score, title in topMatches(by_item, movie, similarity=similarity):
            if title not in rated and score >= 0:
                candidates.append((score, title))
    candidates.sort(reverse=True)

    # Keep the first (highest-scoring) occurrence of each title.
    seen = set()
    unique_titles = []
    for _, title in candidates:
        if title not in seen:
            seen.add(title)
            unique_titles.append(title)

    top = unique_titles[:3]
    top.sort()
    return top
def cosine_similarity(oceni, p1, p2):
    """Return the cosine similarity of two entries over their shared items.

    Args:
        oceni: Mapping of name -> {item: rating}.
        p1: Key of the first entry in ``oceni``.
        p2: Key of the second entry in ``oceni``.

    Returns:
        The cosine of the angle between the two rating vectors restricted to
        the items both entries rated, rounded to two decimals. Returns -1 when
        the denominator is zero (no shared items, or all-zero shared ratings).

    Raises:
        KeyError: If either key is missing from ``oceni``.
    """
    ratings1 = oceni[p1]
    ratings2 = oceni[p2]
    shared = [item for item in ratings1 if item in ratings2]

    numerator = sum(ratings1[it] * ratings2[it] for it in shared)
    norm1 = math.sqrt(sum(ratings1[it] * ratings1[it] for it in shared))
    norm2 = math.sqrt(sum(ratings2[it] * ratings2[it] for it in shared))
    denominator = norm1 * norm2
    if denominator == 0:
        return -1

    return round(numerator / denominator, 2)
def transformoceni(oceni):
    """Invert a two-level ratings mapping.

    Args:
        oceni: Mapping of person -> {item: rating}.

    Returns:
        A new mapping of item -> {person: rating} holding the same ratings.
        The input mapping is not modified.
    """
    result = {}
    for person, ratings in oceni.items():
        for item, rating in ratings.items():
            # Swap the places of the person and the item.
            result.setdefault(item, {})[person] = rating
    return result
def similarity(recnik, film):
    """Compare ``film`` against every other key of ``recnik`` with three measures.

    Args:
        recnik: Mapping of name -> {rater: rating}.
        film: Key in ``recnik`` to compare the other keys against.

    Returns:
        A dict mapping every other key to a tuple
        ``(cosine_similarity, sim_pearson, sim_distance)`` computed between
        ``film`` and that key.
    """
    return {
        item: (
            cosine_similarity(recnik, film, item),
            sim_pearson(recnik, film, item),
            sim_distance(recnik, film, item),
        )
        for item in recnik
        if item != film
    }
if __name__ == "__main__":
    # Print, for every other movie in alphabetical order, its cosine, Pearson
    # and distance similarity to the chosen movie.
    film = "Catch Me If You Can"
    filmovi = transformoceni(oceniPoKorisnici)
    lista = similarity(filmovi, film)
    # sorted() works on both a Python 2 list and a Python 3 dict view.
    for naslov, merki in sorted(lista.items()):
        print(naslov)
        # Single-argument print with %-formatting gives the same output on
        # Python 2 and Python 3.
        print("%s %s %s" % (merki[0], merki[1], merki[2]))
        print('\n')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement