Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import math
# Sample data set: reviewer name -> {movie title -> rating given by that reviewer}.
oceniPoKorisnici = {
    'Lisa Rose': {
        'Catch Me If You Can': 3.0,
        'Snakes on a Plane': 3.5,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
        'Snitch': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Catch Me If You Can': 2.5,
        'Lady in the Water': 2.5,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
        'Snitch': 2.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Catch Me If You Can': 4.5,
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
        'Snitch': 4.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'Snitch': 5.0,
    },
    'Michelle Nichols': {
        'Just My Luck': 1.0,
        'The Night Listener': 4.5,
        'You, Me and Dupree': 3.5,
        'Catch Me If You Can': 2.5,
        'Snakes on a Plane': 3.0,
    },
    'Gary Coleman': {
        'Lady in the Water': 1.0,
        'Catch Me If You Can': 1.5,
        'Superman Returns': 1.5,
        'You, Me and Dupree': 2.0,
    },
    'Larry': {
        'Lady in the Water': 3.0,
        'Just My Luck': 3.5,
        'Snitch': 1.5,
        'The Night Listener': 3.5,
    },
}
# Returns a distance-based similarity measure for person1 and person2.
def sim_distance(oceni, person1, person2):
    """Score how alike two people's ratings are using Euclidean distance.

    Args:
        oceni: dict mapping each person to a dict of {item: rating}.
        person1: key of the first person in ``oceni``.
        person2: key of the second person in ``oceni``.

    Returns:
        The plain integer ``0`` when the two people have no rated item in
        common.  Otherwise a tuple ``(score, shared)`` where ``score`` is
        ``1 / (1 + sqrt(sum of squared rating differences))`` rounded to
        three decimals and ``shared`` is the number of items both rated.
        Note the two shapes differ; callers must handle both.

    Raises:
        KeyError: if ``person1`` or ``person2`` is not a key of ``oceni``.
    """
    ratings1 = oceni[person1]
    ratings2 = oceni[person2]
    # One pass collects the squared difference for every item both people
    # rated; its length doubles as the count of shared items.
    squared_diffs = [(ratings1[item] - ratings2[item]) ** 2
                     for item in ratings1 if item in ratings2]
    # No shared ratings: report zero similarity.
    if not squared_diffs:
        return 0
    score = 1 / (1 + math.sqrt(sum(squared_diffs)))
    return (round(score, 3), len(squared_diffs))
def sim_pearson(oceni, person1, person2):
    """Return the Pearson correlation of two people's shared ratings.

    Args:
        oceni: dict mapping each person to a dict of {item: rating}.
        person1: key of the first person in ``oceni``.
        person2: key of the second person in ``oceni``.

    Returns:
        The correlation coefficient rounded to three decimals, or ``0``
        when the two people share no rated items or when the coefficient
        is undefined because at least one of them gave the same rating to
        every shared item.

    Raises:
        KeyError: if ``person1`` or ``person2`` is not a key of ``oceni``.
    """
    ratings1 = oceni[person1]
    ratings2 = oceni[person2]
    # Only items rated by both people take part in the correlation.
    common = [item for item in ratings1 if item in ratings2]
    if not common:
        return 0
    # A float count keeps the divisions below true divisions even when the
    # ratings are integers and the interpreter is Python 2.
    n = float(len(common))
    xs = [ratings1[item] for item in common]
    ys = [ratings2[item] for item in common]
    # Sums of the shared ratings for each person.
    sum1 = sum(xs)
    sum2 = sum(ys)
    # Sums of the squared shared ratings for each person.
    sum1_sq = sum([x ** 2 for x in xs])
    sum2_sq = sum([y ** 2 for y in ys])
    # Sum of the products of the paired ratings.
    p_sum = sum([x * y for x, y in zip(xs, ys)])
    # Pearson coefficient: covariance term over the product of spreads.
    num = p_sum - (sum1 * sum2 / n)
    spread = (sum1_sq - sum1 ** 2 / n) * (sum2_sq - sum2 ** 2 / n)
    # A zero spread means the coefficient is undefined.  Floating-point
    # rounding can also push the product slightly below zero for constant
    # ratings, which would make math.sqrt raise ValueError; treat both
    # cases as "no correlation".
    if spread <= 0:
        return 0
    r = num / math.sqrt(spread)
    return round(r, 3)
def transformPrefs(prefs):
    """Invert a nested ratings dict.

    Turns ``{person: {item: rating}}`` into ``{item: {person: rating}}``
    and returns the new dict; ``prefs`` itself is not modified.
    """
    flipped = {}
    for person, rated in prefs.items():
        for item, rating in rated.items():
            if item not in flipped:
                flipped[item] = {}
            # Swap the roles of the person and the item.
            flipped[item][person] = rating
    return flipped
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the entries of ``prefs`` most similar to ``person``.

    Every other key of ``prefs`` is scored with ``similarity(prefs, person,
    other)``.  The result is a list of ``(score, other)`` tuples ordered
    from the highest to the lowest score, cut to the first ``n`` entries
    (``n=None`` keeps them all).
    """
    # Sorted in ascending order first ...
    ascending = sorted(
        (similarity(prefs, person, candidate), candidate)
        for candidate in prefs
        if candidate != person
    )
    # ... then flipped so the most similar entries come first.
    descending = ascending[::-1]
    return descending[0:n]
def getUserBasedRecomendations(oceni, korisnik, similarity=sim_pearson):
    """Recommend movies to ``korisnik`` from the ratings of similar users.

    Every other user with a positive similarity to ``korisnik`` contributes
    their ratings, weighted by that similarity, for each movie ``korisnik``
    has not rated (or has rated 0).  A movie's predicted score is the
    similarity-weighted average of those ratings.

    Args:
        oceni: dict mapping each user to a dict of {movie: rating}.
        korisnik: the user to produce recommendations for.
        similarity: callable ``(oceni, user_a, user_b)`` returning either a
            number or a tuple whose first element is the number (the shape
            returned by ``sim_distance``).

    Returns:
        A list of ``(predicted_score, movie)`` tuples, best score first.
    """
    totals = {}    # movie -> sum of rating * similarity
    sim_sums = {}  # movie -> sum of the similarities that contributed
    for person, ratings in oceni.items():
        if person == korisnik:
            continue
        closeness = similarity(oceni, korisnik, person)
        # Some measures return (score, shared_count); only the score is
        # needed here.  Without this a tuple would break the arithmetic.
        if isinstance(closeness, tuple):
            closeness = closeness[0]
        # Users who are not positively similar contribute nothing.
        if closeness <= 0:
            continue
        seen = oceni[korisnik]
        for movie, rating in ratings.items():
            # Skip movies the target user has already given a real rating.
            if movie in seen and seen[movie] != 0:
                continue
            totals[movie] = totals.get(movie, 0) + rating * closeness
            sim_sums[movie] = sim_sums.get(movie, 0) + closeness
    # sim_sums[movie] is a sum of strictly positive values, so never zero.
    rankings = [(total / sim_sums[movie], movie)
                for movie, total in totals.items()]
    rankings.sort(reverse=True)
    return rankings
def getItemBasedRecomendations(oceni, korisnik, similarity=sim_pearson):
    """Recommend movies to ``korisnik`` from movies similar to those rated.

    For every movie ``korisnik`` has rated, all other movies are compared
    with it using ``similarity`` on the movie-centric view of ``oceni``.
    Each unrated movie with a positive similarity collects the value
    ``similarity * korisnik's rating of the rated movie``; the movie's
    final score is the mean of the collected values.

    Args:
        oceni: dict mapping each user to a dict of {movie: rating}.
        korisnik: the user to produce recommendations for.
        similarity: callable ``(prefs, a, b)`` returning a numeric
            similarity; it is forwarded to ``topMatches``.

    Returns:
        A list of ``(score, movie)`` tuples, best score first.

    Raises:
        KeyError: if ``korisnik`` is not a key of ``oceni``.
    """
    oceni_po_film = transformPrefs(oceni)
    own_ratings = oceni[korisnik]
    weighted_scores = {}  # candidate movie -> list of similarity * rating
    for rated_movie, my_rating in own_ratings.items():
        # Forward the caller's measure; distinct loop names keep the
        # parameter and the outer loop variable from being shadowed.
        neighbours = topMatches(oceni_po_film, rated_movie, n=None,
                                similarity=similarity)
        for score, candidate in neighbours:
            # Ignore movies already rated and non-positive similarities.
            if candidate in own_ratings or score <= 0:
                continue
            weighted_scores.setdefault(candidate, []).append(score * my_rating)
    similarity_per_item_avg = [(mean(values), candidate)
                               for candidate, values in weighted_scores.items()]
    similarity_per_item_avg.sort(reverse=True)
    return similarity_per_item_avg
def mean(x):
    """Return the arithmetic mean of sequence ``x``, or 0.0 when it is empty."""
    count = len(x)
    return sum(x) / count if count != 0 else 0.0
def getTopMovies(recommendations, n=3):
    """Return the titles of the first ``n`` recommendations, alphabetically.

    ``recommendations`` is an iterable of ``(score, title)`` pairs already
    ordered best-first; only the title (index 1) of each pair is used.
    """
    titles = []
    for position, entry in enumerate(recommendations):
        # Stop as soon as n entries have been taken.
        if position == n:
            break
        titles.append(entry[1])
    titles.sort()
    return titles
if __name__ == "__main__":
    # Read the user name from standard input.
    # NOTE(review): on Python 2, input() evaluates the typed text as a
    # Python expression (so the name must be entered quoted, and arbitrary
    # expressions would be executed); on Python 3 it returns the raw line.
    korisnik = input()
    userbased = getUserBasedRecomendations(oceniPoKorisnici, korisnik)
    itembased = getItemBasedRecomendations(oceniPoKorisnici, korisnik)
    # A single pre-formatted argument prints the same text whether print
    # is the Python 2 statement or the Python 3 function.
    print('user-based: %s' % (getTopMovies(userbased),))
    print('item-based: %s' % (getTopMovies(itembased),))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement