Advertisement
TwiNNeR

snzl5z1

Dec 4th, 2016
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.56 KB | None | 0 0
  1. import math
  2.  
  3. oceniPoKorisnici = {
  4.     'Lisa Rose': {'Catch Me If You Can': 3.0, 'Snakes on a Plane': 3.5, 'Superman Returns': 3.5,
  5.                   'You, Me and Dupree': 2.5, 'The Night Listener': 3.0, 'Snitch': 3.0},
  6.     'Gene Seymour': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5, 'Just My Luck': 1.5, 'The Night Listener': 3.0,
  7.                      'You, Me and Dupree': 3.5},
  8.     'Michael Phillips': {'Catch Me If You Can': 2.5, 'Lady in the Water': 2.5, 'Superman Returns': 3.5,
  9.                          'The Night Listener': 4.0, 'Snitch': 2.0},
  10.     'Claudia Puig': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, 'The Night Listener': 4.5, 'Superman Returns': 4.0,
  11.                      'You, Me and Dupree': 2.5},
  12.     'Mick LaSalle': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, 'Just My Luck': 2.0, 'Superman Returns': 3.0,
  13.                      'You, Me and Dupree': 2.0},
  14.     'Jack Matthews': {'Catch Me If You Can': 4.5, 'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
  15.                       'The Night Listener': 3.0, 'Superman Returns': 5.0, 'You, Me and Dupree': 3.5, 'Snitch': 4.5},
  16.     'Toby': {'Snakes on a Plane': 4.5, 'Snitch': 5.0},
  17.     'Michelle Nichols': {'Just My Luck': 1.0, 'The Night Listener': 4.5, 'You, Me and Dupree': 3.5,
  18.                          'Catch Me If You Can': 2.5, 'Snakes on a Plane': 3.0},
  19.     'Gary Coleman': {'Lady in the Water': 1.0, 'Catch Me If You Can': 1.5, 'Superman Returns': 1.5,
  20.                      'You, Me and Dupree': 2.0},
  21.     'Larry': {'Lady in the Water': 3.0, 'Just My Luck': 3.5, 'Snitch': 1.5, 'The Night Listener': 3.5}
  22. }
  23.  
  24.  
  25. # Vrakja merka za slicnost bazirana na rastojanieto za person1 i person2
  26. def sim_distance(oceni, person1, person2):
  27.     # Se pravi lista na zaednicki predmeti (filmovi)
  28.     zaednicki = {}
  29.     for item in oceni[person1].keys():
  30.         if item in oceni[person2]:
  31.             zaednicki[item] = 1
  32.     # ako nemaat zaednicki rejtinzi, vrati 0
  33.     if len(zaednicki) == 0: return 0
  34.     # Soberi gi kvadratite na zaednickite razliki
  35.     sum_of_squares = sum([pow(oceni[person1][item] - oceni[person2][item], 2)
  36.                           for item in oceni[person1] if item in oceni[person2]])
  37.     ret = 1 / (1 + math.sqrt(sum_of_squares))
  38.     return (round(ret, 3), len(zaednicki))
  39.  
  40.  
  41. def sim_pearson(oceni, person1, person2):
  42.     # Se kreira recnik vo koj ke se cuvaat predmetite (filmovi) koi se oceneti od dvajcata
  43.     # Vo recnikot ni se vazni samo klucevite za da gi cuvame iminjata na filmovite koi se zaednicki, a vrednostite ne ni se vazni
  44.     zaednicki = {}
  45.     for item in oceni[person1]:
  46.         if item in oceni[person2]: zaednicki[item] = 1
  47.  
  48.     # Se presmetuva brojot na predmeti oceneti od dvajcata
  49.     n = len(zaednicki)
  50.  
  51.     # Ako nemaat zaednicki predmeti vrati korelacija 0
  52.     if n == 0: return 0
  53.  
  54.     # Soberi gi zaednickite oceni (rejtinzi) za  sekoja licnost posebno
  55.     sum1 = sum([oceni[person1][it] for it in zaednicki])
  56.     sum2 = sum([oceni[person2][it] for it in zaednicki])
  57.  
  58.     # Soberi gi kvadratite od zaednickite oceni (rejtinzi) za  sekoja licnost posebno
  59.     sum1Sq = sum([pow(oceni[person1][it], 2) for it in zaednicki])
  60.     sum2Sq = sum([pow(oceni[person2][it], 2) for it in zaednicki])
  61.  
  62.     # Soberi gi proizvodite od ocenite na dvete licnosti
  63.     pSum = sum([oceni[person1][it] * oceni[person2][it] for it in zaednicki])
  64.  
  65.     # Presmetaj go koeficientot na korelacija
  66.     num = pSum - (sum1 * sum2 / n)
  67.     den = math.sqrt((sum1Sq - pow(sum1, 2) / n) * (sum2Sq - pow(sum2, 2) / n))
  68.     if den == 0: return 0
  69.     r = num / den
  70.     # return (round(r, 3), len(zaednicki))
  71.     return round(r, 3)
  72.  
  73.  
  74. def transformPrefs(prefs):
  75.     result = {}
  76.     for person in prefs:
  77.         for item in prefs[person]:
  78.             result.setdefault(item, {})
  79.             # Zameni gi mestata na licnosta i predmetot
  80.             result[item][person] = prefs[person][item]
  81.     return result
  82.  
  83.  
  84. def topMatches(prefs, person, n=5, similarity=sim_pearson):
  85.     scores = [(similarity(prefs, person, other), other)
  86.               for other in prefs if other != person]
  87.     # Se sortira listata vo rastecki redosled
  88.     scores.sort()
  89.     # Se prevrtuva za najslicnite (so najgolema vrednost) da bidat prvi
  90.     scores.reverse()
  91.     return scores[0:n]
  92.  
  93.  
  94. def getUserBasedRecomendations(oceni, korisnik, similarity=sim_pearson):
  95.     rankings = []
  96.  
  97.     totals = {}
  98.     simSums = {}
  99.  
  100.     for person in oceni:
  101.         if person == korisnik:
  102.             continue
  103.         bliskost = similarity(oceni, korisnik, person)
  104.         # bliskostVred = bliskost[0]
  105.         if bliskost <= 0:
  106.             continue
  107.         for movie in oceni[person]:
  108.             if movie not in oceni[korisnik] or oceni[korisnik][movie] == 0:
  109.                 totals.setdefault(movie, 0)
  110.                 # print oceni[person][movie]
  111.                 totals[movie] += oceni[person][movie] * bliskost
  112.  
  113.                 simSums.setdefault(movie, 0)
  114.                 simSums[movie] += bliskost
  115.  
  116.     rankings = [(total / simSums[movie], movie) for movie, total in totals.items()]
  117.     rankings.sort()
  118.     rankings.reverse()
  119.     return rankings
  120.  
  121.  
  122. def getItemBasedRecomendations(oceni, korisnik, similarity=sim_pearson):
  123.     oceni_po_film = transformPrefs(oceni)
  124.  
  125.     similarity_per_item = {}
  126.     for item in oceni[korisnik].keys():
  127.         similar_items = topMatches(oceni_po_film, item, n=None)
  128.         my_rating = oceni[korisnik][item]
  129.         for similarity, item in similar_items:
  130.             if item in oceni[korisnik] or similarity <= 0:
  131.                 continue
  132.             similarity_per_item.setdefault(item, [])
  133.             # similarity_per_item[item].append(similarity)
  134.             similarity_per_item[item].append(similarity * my_rating)
  135.     similarity_per_item_avg = []
  136.  
  137.     for item in similarity_per_item:
  138.         avg_sim = mean(similarity_per_item[item])
  139.         similarity_per_item_avg.append((avg_sim, item))
  140.     similarity_per_item_avg.sort(reverse=True)
  141.     return similarity_per_item_avg
  142.  
  143.  
  144. def mean(x):
  145.     if len(x) == 0: return 0.0
  146.     return sum(x) / len(x)
  147.  
  148.  
  149. def getTopMovies(recommendations, n=3):
  150.     i = 0
  151.     topmov = []
  152.     for movie in recommendations:
  153.         if i == n:
  154.             break
  155.         topmov.append(movie[1])
  156.         i += 1
  157.  
  158.     return sorted(topmov)
  159.  
  160.  
  161. if __name__ == "__main__":
  162.     korisnik = input()
  163.  
  164.     userbased = getUserBasedRecomendations(oceniPoKorisnici, korisnik)
  165.     itembased = getItemBasedRecomendations(oceniPoKorisnici, korisnik)
  166.  
  167.     print 'user-based:', getTopMovies(userbased)
  168.     print 'item-based:', getTopMovies(itembased)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement