Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
u.data -- The full u data set: 100000 ratings by 943 users on 1682 items.
Each user has rated at least 20 movies. Users and items are
numbered consecutively from 1. The data is randomly
ordered. This is a tab-separated list of
user id | item id | rating | timestamp.
The timestamps are Unix seconds since 1/1/1970 UTC.
- """
- import numpy as np
- import math
def srednia_filmu(movie_id, in_array):
    """Return the mean rating of one movie.

    Each row of ``in_array`` is read positionally: index 1 is compared
    against ``movie_id`` and index 2 is the value that gets averaged.

    Dividing by the number of matching rows means that a ``movie_id`` with
    no matching row ends in a division by zero.
    """
    oceny = [wiersz[2] for wiersz in in_array if wiersz[1] == movie_id]
    return sum(oceny) / len(oceny)
def srednia_uzytkownika(user_id, in_array):
    """Return the mean rating given by one user.

    Each row of ``in_array`` is read positionally: index 0 is compared
    against ``user_id`` and index 2 is the value that gets averaged.

    Side effect: prints one diagnostic line with the user id and the sum of
    that user's ratings.

    Dividing by the number of matching rows means that a ``user_id`` with
    no matching row ends in a division by zero.
    """
    suma = 0
    ilosc = 0
    for x in in_array:
        if x[0] == user_id:
            ilosc += 1
            suma += x[2]
    # The format string has two placeholders; it used to receive only
    # ``suma``, which raised TypeError ("not enough arguments for format
    # string") and made the function unusable. Both values are supplied now.
    print('Suma %s (%f)' % (user_id, suma))
    return suma / ilosc
def select(parameter_index, value, in_array):
    """Return the rows of ``in_array`` whose field equals ``value``.

    ``parameter_index`` is the position inside each row that is compared.
    Matching rows are returned in their original order as a new list; an
    empty list is returned when nothing matches.
    """
    return [wiersz for wiersz in in_array if wiersz[parameter_index] == value]
def sim(a, b):
    """Return the correlation-style similarity between users ``a`` and ``b``.

    Reads the module-level ``arr`` (rows of user id, item id, rating,
    timestamp). Only items rated by both users take part in the sums, but
    each user's mean comes from ``srednia_uzytkownika`` and therefore covers
    all of that user's ratings.

    Returns 0 when the users share no item, and also when the denominator
    is zero (at least one user shows no deviation from their mean on the
    shared items), because the ratio is undefined there.
    """
    oceny_a = select(0, a, arr)
    oceny_b = select(0, b, arr)

    # Index b's ratings by item id so each of a's items is matched with one
    # lookup instead of a scan. setdefault keeps the first row per item,
    # which is what the previous "scan and break" matching selected.
    oceny_b_wg_filmu = {}
    for y in oceny_b:
        oceny_b_wg_filmu.setdefault(y[1], y[2])

    # Pairs (rating by a, rating by b) for every item both users rated.
    P = [(x[2], oceny_b_wg_filmu[x[1]])
         for x in oceny_a if x[1] in oceny_b_wg_filmu]
    if not P:
        return 0

    sr_a = srednia_uzytkownika(a, arr)
    sr_b = srednia_uzytkownika(b, arr)

    dzielna = 0
    s1 = 0
    s2 = 0
    for ocena_a, ocena_b in P:
        odchylenie_a = ocena_a - sr_a
        odchylenie_b = ocena_b - sr_b
        dzielna += odchylenie_a * odchylenie_b
        s1 += odchylenie_a * odchylenie_a
        s2 += odchylenie_b * odchylenie_b

    dzielnik = math.sqrt(s1) * math.sqrt(s2)
    if dzielnik == 0:
        # Undefined correlation; report "no similarity" instead of dividing
        # by zero or propagating NaN into the caller's sums.
        return 0
    return dzielna / dzielnik
def users():
    """Group the rows of the module-level ``arr`` by user id.

    Returns a dictionary mapping each user id (index 0 of a row) to the
    list of that user's rows, in the order they appear in ``arr``.

    Prints a header line, then a progress line each time the number of
    distinct users seen reaches a multiple of 20.
    """
    print("Generowanie słownika użytkownikow")
    users_dictionary = {}
    # Single pass over arr: the earlier version rescanned the whole array
    # once per distinct user to collect that user's rows.
    for x in arr:
        wiersze = users_dictionary.get(x[0])
        if wiersze is None:
            wiersze = users_dictionary[x[0]] = []
            size = len(users_dictionary)
            if size % 20 == 0:
                print("Ilość użytkownikow: " + str(size))
        wiersze.append(x)
    return users_dictionary
def users1():
    """Build a nested rating lookup from the module-level ``arr``.

    Returns a dictionary mapping each user id (index 0 of a row) to a
    dictionary of item id (index 1) -> rating (index 2). If a user has more
    than one row for the same item, the last one in ``arr`` wins.

    Prints a header line, then a progress line each time the number of
    distinct users seen reaches a multiple of 20.
    """
    print("Generowanie słownika użytkownikow")
    users_dictionary = {}
    # Single pass over arr: the earlier version rescanned the whole array
    # once per distinct user to collect that user's rows.
    for x in arr:
        node = users_dictionary.get(x[0])
        if node is None:
            node = users_dictionary[x[0]] = {}
            size = len(users_dictionary)
            if size % 20 == 0:
                print("Ilość użytkownikow: " + str(size))
        node[x[1]] = x[2]
    return users_dictionary
def pred(a, p):
    """Predict the rating user ``a`` would give to item ``p``.

    The prediction is the mean rating of ``a`` plus a similarity-weighted
    average of how far the other users' ratings of ``p`` lie from their own
    means. Reads the module-level ``users_movies_rate`` (user id ->
    {item id: rating}) and ``arr``.

    Only users who actually rated ``p`` contribute, to both the numerator
    and the denominator. When no such user exists, or their weights sum to
    zero, the mean rating of ``a`` is returned unchanged.
    """
    sr_a = srednia_uzytkownika(a, arr)
    dzielna = 0
    dzielnik = 0
    for k, value in users_movies_rate.items():
        # A user with no rating for p carries no information about p.
        # Treating the missing rating as 0 (as before) pulled every
        # prediction down by roughly that user's mean rating.
        if k == a or p not in value:
            continue
        # sim() rescans the data set, so compute it once per neighbour and
        # reuse it for both sums.
        waga = sim(a, k)
        if math.isnan(waga):
            # Undefined similarity would turn the whole prediction into NaN.
            continue
        dzielna += waga * (value[p] - srednia_uzytkownika(k, arr))
        dzielnik += waga
    if dzielnik == 0:
        return sr_a
    return sr_a + dzielna / dzielnik
# Load the ratings file into ``arr``: one row per line holding the first four
# tab-separated fields (user id, item id, rating, timestamp) as integers.
lista = []
# ``with`` closes the file even if a line fails to parse; the handle was
# previously left open for the lifetime of the process.
with open("c:\\Users\\Computer\\Desktop\\System_rekomendacyjny-master\\System_rekomendacyjny-master\\DB\\u.data") as f:
    for line in f:
        if not line.strip():
            # Skip blank lines (e.g. a trailing newline at end of file)
            # instead of failing on int('').
            continue
        splited = line.split("\t", 4)
        lista.append([int(splited[0]), int(splited[1]),
                      int(splited[2]), int(splited[3])])
arr = np.array(lista)
# user id -> {item id: rating}; this is the lookup that pred() reads.
users_movies_rate = users1()
ar = []
- #print('Najbardziej podobny %s (%f)' % (srednia_filmu(1, ar)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement