Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import time

# Build a user-by-user similarity matrix with the PIP measure: for every
# ordered pair of users, sum proximity * impact * popularity over the items
# both users have rated.  The diagonal (a user compared with itself) is 1.
# Elapsed wall time is printed at the end.

# perf_counter() is monotonic, so the elapsed time cannot be skewed by
# system clock adjustments the way time.time() can.
start = time.perf_counter()
print("hello")

# NOTE(review): the cosine similarities were only ever used to size the
# result frame and were overwritten with zeros straight away.  The name is
# kept in case other code reads `similarity_matrix`; drop this line if
# nothing does.
similarity_matrix = cosine_similarity(df_ratings_dummy, df_ratings_dummy)

# N x N frame of zeros, labelled by user id on both axes.
user_ids = df_ratings.index
similarity_matrix_df = pd.DataFrame(0.0, index=user_ids, columns=user_ids)

# Cache of per-movie mean ratings: the mean does not depend on the user
# pair, so compute it at most once per movie instead of once per pair.
item_means = {}

for user1_id in user_ids:
    print("Calculating sim column for user_id: " + str(user1_id))
    for user2_id in user_ids:
        # Scores are stored in column `user1_id`, row `user2_id`.  `.at`
        # writes into the frame itself; the chained form `df[col][row] = v`
        # may write to a temporary and is silently lost under pandas
        # Copy-on-Write.  Assumes user ids are unique -- TODO confirm.
        if user1_id == user2_id:
            similarity_matrix_df.at[user2_id, user1_id] = 1
            continue

        # Step 1: keep only the items rated by both users.  Dropping every
        # column that contains a NaN also drops the all-NaN columns.
        co_rated_items = df_ratings.loc[[user1_id, user2_id]].dropna(axis=1)

        # Steps 2-3: accumulate the PIP score over each co-rated item.
        # Step 4 (resetting the score) is implicit: it starts at 0 per pair.
        sim_score = 0
        for movie_id in co_rated_items.columns:
            if movie_id not in item_means:
                item_means[movie_id] = df_ratings[movie_id].mean()
            mu_k = item_means[movie_id]
            user1_rating = co_rated_items.at[user1_id, movie_id]
            user2_rating = co_rated_items.at[user2_id, movie_id]
            pip = (
                proximity(user1_rating, user2_rating)
                * impact(user1_rating, user2_rating)
                * popularity(user1_rating, user2_rating, mu_k)
            )
            sim_score = sim_score + pip

        similarity_matrix_df.at[user2_id, user1_id] = sim_score

end = time.perf_counter()
print(end - start)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement