Advertisement
Guest User

Untitled

a guest
May 23rd, 2017
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.31 KB | None | 0 0
  1. import pandas as pd
  2.  
  3. unames = ['user_id','gender','age','occupation','zip']
  4. users = pd.read_table('ch02\movielens\users.dat', sep = '::', header = None, names = unames)
  5.  
  6. rnames = ['user_id', 'movie_id', 'rating', 'timestamp']
  7. ratings = pd.read_table('ch02/movielens/ratings.dat', sep = '::', header = None, names = rnames)
  8.  
  9. mnames = ['movie_id','title','genres']
  10. movies = pd.read_table('ch02/movielens/movies.dat', sep = '::', header = None, names = mnames)
  11.  
  12. data = pd.merge(pd.merge(users, ratings), movies)
  13.  
  14. data.ix[0]
  15. mean_ratings = data.pivot_table('rating', index = 'title', columns =
  16. 'gender', aggfunc = 'mean')
  17.  
  18. #next to filter teh movies that received at leaset 250 ratings
  19. ratings_by_title = data.groupby('title').size()
  20. active_titles = ratings_by_title.index[ratings_by_title >= 250]
  21. mean_ratings = mean_ratings.ix[active_titles]
  22.  
  23. top_female_ratings = mean_ratings.sort_index(by='F', ascending = False)
  24.  
  25. #to find the moview that are most divisive between male and female viewers
  26. mean_ratings['diff'] = mean_ratings['M'] - mean_ratings['F']
  27. sort_by_diff = mean_ratings.sort_index(by='diff')
  28. sort_by_diff[::-1][:15] # reverse order of rows, take first 15 rows
  29.  
  30. rating_std_by_title = data.groupby('title')['rating'].std()
  31. rating_std_by_title = rating_std_by_title.ix[active_titles]
  32. rating_std_by_title.order(ascending = False)[:10]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement