Advertisement
Guest User

dat q6

a guest
Jan 31st, 2015
179
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.28 KB | None | 0 0
  1. # Q6: for each user, output each user's useful votes
  2. # OUTPUT: userid, number of useful votes
  3. # SECONDARY OUTPUT: the average of total useful votes
  4. import numpy as np
  5. from pandas import *
  6.  
  7. # for each user, store the "useful votes" (field #10) they have
  8. usefulVotesCount = dict()
  9.  
  10. n = 0
  11. f = open('yelp_reviews.txt', 'r')
  12. for line in f:
  13.  
  14. # ignore first row
  15. if (n > 0):
  16.  
  17. # split into a list on the pipe delimiter
  18. currentRow = line.split("|")
  19. currentUserId = currentRow[6]
  20. currentUsefulVotes = float(currentRow[9])
  21.  
  22. # save userIDs into dict.
  23. # if it's not there, create an entry
  24. if currentUserId not in usefulVotesCount:
  25. usefulVotesCount[currentUserId] = 0
  26. usefulVotesCount[currentUserId ] += currentUsefulVotes
  27.  
  28. # increment what row we're on
  29. n += 1
  30.  
  31. # end loop to build dict
  32. # put this dict into a series for easy analysis
  33. series = Series(usefulVotesCount)
  34.  
  35. ########### OUTPUT
  36. # first, for each userid, output total number of reviews they had
  37. numberUsers = series.shape[0] # this is the length of series
  38. pandas.set_option("display.max_rows", numberUsers)
  39. print series
  40.  
  41. # secondly, find the AVERAGE number of useful votes per user
  42. print series.mean() # -> 6.78323294735
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement