Advertisement
Guest User

Evaluation Metrics Demo using recommenderlab package in R

a guest
May 6th, 2015
330
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.44 KB | None | 0 0
  1. # Installing dependencies
  2. # install.packages(c('ggplot2', 'recommenderlab', 'manipulate'))
  3.  
  4. # Importing Libraries
  5. library(recommenderlab)
  6. library(ggplot2)
  7. library(manipulate)
  8.  
  9. # Load the MovieLense ratings dataset
  10. data(MovieLense)
  11.  
  12. # Exploratory Data Analysis: print the rating matrix to see its summary
  13. MovieLense
  14.  
  15. # The 943 x 1664 (= 1,569,152 cells) user-movie rating matrix is far from complete; most cells are NA.
  16. # There are only 99,392 ratings: a density of approx. 6%, i.e. the matrix is roughly 94% sparse.
  17.  
  18. # Ratings for a particular user (row 1 of the rating matrix)
  19. as(MovieLense[1, ], "data.frame") # long format: one row per rating given by this user
  20. names(as(MovieLense[1, ], "data.frame")) # column names of the long format
  21. nrow(as(MovieLense[1, ], "data.frame")) # number of ratings given by user 1
  22. nrow(as(MovieLense[2, ], "data.frame")) # number of ratings given by user 2
  23.  
  24. # Ratings for a particular item (column 1 of the rating matrix)
  25. as(MovieLense[, 1], "data.frame") # long format: one row per rating received by this item
  26. names(as(MovieLense[, 1], "data.frame")) # column names of the long format
  27. nrow(as(MovieLense[, 1], "data.frame")) # number of ratings received by item 1
  28. nrow(as(MovieLense[, 2], "data.frame")) # number of ratings received by item 2
  29.  
  30. # How many movies did each user rate? Distribution of ratings-per-user.
  31. hist(rowCounts(MovieLense), breaks = 10) # x-axis: number of movies rated; y-axis: number of users whose rating count falls in that bin
  32. # Same histogram with the number of breaks driven by an interactive slider (10 to 400)
  33. manipulate(hist(rowCounts(MovieLense), breaks = brk), brk = slider(10, 400)) # NOTE(review): manipulate presumably needs RStudio - confirm
  34.  
  35. df <- as(MovieLense, "data.frame") # long format: one row per rating (columns user, item, rating)
  36.  
  37. # Average rating given by one particular user, 942
  38. mean(df[df$user == 942, "rating"])
  39.  
  40. # Average rating of every user (result is named by user id)
  41. tapply(df$rating, df$user, mean)
  42. tapply(df$rating, df$user, mean)[["942"]]
  43.  
  44. # Plotting the averages of all users: unsorted, then from highest to lowest
  45. barplot(tapply(df$rating, df$user, mean))
  46. barplot(sort(tapply(df$rating, df$user, mean), decreasing = TRUE))
  47.  
  48. # Average rating for a particular movie, here the movie in the first row of df
  49. mean(df[df$item == df$item[1], "rating"])
  50.  
  51. # Average rating for all the movies (result is named by item)
  52. tapply(df$rating, df$item, mean)
  53. tapply(df$rating, df$item, mean)[[as.character(df$item[1])]] # look up by name; a factor index would be used as its integer code
  54.  
  55. # Plotting the averages of all the movies: unsorted, then from highest to lowest
  56. barplot(tapply(df$rating, df$item, mean))
  57. barplot(sort(tapply(df$rating, df$item, mean), decreasing = TRUE))
  58.  
  59. # User 849 - high rating. User 3 - low rating. Let's verify that.
  60. tapply(df$rating, df$user, mean)[["849"]]
  61. tapply(df$rating, df$user, mean)[["3"]]
  62.  
  63. # Histogram of the raw ratings
  64. g <- ggplot(as(MovieLense, "data.frame"), aes(x = rating))
  65. g <- g + geom_histogram() # demo note: re-run with geom_histogram(binwidth = 1) to compare against the default binning
  66. g
  67.  
  68. # Histogram of normalized ratings. Normalization method is Z-score.
  69. g <- ggplot(as(normalize(MovieLense, method = "Z-score"), "data.frame"), aes(x = rating))
  70. g <- g + geom_histogram() # demo note: re-run with binwidth values from 0.1 to 0.5 to see the effect
  71. g
  72.  
  73. # Histogram of the average rating per movie (column means of the rating matrix)
  74. qplot(colMeans(MovieLense), binwidth = 0.1)
  75.  
  76. # Z-score normalization standardizes the ratings (presumably per user - confirm in ?normalize); it does not by itself make them normally distributed. Sanity check: the summary should show a mean close to 0.
  77. summary(as(normalize(MovieLense, method = "Z-score"), "data.frame")$rating)
  78.  
  79. # Let's get recommending! List the algorithms registered for real-valued rating matrices.
  80. recommenderRegistry$get_entries(dataType = "realRatingMatrix")
  81. # We have a few options
  82. set.seed(2015) # fix the RNG so the random train/test split (and any random baseline) is reproducible between runs
  83. # Let's check some algorithms against each other: one 90/10 split, 10 ratings given per test user, ratings >= 4 count as good
  84. scheme <- evaluationScheme(MovieLense, method = "split", train = 0.9, k = 1, given = 10, goodRating = 4)
  85.  
  86. scheme
  87.  
  88. algorithms <- list(
  89.   "random items" = list(name = "RANDOM", param = list(normalize = "Z-score")),
  90.   "popular items" = list(name = "POPULAR", param = list(normalize = "Z-score")),
  91.   "user-based CF" = list(name = "UBCF", param = list(normalize = "Z-score", method = "Cosine", nn = 50, minRating = 3)), # NOTE(review): minRating may not be accepted by newer recommenderlab releases - confirm
  92.   "item-based CF" = list(name = "IBCF", param = list(normalize = "Z-score"
  93.   ))
  94. )
  95.  
  96. # Evaluate every algorithm on the scheme for top-N lists of increasing length
  97. results <- evaluate(scheme, algorithms, n = c(1, 3, 5, 10, 15, 20))
  98.  
  99. # ROC curves, one per algorithm, with the list lengths annotated
  100. plot(results, annotate = 1:4, legend = "topleft")
  101.  
  102. # Precision / recall curves for the same results
  103. plot(results, "prec/rec", annotate = 1:4)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement