Advertisement
Guest User

Untitled

a guest
Jan 16th, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Erlang 3.47 KB | None | 0 0
  1. # model 2: filmy zamiast użytkowników
  2. # if DEBUG == 1
  3. #X11()
  4. # endif
  5. library(reshape2)
  6.  
  # Global log: recommend() appends, via `<<-`, the number of neighbours it
  # used on each successful call; prog() prints it and its average at the end.
  friends <- c()
  # Euclidean length of a vector, sqrt(x1^2 + x2^2 + ...).
  # NA entries are skipped, so a vector with no non-missing entries has
  # length 0.
  norm_vec <- function(x) {
    squared <- x^2
    sqrt(sum(squared, na.rm = TRUE))
  }
  10.  
  # Cosine similarity between two rating vectors.
  #
  # Positions where either vector is NA contribute nothing to the dot product,
  # while each vector's length (see norm_vec) is taken over all of its own
  # non-NA entries. The result is NaN (0 / 0) when either vector has zero
  # length.
  cosine <- function(user1, user2) {
    dot_product <- sum(user2 * user1, na.rm = TRUE)
    length_product <- norm_vec(user1) * norm_vec(user2)
    dot_product / length_product
  }
  17.  
  # Find the users most similar to `user` among those who rated `film`.
  #
  # Arguments:
  #   user  name of the target user (matched against rownames(mx))
  #   film  name of the target film (matched against colnames(mx))
  #   nr    maximum number of most-similar users to return
  #   mx    rating matrix, users in rows and films in columns, NA = not rated
  #
  # Returns a matrix holding the rows of `mx` for the (at most `nr`) users
  # with the highest cosine similarity to `user`, most similar first. Returns
  # NA when the user or film is unknown or when fewer than two other users
  # rated the film (the callers fit a regression on the result, which needs at
  # least two points).
  #
  # Side effect: on success the number of returned neighbours is appended to
  # the global vector `friends`.
  recommend <- function(user, film, nr, mx) {
    user <- as.character(user)
    film <- as.character(film)

    # Explicit guards replace the former catch-all tryCatch, so unexpected
    # errors are no longer silently turned into NA.
    if (!(user %in% rownames(mx)) || !(film %in% colnames(mx))) {
      return(NA)
    }

    userrow <- mx[rownames(mx) == user, ]

    # drop = FALSE keeps a matrix even when only one or two rows match.
    is_candidate <- !is.na(mx[, film]) & rownames(mx) != user
    has_film <- mx[is_candidate, , drop = FALSE]
    if (nrow(has_film) < 2) {
      return(NA)
    }

    similarity <- apply(has_film, 1, cosine, userrow)

    # order() keeps indices aligned with the rows of has_film and puts NA/NaN
    # similarities last; sort(..., index.return = TRUE) drops them and returns
    # indices into the shortened vector.
    ord <- order(similarity, decreasing = TRUE)

    n_keep <- min(nr, nrow(has_film))
    if (n_keep < 1) {
      return(NA)
    }

    friends[length(friends) + 1] <<- n_keep
    has_film[ord[seq_len(n_keep)], , drop = FALSE]
  }
  38.  
  # Predict the rating `user` would give `film`, plotting the fitted model.
  #
  # Fits a linear regression of the neighbours' rating of `film` on the
  # neighbours' mean rating (neighbours = up to 20 most similar users from
  # recommend()), then evaluates it at the target user's own mean rating.
  # Calls plot() on the fitted model as a side effect.
  #
  # Arguments:
  #   mx    rating matrix, users in rows and films in columns, NA = not rated
  #   user  user id (converted to a string and matched against row names)
  #   film  film id (converted to a string and matched against column names)
  #
  # Returns the predicted rating. When recommend() yields no neighbour matrix
  # the user's mean rating is returned instead; if the user has no ratings in
  # `mx` at all, the mean of all ratings in `mx` is used.
  pred_avg <- function(mx, user, film) {
    user <- toString(user)
    film <- toString(film)

    user_avg <- mean(mx[row.names(mx) == user, ], na.rm = TRUE)
    if (is.nan(user_avg)) {
      # Unknown user: avoid propagating NaN into the evaluation.
      user_avg <- mean(mx, na.rm = TRUE)
    }

    neighbours <- recommend(user, film, 20, mx)
    # recommend() returns a matrix on success and a bare NA otherwise.
    # is.na() on the matrix would test every cell (an error in `if` on
    # R >= 4.2, and only the first cell on older versions).
    if (!is.matrix(neighbours)) {
      return(user_avg)
      #return(3.5)
    }

    regression_table <- data.frame(
      avg = rowMeans(neighbours, na.rm = TRUE),
      rating = neighbours[, colnames(neighbours) == film]
    )
    regression <- lm(rating ~ avg, data = regression_table)
    plot(regression)
    predict(regression, data.frame(avg = user_avg))
  }
  56.  
  # Predict the rating `user` would give `film`, printing the regression data.
  #
  # Fits a linear regression of the neighbours' rating of `film` on the
  # neighbours' mean rating (neighbours = up to 20 most similar users from
  # recommend()), then evaluates it at the target user's own mean rating.
  # Prints the two regression columns as a side effect.
  #
  # Arguments:
  #   mx    rating matrix, users in rows and films in columns, NA = not rated
  #   user  user id (converted to a string and matched against row names)
  #   film  film id (converted to a string and matched against column names)
  #
  # Returns the predicted rating. When recommend() yields no neighbour matrix
  # the user's mean rating is returned instead; if the user has no ratings in
  # `mx` at all, the mean of all ratings in `mx` is used.
  pred_2 <- function(mx, user, film) {
    user <- toString(user)
    film <- toString(film)

    user_avg <- mean(mx[row.names(mx) == user, ], na.rm = TRUE)
    if (is.nan(user_avg)) {
      # Unknown user: avoid propagating NaN into the evaluation.
      user_avg <- mean(mx, na.rm = TRUE)
    }

    neighbours <- recommend(user, film, 20, mx)
    # recommend() returns a matrix on success and a bare NA otherwise.
    # is.na() on the matrix would test every cell (an error in `if` on
    # R >= 4.2, and only the first cell on older versions).
    if (!is.matrix(neighbours)) {
      return(user_avg)
      #return(3.5)
    }

    regression_table <- data.frame(
      avg = rowMeans(neighbours, na.rm = TRUE),
      rating = neighbours[, colnames(neighbours) == film]
    )
    regression <- lm(rating ~ avg, data = regression_table)
    print(c(regression_table["avg"]))
    print(c(regression_table["rating"]))
    # if DEBUG == 1
    #plot(unlist(regression_table["avg"]), unlist(regression_table["rating"]), xlim=c(2,5), ylim=c(2,5))
    #abline(regression, lwd=2)
    #Sys.sleep(1)
    # endif
    predict(regression, data.frame(avg = user_avg))
  }
  80.  
  # Evaluate pred_2() on a random hold-out sample of the ratings file.
  #
  # Reads the ratings CSV (first three columns: userId, movieId, rating),
  # holds out 1/1000 of the rows as a test set (seed fixed at 123), builds the
  # user x movie rating matrix from the remaining rows and predicts every
  # held-out rating with pred_2(). Prints the predictions, the true ratings,
  # the absolute errors, their mean, the indices of predictions that match
  # the true rating after rounding to the nearest 0.5, and the neighbour
  # counts accumulated in the global `friends` together with their average.
  #
  # Arguments:
  #   ratings_path  path of the ratings CSV file
  #
  # Note: `friends` is a global that is never reset here, so repeated calls
  # in one session accumulate counts from earlier runs.
  prog <- function(ratings_path = "~/Desktop/mow-dane/ratings.csv") {
    set.seed(123)
    ratings <- read.csv(file = ratings_path, header = TRUE, sep = ",")
    ratings <- ratings[1:3]

    test_indexes <- sample(nrow(ratings), size = nrow(ratings) / 1000)
    test_data <- ratings[test_indexes, ]
    # setdiff() instead of ratings[-test_indexes, ]: a negative empty index
    # would select no rows at all when the test sample is empty.
    train_rows <- setdiff(seq_len(nrow(ratings)), test_indexes)
    train_data <- ratings[train_rows, ]

    train_matrix <- acast(train_data, userId ~ movieId, value.var = "rating")

    n_test <- nrow(test_data)
    vect <- numeric(n_test)
    vect_real <- test_data[[3]]
    for (i in seq_len(n_test)) {
      vect[i] <- pred_2(train_matrix, test_data[[1]][i], test_data[[2]][i])
      print(i)
    }
    print(vect)
    print(vect_real)

    diffs <- abs(vect - vect_real)
    print(diffs)

    avg <- sum(diffs) / length(vect)
    print(avg)

    # Round predictions to the nearest half star before comparing.
    pred2 <- round(vect * 2) / 2
    matched <- which(pred2 == vect_real)
    print(matched)

    print(friends)
    avg_friends <- sum(friends) / length(friends)
    print(avg_friends)
  }

  prog()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement