daily pastebin goal
64%
SHARE
TWEET

Untitled

a guest Jan 16th, 2018 55 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. library(reshape2)
  2.  
  # Euclidean length of a vector: sqrt(x1^2 + x2^2 + ...).
  # NA entries are skipped, so they contribute nothing to the sum.
  norm_vec <- function(x) {
    squared <- x^2
    sqrt(sum(squared, na.rm = TRUE))
  }
  5.  
  # Cosine similarity between two rating vectors.
  #
  # Positions where either vector is NA are left out of the dot product; each
  # vector's length is computed by norm_vec() over its own non-NA entries.
  # When either length is 0 the result is NaN (0 / 0).
  cosine <- function(user1, user2) {
    dot_product <- sum(user2 * user1, na.rm = TRUE)
    lengths_product <- norm_vec(user1) * norm_vec(user2)
    dot_product / lengths_product
  }
  12.  
  # Find the users most similar to `user` among those who rated `film`.
  #
  # Args:
  #   user: row name of the target user in `mx`.
  #   film: column of `mx` (name or index) identifying the film of interest.
  #   nr:   maximum number of most-similar users to return.
  #   mx:   rating matrix with users in rows and films in columns; NA marks a
  #         missing rating.
  #
  # Returns:
  #   A matrix (always a matrix, possibly with zero rows) holding the rows of
  #   `mx` for up to `nr` other users who rated `film`, ordered by decreasing
  #   cosine similarity to `user`. Users whose similarity is undefined (NaN)
  #   are left out.
  recommend <- function(user, film, nr, mx) {
    user_idx <- which(rownames(mx) == user)
    if (length(user_idx) != 1L) {
      stop("user '", user, "' must match exactly one row of mx", call. = FALSE)
    }
    user_row <- mx[user_idx, ]

    # drop = FALSE keeps the matrix shape even when only one (or no) other
    # user has rated the film.
    is_candidate <- !is.na(mx[, film]) & rownames(mx) != user
    candidates <- mx[is_candidate, , drop = FALSE]

    similarity <- vapply(
      seq_len(nrow(candidates)),
      function(i) cosine(candidates[i, ], user_row),
      numeric(1)
    )

    # na.last = NA removes undefined similarities from the ranking.
    ranking <- order(similarity, decreasing = TRUE, na.last = NA)
    candidates[head(ranking, nr), , drop = FALSE]
  }
  26.  
  # Shared implementation behind pred_avg() and pred_2().
  #
  # Takes the `nr` users most similar to `user` who rated `film` (via
  # recommend()), fits a linear regression of their rating of `film` on their
  # mean rating, and evaluates that regression at `user`'s own mean rating.
  .predict_from_neighbours <- function(mx, user, film, nr) {
    # Row and column names of the rating matrix are strings.
    user <- toString(user)
    film <- toString(film)
    if (!user %in% rownames(mx)) {
      stop("unknown user: ", user, call. = FALSE)
    }
    if (!film %in% colnames(mx)) {
      stop("unknown film: ", film, call. = FALSE)
    }

    neighbours <- recommend(user, film, nr, mx)
    # A regression line needs at least two observations.
    if (!is.matrix(neighbours) || nrow(neighbours) < 2L) {
      warning(
        "fewer than two similar users rated film ", film,
        "; no prediction for user ", user,
        call. = FALSE
      )
      return(NA_real_)
    }

    regression_table <- data.frame(
      avg = rowMeans(neighbours, na.rm = TRUE),
      rating = neighbours[, film]
    )
    regression <- lm(rating ~ avg, data = regression_table)

    user_mean <- data.frame(avg = mean(mx[user, ], na.rm = TRUE))
    predict(regression, user_mean)
  }

  # Predict the rating `user` would give `film`.
  #
  # Args:
  #   mx:   rating matrix with users in rows and films in columns; NA marks a
  #         missing rating.
  #   user: user id; converted to a string and matched against rownames(mx).
  #   film: film id; converted to a string and matched against colnames(mx).
  #   nr:   number of most-similar users used to fit the regression
  #         (default 20).
  #
  # Returns:
  #   The value returned by predict() for the fitted regression, or NA (with a
  #   warning) when fewer than two similar users rated the film. Stops with an
  #   error when the user or the film is not present in `mx`.
  pred_avg <- function(mx, user, film, nr = 20) {
    .predict_from_neighbours(mx, user, film, nr)
  }

  # Second predictor entry point. It currently computes exactly the same
  # prediction as pred_avg(); arguments and return value are identical.
  pred_2 <- function(mx, user, film, nr = 20) {
    .predict_from_neighbours(mx, user, film, nr)
  }
  54.  
  # Script entry point: load the ratings, hold out a random 1/20 of the rows
  # as test data, build the user x film training matrix and walk over the
  # held-out rows.
  #
  # Args:
  #   path: CSV file whose first three columns are userId, movieId and rating
  #         (the column names used by the acast() call below).
  #
  # Returns NULL invisibly; the user id of every held-out row is printed.
  prog <- function(path = "~/Desktop/mow-dane/ratings.csv") {
    set.seed(123)
    ratings <- read.csv(file = path, header = TRUE, sep = ",")
    ratings <- ratings[1:3]

    n_test <- floor(nrow(ratings) / 20)
    test_indexes <- sample.int(nrow(ratings), size = n_test)
    test_data <- ratings[test_indexes, ]
    # setdiff() rather than a negative index: x[-integer(0), ] selects zero
    # rows, which would empty the training set whenever the sample is empty.
    train_data <- ratings[setdiff(seq_len(nrow(ratings)), test_indexes), ]

    # Not used yet; this is the matrix the prediction call sketched in the
    # loop below is meant to receive.
    train_matrix <- acast(train_data, userId ~ movieId, value.var = "rating")

    # Author's earlier manual check: pred_avg(MyMatrix, 14, 15) gave 2.759451.
    for (i in seq_len(nrow(test_data))) {
      user_id <- test_data[[1]][i]
      print(user_id)
      # TODO: predicted_val <- pred_2(train_matrix, user_id, test_data[[2]][i])
    }
    invisible(NULL)
  }

  prog()
RAW Paste Data
Top