• API
• FAQ
• Tools
• Archive
daily pastebin goal
8%
SHARE
TWEET

# Untitled

a guest Jan 16th, 2018 55 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. library(reshape2)
2.
# Euclidean length of a vector: sqrt(x1^2 + x2^2 + ...).
# Missing entries (NA) are left out of the sum instead of propagating.
norm_vec <- function(x) {
  observed <- x[!is.na(x)]
  sqrt(sum(observed^2))
}
5.
# Cosine similarity between two rating vectors.
# Positions where either vector is NA contribute nothing to the dot product;
# each vector's length is computed by norm_vec() over its own non-NA entries.
cosine <- function(user1, user2) {
  dot_product <- sum(user1 * user2, na.rm = TRUE)
  magnitude <- norm_vec(user1) * norm_vec(user2)
  dot_product / magnitude
}
12.
# Find the users most similar to `user` among those who rated `film`.
#
# user: row name of the user being looked up
# film: column name of the film being looked up
# nr:   number of best similarities (neighbours) to keep
# mx:   rating matrix, one row per user and one column per film, NA where a
#       user has not rated a film
#
# Returns a matrix holding at most `nr` rows of `mx`, taken from users other
# than `user` who have a rating for `film`, ordered by decreasing cosine
# similarity to `user`'s row. The result is always a matrix (possibly with
# zero rows), so callers can rely on rowMeans() / column subsetting.
recommend <- function(user, film, nr, mx) {
  user_row <- mx[rownames(mx) == user, ]

  # drop = FALSE keeps a matrix even when exactly one candidate is left.
  is_candidate <- !is.na(mx[, film]) & rownames(mx) != user
  candidates <- mx[is_candidate, , drop = FALSE]
  if (nrow(candidates) == 0L) {
    return(candidates)
  }

  similarity <- apply(candidates, 1, cosine, user_row)
  # na.last = NA discards undefined similarities, as sort() did by default.
  ranking <- order(similarity, decreasing = TRUE, na.last = NA)
  keep <- ranking[seq_len(min(nr, length(ranking)))]
  candidates[keep, , drop = FALSE]
}
26.
# Predict the rating `user` would give `film`.
#
# Takes the 20 most similar users who rated the film (see recommend()), fits a
# linear regression of their rating for the film on their own mean rating, and
# evaluates that regression at the target user's mean rating.
#
# mx:   rating matrix (users in rows, films in columns, NA = not rated)
# user: user id; converted to a string to match the row names
# film: film id; converted to a string to match the column names
pred_avg <- function(mx, user, film) {
  user_id <- toString(user)
  film_id <- toString(film)
  neighbours <- recommend(user_id, film_id, 20, mx)

  # One training row per neighbour: mean rating vs. rating of this film.
  training <- data.frame(
    avg = rowMeans(neighbours, na.rm = TRUE),
    rating = neighbours[, colnames(neighbours) == film_id]
  )
  fit <- lm(rating ~ avg, data = training)

  target <- data.frame(
    avg = mean(mx[row.names(mx) == user_id, ], na.rm = TRUE)
  )
  predict(fit, target)
}
40.
# Second predictor for the rating `user` would give `film`.
# At present it performs the same computation as pred_avg(): regress the
# film's rating on the mean rating across the 20 nearest users returned by
# recommend(), then predict at the target user's own mean rating.
#
# mx:   rating matrix (users in rows, films in columns, NA = not rated)
# user: user id; converted to a string to match the row names
# film: film id; converted to a string to match the column names
pred_2 <- function(mx, user, film) {
  user <- toString(user)
  film <- toString(film)
  similar_users <- recommend(user, film, 20, mx)

  film_column <- colnames(similar_users) == film
  fit_data <- data.frame(
    avg = rowMeans(similar_users, na.rm = TRUE),
    rating = similar_users[, film_column]
  )
  model <- lm(rating ~ avg, data = fit_data)

  own_ratings <- mx[row.names(mx) == user, ]
  predict(model, data.frame(avg = mean(own_ratings, na.rm = TRUE)))
}
54.
# Experiment driver: hold out 1/20 of the ratings as a test set, build the
# user x movie rating matrix from the remaining rows, and walk over the
# held-out ratings. The prediction call inside the loop is still commented
# out, so for now the loop only prints the user id of each held-out rating.
prog <- function() {
  set.seed(123)  # fixed seed so the train/test split is reproducible

  # NOTE(review): MyData is never created inside this function, so it is
  # looked up in the enclosing environment. The statement that loaded the
  # ratings table appears to be missing at this point -- confirm and restore.
  # Only the first three columns are kept; the acast() call below needs them
  # to be named userId, movieId and rating.
  MyData <- MyData[1:3]

  n_rows <- nrow(MyData)
  test_indexes <- sample(n_rows, size = n_rows / 20)
  TestData <- MyData[test_indexes, ]
  # Pick the training rows by positive index: MyData[-integer(0), ] would
  # return zero rows when the test sample is empty (fewer than 20 ratings).
  MyData <- MyData[setdiff(seq_len(n_rows), test_indexes), ]

  # Wide user x movie matrix of ratings, input for the predictors.
  MyMatrix <- acast(MyData, userId ~ movieId, value.var = "rating")

  # Earlier manual check kept from the original notes:
  #   pred_avg(MyMatrix, 14, 15)   # 2.759451

  # seq_len() makes the loop a no-op when the test set is empty.
  for (i in seq_len(nrow(TestData))) {
    held_out <- TestData[i, ]
    print(held_out[[1]])  # user id of the held-out rating
    # predicted_val <- pred_2(MyMatrix, held_out[[1]], held_out[[2]])
  }
}
83.
# Script entry point: run the experiment defined in prog().
84. prog()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top