Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(reshape2)
# Euclidean length of a vector: sqrt(x1^2 + x2^2 + ...).
# NA entries are skipped, so they contribute nothing to the length.
norm_vec <- function(x) {
  squared_total <- sum(x^2, na.rm = TRUE)
  sqrt(squared_total)
}
# Cosine similarity between two rating vectors.
# The dot product skips every position where either vector is NA, while each
# vector's length comes from norm_vec(), which skips only that vector's own
# NA entries. The result is NaN when either length is zero.
cosine <- function(user1, user2) {
  dot_product <- sum(user1 * user2, na.rm = TRUE)
  lengths_product <- norm_vec(user1) * norm_vec(user2)
  dot_product / lengths_product
}
# Select the users most similar to `user` among those who rated `film`.
#
# Arguments:
#   user  row name (in `mx`) of the user of interest
#   film  column name (in `mx`) of the film of interest
#   nr    maximum number of most-similar users to analyse
#   mx    rating matrix with one row per user and one column per film;
#         NA marks a missing rating
#
# Returns the rows of `mx` for at most `nr` other users who rated `film`,
# ordered by decreasing cosine similarity to `user`. The result is always a
# matrix, including when it has zero rows or a single row.
recommend <- function(user, film, nr, mx) {
  is_user <- rownames(mx) == user
  if (!any(is_user)) {
    stop("user '", user, "' not found in the rating matrix", call. = FALSE)
  }
  if (is.character(film) && !(film %in% colnames(mx))) {
    stop("film '", film, "' not found in the rating matrix", call. = FALSE)
  }
  userrow <- mx[is_user, ]

  # drop = FALSE keeps the matrix shape (and its row names) even when only a
  # single row survives a filter; without it the row collapses to a vector.
  has_film <- mx[!is.na(mx[, film]), , drop = FALSE]
  has_film <- has_film[rownames(has_film) != user, , drop = FALSE]
  if (nrow(has_film) == 0) {
    return(has_film)
  }

  similarity <- apply(has_film, 1, cosine, userrow)
  # order() returns positions that line up with the rows of has_film and puts
  # undefined (NaN) similarities last instead of silently dropping them.
  ord <- order(similarity, decreasing = TRUE)
  # seq_len() avoids the c(1, 0) that 1:0 would produce when nr is 0.
  keep <- ord[seq_len(min(nr, length(ord)))]
  has_film[keep, , drop = FALSE]
}
# Predict the rating `user` would give `film` from the most similar users.
#
# A linear model rating ~ avg is fitted on the neighbours returned by
# recommend(): `avg` is each neighbour's mean rating over all films and
# `rating` is that neighbour's rating of `film`. The model is then evaluated
# at the target user's own mean rating.
#
# Arguments:
#   mx    rating matrix with users as row names and films as column names;
#         NA marks a missing rating
#   user  user id; converted to a string and matched against row names
#   film  film id; converted to a string and matched against column names
#   nr    number of most-similar users to fit on (default 20, the value that
#         was previously hard-coded)
#
# Returns the value of predict() for the target user (a length-one numeric).
pred_avg <- function(mx, user, film, nr = 20) {
  user <- toString(user)
  film <- toString(film)
  neighbours <- recommend(user, film, nr, mx)

  regression_table <- data.frame(
    avg = rowMeans(neighbours, na.rm = TRUE),
    rating = neighbours[, colnames(neighbours) == film]
  )
  regression <- lm(rating ~ avg, data = regression_table)

  # The new-data column must be called `avg` to match the model formula.
  user_mean <- data.frame(
    avg = mean(mx[row.names(mx) == user, ], na.rm = TRUE)
  )
  predict(regression, user_mean)
}
# Predict the rating `user` would give `film`.
#
# Arguments:
#   mx    rating matrix with users as row names and films as column names
#   user  user id (matched against row names)
#   film  film id (matched against column names)
#
# Returns the prediction produced by pred_avg().
#
# NOTE(review): this function used to be a line-for-line copy of pred_avg().
# It delegates instead so the two cannot drift apart; replace the body once a
# genuinely different second predictor is implemented.
pred_2 <- function(mx, user, film) {
  pred_avg(mx, user, film)
}
# Driver: load the ratings, hold out a random 1/20 of the rows as a test set,
# build the user x movie training matrix and walk over the held-out rows.
#
# Arguments:
#   path  CSV file with a header row; its first three columns must include
#         userId, movieId and rating (defaults to the original location)
#
# Returns NULL invisibly; the user id of every held-out row is printed.
prog <- function(path = "~/Desktop/mow-dane/ratings.csv") {
  set.seed(123)  # fixed seed so the train/test split is reproducible
  ratings <- read.csv(file = path, header = TRUE, sep = ",")
  ratings <- ratings[1:3]

  test_indexes <- sample(nrow(ratings), size = nrow(ratings) / 20)
  # A logical mask is used instead of ratings[-test_indexes, ]: negative
  # indexing with an empty index vector would select no rows at all.
  is_test <- seq_len(nrow(ratings)) %in% test_indexes
  test_data <- ratings[test_indexes, ]
  train_data <- ratings[!is_test, ]

  # Wide user x movie matrix of ratings; input for the prediction step below.
  train_matrix <- acast(train_data, userId ~ movieId, value.var = "rating")

  # Earlier manual check: pred_avg(MyMatrix, 14, 15) gave 2.759451.
  for (i in seq_len(nrow(test_data))) {
    # Placeholder for the evaluation step: show the user id (first column).
    print(test_data[i, 1])
    # predicted_val <- pred_2(train_matrix, test_data[i, 1], test_data[i, 2])
  }
}
- prog()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement