Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Model 2: movies instead of users.
# if DEBUG == 1
# X11()
# endif
library(reshape2)

# Global log of neighbourhood sizes: recommend() appends one entry for every
# successful lookup, and prog() prints the entries and their average.
friends <- c()
# Euclidean length of a vector: sqrt(x1^2 + x2^2 + ...).
# NA entries are skipped, so an all-NA or empty vector has length 0.
norm_vec <- function(x) {
  squared <- x^2
  total <- sum(squared, na.rm = TRUE)
  sqrt(total)
}
# Cosine similarity of two rating vectors.
#
# Positions where either vector is NA contribute nothing to the dot product;
# each vector's length is taken over its own non-NA entries (see norm_vec).
# The result is NaN when either vector has zero length.
cosine <- function(user1, user2) {
  dot_product <- sum(user1 * user2, na.rm = TRUE)
  dot_product / (norm_vec(user1) * norm_vec(user2))
}
# Find the users most similar to `user` among those who rated `film`.
#
# Arguments:
#   user - row name of the user being looked up in `mx`
#   film - column name of the film being looked up in `mx`
#   nr   - maximum number of best similarities (neighbours) to keep
#   mx   - rating matrix: one row per user, one column per film, NA = no rating
#
# Returns a matrix with the rows of `mx` for up to `nr` other users who rated
# `film`, ordered by decreasing cosine similarity to `user`. Returns NA when
# the user or film is not present in `mx`, when no other user with a defined
# similarity rated the film, or when the computation fails (a warning with
# the underlying message is raised in that last case).
#
# Side effect: on success the number of returned rows is appended to the
# global vector `friends`.
recommend <- function(user, film, nr, mx) {
  user_idx <- which(rownames(mx) == user)
  film_known <- !is.character(film) || film %in% colnames(mx)
  if (length(user_idx) != 1L || !film_known) {
    return(NA)
  }

  tryCatch(
    {
      userrow <- mx[user_idx, ]

      # drop = FALSE keeps a matrix even if only one other user rated the film.
      rated_by_others <- !is.na(mx[, film]) & rownames(mx) != user
      has_film <- mx[rated_by_others, , drop = FALSE]

      if (nrow(has_film) == 0L) {
        NA
      } else {
        similarity <- apply(has_film, 1, cosine, userrow)

        # order() returns positions in `similarity` itself; na.last = NA
        # discards undefined (NaN) similarities without shifting the indices.
        ord <- order(similarity, decreasing = TRUE, na.last = NA)
        keep <- min(nr, length(ord))

        if (keep < 1) {
          NA
        } else {
          friends[length(friends) + 1] <<- keep
          has_film[ord[seq_len(keep)], , drop = FALSE]
        }
      }
    },
    error = function(e) {
      # Stay best-effort for the callers, but do not hide what went wrong.
      warning("recommend() failed: ", conditionMessage(e), call. = FALSE)
      NA
    }
  )
}
# Predict the rating `user` would give `film`, plotting the fitted model.
#
# Arguments:
#   mx   - rating matrix: one row per user, one column per film, NA = no rating
#   user - user identifier (converted to a string and matched to row names)
#   film - film identifier (converted to a string and matched to column names)
#
# Takes up to 20 neighbours from recommend(), regresses their rating of `film`
# on their mean rating, and evaluates that line at the user's own mean rating.
# The fitted regression is passed to plot() as a side effect.
#
# Falls back to the user's mean rating when recommend() yields no usable
# neighbourhood (NA, or fewer than two rows). If the user has no ratings in
# `mx` at all, the mean over the whole matrix is used instead of NaN.
pred_avg <- function(mx, user, film) {
  user <- toString(user)
  film <- toString(film)

  user_avg <- mean(mx[row.names(mx) == user, ], na.rm = TRUE)
  if (is.na(user_avg)) {
    user_avg <- mean(mx, na.rm = TRUE)
  }

  tmp <- recommend(user, film, 20, mx)
  # recommend() reports failure as a bare NA; testing is.na() on a matrix
  # would give one value per cell, so check the shape instead.
  if (!is.matrix(tmp) || nrow(tmp) < 2L) {
    return(user_avg)
  }

  regression_table <- data.frame(
    avg = rowMeans(tmp, na.rm = TRUE),
    rating = tmp[, colnames(tmp) == film]
  )
  regression <- lm(rating ~ avg, data = regression_table)
  plot(regression)
  predict(regression, data.frame(avg = user_avg))
}
# Predict the rating `user` would give `film`, printing the regression data.
#
# Arguments:
#   mx   - rating matrix: one row per user, one column per film, NA = no rating
#   user - user identifier (converted to a string and matched to row names)
#   film - film identifier (converted to a string and matched to column names)
#
# Takes up to 20 neighbours from recommend(), regresses their rating of `film`
# on their mean rating, and evaluates that line at the user's own mean rating.
# The two regression columns are printed as a side effect.
#
# Falls back to the user's mean rating when recommend() yields no usable
# neighbourhood (NA, or fewer than two rows). If the user has no ratings in
# `mx` at all, the mean over the whole matrix is used instead of NaN.
pred_2 <- function(mx, user, film) {
  user <- toString(user)
  film <- toString(film)

  user_avg <- mean(mx[row.names(mx) == user, ], na.rm = TRUE)
  if (is.na(user_avg)) {
    user_avg <- mean(mx, na.rm = TRUE)
  }

  tmp <- recommend(user, film, 20, mx)
  # recommend() reports failure as a bare NA; testing is.na() on a matrix
  # would give one value per cell, so check the shape instead.
  if (!is.matrix(tmp) || nrow(tmp) < 2L) {
    return(user_avg)
  }

  regression_table <- data.frame(
    avg = rowMeans(tmp, na.rm = TRUE),
    rating = tmp[, colnames(tmp) == film]
  )
  regression <- lm(rating ~ avg, data = regression_table)
  print(c(regression_table["avg"]))
  print(c(regression_table["rating"]))
  # if DEBUG == 1
  # plot(unlist(regression_table["avg"]), unlist(regression_table["rating"]), xlim=c(2,5), ylim=c(2,5))
  # abline(regression, lwd=2)
  # Sys.sleep(1)
  # endif
  predict(regression, data.frame(avg = user_avg))
}
# Evaluate pred_2() on a held-out sample of a ratings file.
#
# Arguments:
#   path - CSV file whose first three columns are userId, movieId and rating
#          (defaults to the location the script was originally written for)
#
# Holds out 0.1% of the rows (fixed seed) as the test set, builds the
# user x movie matrix from the remaining rows, predicts every held-out rating
# and prints: the predictions, the true ratings, the absolute errors, their
# mean, the indices where the prediction rounded to the nearest 0.5 equals the
# true rating, the neighbourhood sizes logged in `friends`, and their average.
prog <- function(path = "~/Desktop/mow-dane/ratings.csv") {
  set.seed(123)
  ratings <- read.csv(file = path, header = TRUE, sep = ",")
  ratings <- ratings[1:3]

  test_indexes <- sample(nrow(ratings), size = floor(nrow(ratings) / 1000))
  test_data <- ratings[test_indexes, ]
  # A logical mask instead of negative indices: x[-integer(0), ] would drop
  # every row when the sample is empty.
  in_test <- seq_len(nrow(ratings)) %in% test_indexes
  train_data <- ratings[!in_test, ]

  train_matrix <- acast(train_data, userId ~ movieId, value.var = "rating")

  n_test <- nrow(test_data)
  vect <- numeric(n_test)
  for (i in seq_len(n_test)) {
    vect[i] <- pred_2(train_matrix, test_data$userId[i], test_data$movieId[i])
    print(i)
  }
  vect_real <- test_data$rating

  print(vect)
  print(vect_real)
  diffs <- abs(vect - vect_real)
  print(diffs)
  avg <- sum(diffs) / length(vect)
  print(avg)

  # Ratings come in steps of 0.5, so round predictions to the same grid.
  pred2 <- round(vect * 2) / 2
  matched <- which(pred2 == vect_real)
  print(matched)

  print(friends)
  avg_friends <- sum(friends) / length(friends)
  print(avg_friends)
}

prog()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement