Guest User

Untitled

a guest
Apr 21st, 2018
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.54 KB | None | 0 0
  # Root-mean-square error between two numeric vectors.
  #
  # Defined ahead of its first call: the script is evaluated top to bottom, so
  # rmse() has to exist before the baseline error below is computed.
  #
  # Arguments:
  #   predicted_vector  numeric vector of model predictions.
  #   actual_vector     numeric vector of observed values, compared
  #                     element-wise with predicted_vector.
  #   na.rm             drop missing squared differences before averaging?
  #                     Defaults to FALSE, so any NA propagates to the result.
  #
  # Returns:
  #   A single numeric value, sqrt(mean((predicted - actual)^2)).
  rmse <- function(predicted_vector, actual_vector, na.rm = FALSE) {
    difference <- predicted_vector - actual_vector
    sqrt(mean(difference^2, na.rm = na.rm))
  }

  # Baseline linear model: Gross explained by Year, Runtime and Budget,
  # fitted on every row of `movie`.
  fit <- lm(Gross ~ Year + Runtime + Budget, data = movie)

  # In-sample predictions (same rows the model was fitted on).
  predicted <- predict(fit, movie)

  # In-sample RMSE of the baseline model.
  rerror <- rmse(predicted, movie$Gross)
  12.  
  # Learning curve: for each training-set percentage in `size_vector`, draw a
  # random subset of `training_set` of that size, fit the linear model on it,
  # and record the RMSE on the subset itself and on `test_set`. Each size is
  # repeated `n_repeats` times and the mean RMSE is stored in `rmse_frame`.
  #
  # Relies on `training_set`, `test_set` and `rmse()` already being defined.
  size_vector <- seq(from = 5, to = 100, by = 5)
  n_sizes <- length(size_vector)
  n_repeats <- 10

  # One row per sample size; sized from size_vector rather than a literal so
  # the two cannot drift apart.
  rmse_frame <- data.frame(
    size = size_vector,
    train_rmse = numeric(n_sizes),
    test_rmse = numeric(n_sizes)
  )

  # Loop-invariant, so computed once.
  total_rows_training <- nrow(training_set)

  for (size_index in seq_len(n_sizes)) {
    size <- size_vector[size_index]
    number_of_rows_to_select <- ceiling((size / 100) * total_rows_training)

    # Preallocated instead of being grown with c() on every repetition.
    train_rmse_vector <- numeric(n_repeats)
    test_rmse_vector <- numeric(n_repeats)

    for (i in seq_len(n_repeats)) {
      # Draw the required number of distinct rows directly; this is the same
      # as shuffling every row and keeping the first k, without the 1:k
      # indexing hazard when k is 0.
      selected_rows <- sample.int(total_rows_training, number_of_rows_to_select)
      sample_train_set <- training_set[selected_rows, ]

      # NOTE: this reuses the name `fit`, replacing the baseline model fitted
      # at the top of the script.
      fit <- lm(
        Gross ~ Year + Runtime + Budget + imdbVotesTransformed,
        data = sample_train_set
      )

      # Error on the rows the model was trained on.
      train_prediction <- predict(fit, sample_train_set)
      train_rmse <- rmse(train_prediction, sample_train_set$Gross)
      train_rmse_vector[i] <- train_rmse

      # Error of the same model on the held-out test set.
      test_prediction <- predict(fit, test_set)
      test_rmse <- rmse(test_prediction, test_set$Gross)
      test_rmse_vector[i] <- test_rmse
    }

    # Average over the repetitions for this sample size.
    rmse_frame$train_rmse[size_index] <- mean(train_rmse_vector)
    rmse_frame$test_rmse[size_index] <- mean(test_rmse_vector)
  }
Add Comment
Please, Sign In to add comment