Advertisement
Guest User

Untitled

a guest
Apr 30th, 2016
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.40 KB | None | 0 0
  ##################################################################
  # Benchmark script: H2O random forest on the Titanic data.
  #
  # Steps: load the prepared data, push it to an H2O instance, fit a
  # random forest, score the test set, rescale the scores to [0, 1],
  # plot both distributions, and write a submission CSV.
  ##################################################################

  # Load the prepared Titanic objects; the rest of the script relies on
  # `train` and `test` coming from this file. load() is used instead of
  # attach() so the objects are placed in the workspace rather than adding
  # another entry to the search path.
  load("/titanic/input/titanic.RData")

  head(train)
  head(test)

  library(h2o)

  # Initialize h2o on your server.
  # The address and port were previously passed as empty strings, which is
  # not a usable endpoint. The values below are h2o.init()'s documented
  # defaults; edit them to point at your own H2O instance.
  h2o_ip <- "localhost"
  h2o_port <- 54321
  h2o.init(ip = h2o_ip, port = h2o_port)

  # Copy the R data frames into the H2O instance.
  train_hex <- as.h2o(train)
  test_hex <- as.h2o(test)

  nrow(test)
  nrow(train)

  # Feature engineering
  # Analyze your data and take care of missing values here.

  # Random forest: column 2 is the response, columns 1 and 3-15 are the
  # predictors (positions refer to the columns of `train_hex`).
  titanic.rf <- h2o.randomForest(
    y = 2,
    x = c(1, 3:15),
    training_frame = train_hex
  )

  # Look at the model.
  print(titanic.rf)

  # Score the test set with the fitted model.
  predictions <- h2o.predict(titanic.rf, test_hex)

  # Convert the H2O prediction column to a local R data frame.
  prediction <- as.data.frame(predictions$predict)
  head(prediction)
  summary(prediction)

  # Work on the prediction column as a plain vector: hist() only accepts a
  # numeric vector, not a data frame.
  scores <- prediction[[1]]
  if (!is.numeric(scores)) {
    stop(
      "Predictions are not numeric, so they cannot be min-max normalized.",
      call. = FALSE
    )
  }

  # Min-max normalization to the [0, 1] interval.
  score_range <- range(scores)
  if (score_range[2] == score_range[1]) {
    warning(
      "All predictions are identical; normalized values will be NaN.",
      call. = FALSE
    )
  }
  normalized <- (scores - score_range[1]) / (score_range[2] - score_range[1])

  # Histograms of the raw and the normalized predictions, side by side.
  # The previous graphics settings are restored afterwards.
  old_par <- par(mfrow = c(1, 2))
  hist(scores, xlab = "Data", col = "lightblue", main = "")
  hist(normalized, xlab = "Normalized Data", col = "lightgreen", main = "")
  par(old_par)

  # Pull the test set back from H2O to get the passenger identifiers.
  test <- as.data.frame(test_hex)

  # `$` is case-sensitive, so a column spelled e.g. "PassengerId" would be
  # silently missed and dropped from the submission. Look it up
  # case-insensitively and fail loudly if it is not there.
  id_col <- grep("^passengerid$", names(test), ignore.case = TRUE, value = TRUE)
  if (length(id_col) != 1L) {
    stop(
      "Expected exactly one passenger id column in the test data.",
      call. = FALSE
    )
  }
  passengerId <- test[[id_col]]

  # Submission frame: one row per test passenger, id plus normalized score.
  submission <- data.frame(passengerId = passengerId, predict = normalized)

  # Write submission.
  write.csv(submission, file = "taposh_test_submission3.csv", row.names = FALSE)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement