Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
##################################################################
# Benchmark script: H2O random forest on the Titanic data set.
#
# Steps: load the train/test data, push them to an H2O instance,
# fit a random forest, predict on the test set, min-max normalize
# the predictions, plot both distributions and write a submission
# CSV.
##################################################################

library(h2o)

# Load the Titanic data. The rest of the script expects the .RData
# file to provide the objects `train` and `test`. load() is used
# instead of attach() so nothing is left on the search path and the
# later reassignment of `test` does not shadow an attached object.
load("/titanic/input/titanic.RData")
head(train)
head(test)

# Initialize h2o on your server.
# NOTE(review): ip and port are blank placeholders here; presumably
# they must be filled in with the address of a running H2O instance.
h2o.init(ip = "", port = "")

# Load the data into h2o.
train_hex <- as.h2o(train)
test_hex <- as.h2o(test)
nrow(test)
nrow(train)

# Feature engineering ----
# Analyze your data and take care of missing values.

# Random forest ----
# Column 2 is the response; columns 1 and 3-15 are the predictors.
titanic.rf <- h2o.randomForest(
  y = 2,
  x = c(1, 3:15),
  training_frame = train_hex
)

# Look at the model.
print(titanic.rf)

# Prediction using h2o.
predictions <- h2o.predict(titanic.rf, test_hex)

# Convert the h2o frame to an R data frame (one column).
prediction <- as.data.frame(predictions$predict)
head(prediction)
summary(prediction)

# Min-max normalization of the predictions.
# NOTE(review): assumes the predictions are numeric (regression);
# min()/max() fail on a factor column -- confirm the response type.
prediction_min <- min(prediction)
prediction_max <- max(prediction)
normalized <- (prediction - prediction_min) / (prediction_max - prediction_min)

# Histograms of the raw and the normalized predictions, side by side.
# hist() needs a numeric vector, not a data frame, so the single
# column is extracted first. The previous graphics settings are
# restored once both panels are drawn.
old_par <- par(mfrow = c(1, 2))
hist(prediction[[1L]], xlab = "Data", col = "lightblue", main = "")
hist(normalized[[1L]], xlab = "Normalized Data", col = "lightgreen", main = "")
par(old_par)

# Build the submission frame.
test <- as.data.frame(test_hex)
passengerId <- test$passengerId
# Fail loudly instead of silently writing a submission without IDs:
# `$` returns NULL for a missing column and cbind() would drop it.
if (is.null(passengerId)) {
  stop(
    "Column 'passengerId' not found in test data; available columns: ",
    paste(names(test), collapse = ", "),
    call. = FALSE
  )
}
submission <- cbind(passengerId, normalized)

# Write the submission.
write.csv(submission, file = "taposh_test_submission3.csv", row.names = FALSE)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement