Guest User

Untitled

a guest
Oct 15th, 2018
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.53 KB | None | 0 0
  # Load the Titanic training set. `train1` is not created in this script, so
  # it must already exist in the session.
  TitanicTrain <- train1

  # Count the missing values in each column of the training set.
  vapply(TitanicTrain, function(col) sum(is.na(col)), integer(1))

  # Load the Titanic test set. `test11` must likewise already exist.
  TitanicTest <- test11

  # Count the missing values in each column of the test set.
  vapply(TitanicTest, function(col) sum(is.na(col)), integer(1))

  # Add a `Survived` column filled with NA to the test set so that it can be
  # stacked with the training set.
  TitanicTest[["Survived"]] <- NA

  # Stack both sets. From here on `TitanicTrain` holds the train rows followed
  # by the test rows.
  TitanicTrain <- rbind(TitanicTrain, TitanicTest)
  18.  
  # Store Sex as a factor.
  TitanicTrain[["Sex"]] <- as.factor(TitanicTrain[["Sex"]])

  # Derive Title from Name. The pattern deletes the text up to and including a
  # comma + space, and the text from a period onward; what is left between the
  # two becomes the title.
  TitanicTrain[["Title"]] <- gsub(
    pattern = "(.*, )|(\\..*)",
    replacement = "",
    x = TitanicTrain[["Name"]]
  )
  24.  
  # Fill missing Age values with the median of the observed ages.
  # The column is kept numeric the whole time: the values are never turned
  # into text, so nothing is round-tripped through character and the
  # assignment does not rely on implicit numeric-to-character coercion.
  MedianAge <- median(TitanicTrain$Age, na.rm = TRUE)
  TitanicTrain$Age[is.na(TitanicTrain$Age)] <- MedianAge

  # Fill missing Embarked values with "S".
  TitanicTrain$Embarked[is.na(TitanicTrain$Embarked)] <- "S"

  # Fill missing Fare values with the fixed value 14.45.
  TitanicTrain$Fare[is.na(TitanicTrain$Fare)] <- 14.45
  39.  
  # Convert the categorical columns to factors in one pass.
  factor_cols <- c(
    "Survived", "PassengerId", "Pclass", "SibSp",
    "Parch", "Embarked", "Title"
  )
  TitanicTrain[factor_cols] <- lapply(TitanicTrain[factor_cols], as.factor)

  # Age is kept as a number.
  TitanicTrain[["Age"]] <- as.numeric(TitanicTrain[["Age"]])

  # Drop Ticket and Cabin (per the original author: largely missing and of no
  # value to the model), and Name, whose title has already been extracted
  # into `Title`.
  TitanicTrain[["Ticket"]] <- NULL
  TitanicTrain[["Cabin"]] <- NULL
  TitanicTrain[["Name"]] <- NULL
  54.  
  55.  
  # Make sure the prepared data is a plain data frame.
  TitanicTrain <- data.frame(TitanicTrain)

  # Turn PassengerId back into a number so the rows can be split by id.
  # The conversion goes through as.character(): as.numeric() applied directly
  # to a factor returns the internal level codes, not the ids that the level
  # labels spell out. This form is also safe if the column is already numeric.
  TitanicTrain$PassengerId <- as.numeric(as.character(TitanicTrain$PassengerId))

  # Ids below 892 form the training rows; ids from 892 upward are the
  # observations to predict.
  Train <- subset(TitanicTrain, PassengerId < 892)
  Test <- subset(TitanicTrain, PassengerId >= 892)
  63.  
  # Outcome vector: column 2 of Train. as.numeric() on the factor yields its
  # level codes, which start at 1; subtracting 1 shifts them to start at 0.
  y <- as.numeric(Train[[2]]) - 1

  # Predictors: columns 3 to 10 of Train.
  x <- data.frame(Train[3:10])

  # Prediction set: the Test rows without the outcome and without the id.
  # Note that `Survived` is removed from `Test` itself, not only from `xtest`.
  Test[["Survived"]] <- NULL
  xtest <- data.frame(Test)
  xtest[["PassengerId"]] <- NULL

  # Show the structure of the three objects just built.
  invisible(lapply(list(y, x, xtest), str))
  75.  
  76.  
  # Attach the SuperLearner package.
  library(SuperLearner)

  # Fit the ensemble on the training data with a binomial family and six
  # candidate learners.
  single.model2 <- SuperLearner(
    Y = y,
    X = x,
    family = binomial(),
    SL.library = list(
      "SL.ranger", "SL.ksvm", "SL.gbm",
      "SL.xgboost", "SL.glmnet", "SL.randomForest"
    )
  )

  # Print the fitted ensemble (used by the original author to check the risk
  # estimates of the learners).
  print(single.model2)
  89.  
  90.  
  # Predict on the test predictors with the fitted ensemble.
  predictions3 <- predict(single.model2, newdata = xtest)

  # Look at the distribution of the predicted values.
  hist(predictions3[["pred"]])

  # Turn the predictions into 0/1: values of 0.73 or more become 1.
  predictions4 <- ifelse(
    test = predictions3[["pred"]] >= 0.73,
    yes = 1,
    no = 0
  )

  # Write the binary predictions to a CSV file.
  write.csv(predictions4, file = "Predictions.csv")
Add Comment
Please, Sign In to add comment