Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Set the first column aside before it is dropped. Single-bracket indexing
# keeps the container, so for a data frame this is a one-column data frame
# rather than a bare vector.
PassengerId <- testData[1]

# Drop column 1 and columns 8 through 11; every positional index used after
# this point refers to the reduced frame.
testData <- testData[-c(1, 8:11)]

# Recode Sex as text: "female" becomes "1", then anything still starting with
# "male" becomes "0". "female" has to be replaced first (inner call), and the
# "^" anchor keeps the second pattern from matching inside other text.
# The result is a character vector, not a numeric one.
testData$Sex <- gsub("^male", 0, gsub("female", 1, testData$Sex))
# Row positions whose name contains each honorific. fixed = TRUE treats the
# pattern as a literal substring, so the trailing "." is a real period and
# not a regex wildcard. All five lookups run against the untouched names,
# before any relabelling below.
test_master_vector <- grep("Master.", testData$Name, fixed = TRUE)
test_miss_vector <- grep("Miss.", testData$Name, fixed = TRUE)
test_mrs_vector <- grep("Mrs.", testData$Name, fixed = TRUE)
test_mr_vector <- grep("Mr.", testData$Name, fixed = TRUE)
test_dr_vector <- grep("Dr.", testData$Name, fixed = TRUE)

# Replace column 2 of every matched row with the bare title. The order
# Master, Miss, Mrs, Mr, Dr is significant: a row matched by more than one
# pattern keeps whichever title is assigned last. Indexing the column vector
# keeps an empty match set a harmless no-op.
testData[[2]][test_master_vector] <- "Master"
testData[[2]][test_miss_vector] <- "Miss"
testData[[2]][test_mrs_vector] <- "Mrs"
testData[[2]][test_mr_vector] <- "Mr"
testData[[2]][test_dr_vector] <- "Dr"
# Mean known age per title, rounded to two decimals. A title with no known
# ages gives NaN, and that NaN is what would be imputed for the title.
test_master_age <- round(
  mean(testData$Age[testData$Name == "Master"], na.rm = TRUE),
  digits = 2
)
test_miss_age <- round(
  mean(testData$Age[testData$Name == "Miss"], na.rm = TRUE),
  digits = 2
)
test_mrs_age <- round(
  mean(testData$Age[testData$Name == "Mrs"], na.rm = TRUE),
  digits = 2
)
test_mr_age <- round(
  mean(testData$Age[testData$Name == "Mr"], na.rm = TRUE),
  digits = 2
)
test_dr_age <- round(
  mean(testData$Age[testData$Name == "Dr"], na.rm = TRUE),
  digits = 2
)

# Title -> replacement age lookup used for the imputation below.
test_title_age <- c(
  Master = test_master_age,
  Miss = test_miss_age,
  Mrs = test_mrs_age,
  Mr = test_mr_age,
  Dr = test_dr_age
)

# Rows whose age (column 4) is missing, and the title (column 2) of each.
# which() returns an empty vector for an empty frame, so nothing below runs
# on phantom rows the way a 1:nrow() loop would.
test_missing_rows <- which(is.na(testData[[4]]))
test_missing_titles <- as.character(testData[[2]][test_missing_rows])

# %in% never returns NA, so a missing title is reported as unrecognized
# instead of aborting the script.
test_title_known <- test_missing_titles %in% names(test_title_age)

# Fill every missing age that has a recognized title in one assignment.
testData[[4]][test_missing_rows[test_title_known]] <-
  unname(test_title_age[test_missing_titles[test_title_known]])

# Report the rows that could not be imputed, in row order.
for (i in test_missing_rows[!test_title_known]) {
  print(paste("Uncaught title at: ", i, sep = ""))
  print(paste("The title unrecognized was: ", testData[i, 2], sep = ""))
}

# Manual replacement: the title of row 89 could not be worked out
# programmatically (the report above is what flagged it), so it receives the
# "Miss" mean age. The guard applies the patch only while that row exists and
# its age is still missing, so a different data set does not get a known age
# overwritten or extra rows appended.
if (nrow(testData) >= 89 && is.na(testData[[4]][89])) {
  testData[89, 4] <- test_miss_age
}
# Child flag derived from the age in column 4: 1 when the age is at most 12,
# otherwise 2. The whole column is computed at once and written by name; the
# earlier positional write to column 7 only hit the new column when the frame
# had exactly six columns beforehand.
testData$Child <- ifelse(testData[[4]] <= 12, 1, 2)

# A missing age gives NA here instead of stopping the script with
# "missing value where TRUE/FALSE needed"; make that visible.
if (anyNA(testData$Child)) {
  warning("Child is NA for rows whose age is missing", call. = FALSE)
}
# Family size: column 5 plus column 6 plus one for the passenger themselves
# (presumably the sibling/spouse and parent/child counts; confirm against the
# input schema). Computed as one vector addition and written by name, so it
# is safe on an empty frame and does not depend on Family being column 8.
testData$Family <- testData[[5]] + testData[[6]] + 1
# Mother flag: 1 when the title in column 2 is "Mrs" and column 6 is greater
# than zero, otherwise 2. The elementwise `&` is applied to whole columns
# here, where it belongs, instead of inside a scalar `if`. A row whose inputs
# are missing yields NA (or 2 when the other condition is already FALSE)
# rather than aborting the script. The column is written by name, so it does
# not depend on Mother being column 9.
testData$Mother <- ifelse(testData[[2]] == "Mrs" & testData[[6]] > 0, 1, 2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement