Advertisement
Guest User

Untitled

a guest
Jun 25th, 2018
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.76 KB | None | 0 0
  1. library(randomForest)
  2. library(MASS)
  3. set.seed(123)
  #' Most frequent non-missing value of a vector.
  #'
  #' @param x An atomic vector or factor; may contain `NA`.
  #' @return A length-one vector of the same type as `x` holding the value
  #'   that occurs most often among the non-missing entries. Ties go to the
  #'   value that appears first in `x`. If `x` has no non-missing entries the
  #'   result is a single `NA`.
  Mode <- function(x) {
    # Drop NA up front so a missing value can never be picked as the mode.
    ux <- unique(x[!is.na(x)])
    # match() maps each element to its slot in `ux`; NA entries map to NA and
    # are ignored by tabulate(). which.max() returns the first maximum.
    ux[which.max(tabulate(match(x, ux)))]
  }
  # Load the input tables from the working directory.
  # stringsAsFactors = TRUE is spelled out because its default changed to
  # FALSE in R 4.0.0; the imputation further down only fills columns for which
  # is.factor() is TRUE, so text columns must arrive as factors.
  df <- read.csv("train.csv", stringsAsFactors = TRUE)
  df1 <- df  # working copy that receives the imputed/transformed columns
  df2 <- read.csv("test.csv", stringsAsFactors = TRUE)
  # Its SalePrice column is overwritten with predictions before being written.
  tt <- read.csv("sample_submission.csv")
  12.  
  # Impute and transform every column of the training data, writing the result
  # into df1 while df itself stays untouched.
  #   numeric columns: NA -> column mean, then log(1 + x)
  #   factor columns:  NA -> first element of Mode(col)
  # Columns of any other type are copied through unchanged.
  for (i in seq_along(df)) {
    col <- df[[i]]
    if (is.numeric(col)) {
      col[is.na(col)] <- mean(col, na.rm = TRUE)
      # log1p(x) is log(1 + x), evaluated accurately for x close to zero.
      col <- log1p(col)
    } else if (is.factor(col)) {
      col[is.na(col)] <- Mode(col)[1]
    }
    df1[[i]] <- col
  }
  30.  
  # Number of distinct values in each column. vapply() walks the columns
  # directly, whereas apply() would first coerce the whole data frame to a
  # single-type matrix before counting.
  vapply(df1, function(x) length(unique(x)), integer(1))
  #df1 <- ifelse(length(unique(df1)) < 8, as.factor(df1), df1)
  # View() opens a data viewer, so only call it in an interactive session.
  if (interactive()) {
    View(df1)
  }
  summary(df1)
  35. #apply(df, 2, function(x){sum(is.na(x))})
  36.  
  37. #df$PoolQC <- ifelse(is.na(df$PoolQC)==TRUE,mode(df$PoolQC), df$PoolQC)
  38.  
  39.  
  40. #fit <- lm(SalePrice ~ ., df1, family = "lm")
  41.  
  42. #ifit <- step(fit, direction = "backward")
  43.  
  44.  
  45.  
  46. #df1$y <- predict(ifit, df1)
  47.  
  48.  
  49.  
  # Impute and transform every column of the test data in place. The fill
  # values are computed from the test columns themselves.
  #   numeric columns: NA -> column mean, then log(1 + x)
  #   factor columns:  NA -> first element of Mode(col)
  # Columns of any other type are left unchanged.
  for (i in seq_along(df2)) {
    col <- df2[[i]]
    if (is.numeric(col)) {
      col[is.na(col)] <- mean(col, na.rm = TRUE)
      # log1p(x) is log(1 + x), evaluated accurately for x close to zero.
      col <- log1p(col)
    } else if (is.factor(col)) {
      col[is.na(col)] <- Mode(col)[1]
    }
    df2[[i]] <- col
  }
  67.  
  68. #model3 <- randomForest(SalePrice~., df1)
  69.  
  70. #install.packages("ROSE")
  71. #library(ROSE)
  72. #roc.curve(df1$SalePrice, predict(model3, df2))
  73.  
  # Fit a random forest for SalePrice on all other columns of df1. SalePrice
  # was log(1 + x)-transformed with the other numeric columns, so the model
  # predicts on that scale.
  rf <- randomForest(SalePrice ~ ., data = df1, mtry = 9, ntree = 500)

  # Stack the test rows under the training rows. df2 gets a placeholder
  # SalePrice column so both frames have the same columns for rbind().
  # NOTE(review): presumably done so the test rows' factor columns carry the
  # training factor levels when handed to predict() -- confirm.
  n_train <- nrow(df1)
  df2$SalePrice <- 0
  df1 <- rbind(df1, df2)

  # Pick the test rows relative to the actual training size and drop the
  # response by name, instead of hard-coding rows 1461:2919 and columns 1:80.
  test_rows <- n_train + seq_len(nrow(df2))
  predictors <- setdiff(names(df1), "SalePrice")
  tt$SalePrice <- predict(rf, df1[test_rows, predictors, drop = FALSE])
  #tt$SalePrice <- predict(ifit, df2)

  #df2$SalePrice <- predict(ifit, df2)

  #tt$SalePrice <- df2$SalePrice
  # Undo the log(1 + x) transform: expm1(x) is exp(x) - 1.
  tt$SalePrice <- expm1(tt$SalePrice)


  # Write the submission, then read it back and print it as a quick check.
  write.csv(tt, file = "sm.csv", row.names = FALSE)
  q <- read.csv("sm.csv")
  q
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement