Advertisement
Guest User

Untitled

a guest
Jul 7th, 2019
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.22 KB | None | 0 0
  # Load the travel-time records. Text columns are kept as character vectors
  # so placeholder strings can be inspected before any factor conversion.
  autoput <- read.csv("travel-times.csv", stringsAsFactors = FALSE)
  str(autoput)

  # Number of NA values in each column. Iterating over the columns directly
  # avoids apply(), which would first coerce the whole data frame to a
  # character matrix.
  vapply(autoput, function(x) sum(is.na(x)), integer(1))

  # Number of entries per column that are an empty string or a lone dash.
  # %in% never yields NA, so NA entries simply do not count.
  vapply(autoput, function(x) sum(x %in% c("", "-")), integer(1))
  # The Comments variable has many missing values and is not relevant as a
  # predictor, so it gets dropped. (The removal itself happens further down,
  # after the target variable has been derived from it.)

  # Frequency of each destination, including blank / dash placeholders.
  table(autoput$GoingTo)
  # Replace placeholder destinations with "Work".
  # NOTE(review): presumably "Work" is the most frequent destination in the
  # table above -- confirm against the data.
  autoput$GoingTo[autoput$GoingTo %in% c("", "-")] <- "Work"

  # Turn placeholder fuel-economy entries into NA explicitly before the
  # numeric conversion, instead of relying on as.numeric() to coerce them
  # (which emits "NAs introduced by coercion" warnings).
  autoput$FuelEconomy[autoput$FuelEconomy %in% c("", "-")] <- NA
  autoput$FuelEconomy <- as.numeric(autoput$FuelEconomy)

  # Shapiro-Wilk normality test on the observed fuel-economy values.
  shapiro.test(autoput$FuelEconomy)

  # Fill the missing fuel-economy values with the median of the observed ones.
  medijana <- median(autoput$FuelEconomy, na.rm = TRUE)
  autoput$FuelEconomy[is.na(autoput$FuelEconomy)] <- medijana
  24.  
  # Structure before the clean-up.
  str(autoput)

  # Discard the Date and StartTime columns; every other column is kept in its
  # original order.
  autoput <- autoput[setdiff(names(autoput), c("Date", "StartTime"))]

  # Encode the two categorical columns as factors.
  autoput[c("DayOfWeek", "GoingTo")] <- lapply(
    autoput[c("DayOfWeek", "GoingTo")],
    as.factor
  )

  # Structure after the clean-up.
  str(autoput)
  31.  
  # 60th percentile of the congestion values, as a plain unnamed number.
  percent <- quantile(autoput$Congestion407, probs = 0.6, names = FALSE)

  # Target variable: "YES" when the comment field is empty and congestion is
  # below that percentile, "NO" otherwise; stored as a factor.
  autoput$Take407All <- factor(
    ifelse(
      autoput$Comments == "" & autoput$Congestion407 < percent,
      "YES",
      "NO"
    )
  )
  head(autoput$Take407All)

  # The two source columns of the target are no longer needed.
  autoput[c("Congestion407", "Comments")] <- list(NULL)
  # Training and test sets
  library(caret)
  set.seed(10)

  # Row positions for the training set: createDataPartition() picks 80% of
  # the rows based on the target; the result is returned as a matrix rather
  # than a list.
  indexes <- createDataPartition(
    y = autoput$Take407All,
    p = 0.8,
    list = FALSE
  )

  # Selected rows form the training set, all remaining rows the test set.
  test.data <- autoput[-indexes, ]
  train.data <- autoput[indexes, ]
  48.  
  49.  
  library(e1071)
  library(caret)
  set.seed(10)

  # 10-fold cross-validation settings.
  numFolds <- trainControl(method = "cv", number = 10)

  # Candidate numbers of neighbours: the odd values 3, 5, ..., 25.
  cpGrid <- expand.grid(.k = seq(3, 25, 2))

  # Tune a kNN classifier for Take407All on all remaining columns of the
  # training set, evaluating every candidate k by cross-validation.
  cross.val <- train(
    Take407All ~ .,
    data = train.data,
    method = "knn",
    trControl = numFolds,
    tuneGrid = cpGrid
  )

  # Show the resampling results and plot them across the candidate grid.
  cross.val
  plot(cross.val)
  # The cross-validation above gave k = 23 as the optimal value.
  # Model
  str(autoput)
  library(class)

  # class::knn() works on numeric matrices, but the data still contains the
  # factor columns DayOfWeek and GoingTo. model.matrix() expands them into
  # 0/1 indicator columns and leaves numeric predictors untouched; the first
  # column (the intercept) is dropped. Building the predictors from the
  # formula also removes the target by name instead of by the hard-coded
  # column position 10.
  # NOTE(review): this should match the dummy encoding the formula interface
  # of caret::train() used during cross-validation -- confirm.
  knn.train <- model.matrix(Take407All ~ ., data = train.data)[, -1,
                                                               drop = FALSE]
  knn.test <- model.matrix(Take407All ~ ., data = test.data)[, -1,
                                                             drop = FALSE]

  # Classify the test rows by majority vote among the 23 nearest training
  # rows.
  knn1 <- knn(
    train = knn.train,
    test = knn.test,
    cl = train.data$Take407All,
    k = 23
  )
  knn1
  table(knn1)

  # Confusion matrix: actual classes in rows, predicted classes in columns.
  knn1.cm <- table(true = test.data$Take407All, predicted = knn1)

  # Class balance of the training set and a look at the GoingTo factor.
  table(train.data$Take407All)
  str(train.data$GoingTo)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement