Advertisement
Guest User

Untitled

a guest
Jul 7th, 2019
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.13 KB | None | 0 0
  # Load the travel-time records, keeping text columns as character vectors.
  autoput <- read.csv("travel-times.csv", stringsAsFactors = FALSE)
  str(autoput)

  # Count proper NA values per column. vapply() visits each column as it is,
  # whereas apply() would first coerce the whole data frame to a matrix.
  vapply(autoput, function(x) sum(is.na(x)), integer(1))

  # Count values that are missing in disguise: empty strings and "-".
  # %in% never returns NA, so genuine NAs are simply not counted here.
  vapply(autoput, function(x) sum(x %in% c("", "-")), integer(1))

  # Inspect the destination values, then fill blank / "-" entries with "Work".
  table(autoput$GoingTo)
  autoput$GoingTo[autoput$GoingTo %in% c("", "-")] <- "Work"

  # Mark the placeholder values as NA explicitly, so that as.numeric() only
  # warns when a value is genuinely malformed.
  autoput$FuelEconomy[autoput$FuelEconomy %in% c("", "-")] <- NA
  autoput$FuelEconomy <- as.numeric(autoput$FuelEconomy)

  # Run a normality test on fuel economy, then fill the missing values with
  # the median of the observed ones.
  shapiro.test(autoput$FuelEconomy)
  medijana <- median(autoput$FuelEconomy, na.rm = TRUE)
  autoput$FuelEconomy[is.na(autoput$FuelEconomy)] <- medijana

  str(autoput)
  # Drop Date and StartTime, and turn the categorical columns into factors.
  autoput$Date <- NULL
  autoput$StartTime <- NULL
  autoput$DayOfWeek <- as.factor(autoput$DayOfWeek)
  autoput$GoingTo <- as.factor(autoput$GoingTo)
  str(autoput)

  # 60th percentile of the congestion values; used as the cut-off below.
  percent <- quantile(autoput$Congestion407, probs = 0.6, names = FALSE)

  # A row is labelled "YES" when its congestion is below the cut-off and its
  # Comments field is empty; every other row is labelled "NO".
  autoput$Take407All <- factor(
    ifelse(
      with(autoput, Congestion407 < percent & Comments == ""),
      "YES",
      "NO"
    )
  )
  head(autoput$Take407All)

  # Remove the two columns the label was derived from.
  autoput$Congestion407 <- NULL
  autoput$Comments <- NULL
  # Training and test sets
  library(caret)
  set.seed(10)
  # createDataPartition() picks 80% of the rows based on Take407All; those
  # rows form the training set and the remaining rows form the test set.
  indexes <- createDataPartition(
    y = autoput$Take407All,
    p = 0.8,
    list = FALSE
  )
  test.data <- autoput[-indexes, ]
  train.data <- autoput[indexes, ]

  library(e1071)
  library(caret)
  set.seed(10)
  # 10-fold cross-validation settings.
  numFolds <- trainControl(method = "cv", number = 10)
  # Candidate numbers of neighbours: the odd values from 3 to 25. The column
  # is named after the tuning parameter itself (k), without a leading dot.
  cpGrid <- expand.grid(k = seq(from = 3, to = 25, by = 2))
  cross.val <- train(
    Take407All ~ .,
    data = train.data,
    method = "knn",
    trControl = numFolds,
    tuneGrid = cpGrid
  )
  cross.val
  plot(cross.val)
  # The original run reported k = 23 as the optimal value. The value is read
  # from the tuning result here so that it follows any change in data or grid.
  # Model
  str(autoput)
  library(class)
  best.k <- cross.val$bestTune$k

  # class::knn() needs purely numeric inputs, but train.data still holds the
  # factor columns DayOfWeek and GoingTo. Expand them into dummy columns with
  # the same formula that train() received. Selecting predictors through the
  # formula also removes the reliance on the label being column 10.
  knn.formula <- Take407All ~ .
  train.frame <- model.frame(knn.formula, data = train.data)
  test.frame <- model.frame(knn.formula, data = test.data)
  # Column 1 of the design matrix is the intercept, which is not a predictor.
  train.x <- model.matrix(knn.formula, data = train.frame)[, -1, drop = FALSE]
  test.x <- model.matrix(knn.formula, data = test.frame)[, -1, drop = FALSE]

  # Labels are taken from the model frames so they stay aligned with the rows
  # of the design matrices.
  knn1 <- knn(
    train = train.x,
    test = test.x,
    cl = model.response(train.frame),
    k = best.k
  )
  knn1
  table(knn1)
  knn1.cm <- table(true = model.response(test.frame), predicted = knn1)
  # Show the confusion matrix; previously it was computed but never printed.
  knn1.cm
  table(train.data$Take407All)
  str(train.data$GoingTo)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement