Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the travel-time records, keeping text columns as character vectors.
autoput <- read.csv("travel-times.csv", stringsAsFactors = FALSE)
str(autoput)

# Number of NA values in each column.
vapply(autoput, function(column) sum(is.na(column)), integer(1))

# Number of values in each column that are an empty string or a lone dash,
# i.e. placeholder entries that are not recorded as NA.
vapply(autoput, function(column) sum(column %in% c("", "-")), integer(1))
# The Comments variable has many missing values and is not relevant for the
# analysis. It is not removed here: it is still needed to derive Take407All
# and is dropped right after that step.
table(autoput$GoingTo)

# Missing destinations ("" or "-") are filled in with "Work".
autoput$GoingTo[autoput$GoingTo %in% c("", "-")] <- "Work"

# Mark the placeholder strings as NA explicitly, so the numeric conversion
# below does not have to coerce them to NA with a warning.
autoput$FuelEconomy[autoput$FuelEconomy %in% c("", "-")] <- NA
autoput$FuelEconomy <- as.numeric(autoput$FuelEconomy)

# Normality test on FuelEconomy, printed for inspection; the missing values
# are then replaced with the median.
shapiro.test(autoput$FuelEconomy)
medijana <- median(autoput$FuelEconomy, na.rm = TRUE)
autoput$FuelEconomy[is.na(autoput$FuelEconomy)] <- medijana
str(autoput)

# Drop the date/time columns and turn the categorical columns into factors.
autoput$Date <- NULL
autoput$StartTime <- NULL
autoput$DayOfWeek <- as.factor(autoput$DayOfWeek)
autoput$GoingTo <- as.factor(autoput$GoingTo)
str(autoput)
# 60th percentile of Congestion407, used as the congestion cut-off.
percent <- unname(quantile(autoput$Congestion407, probs = 0.6))

# Take407All is "YES" when congestion is below the cut-off and the trip has
# no comment; otherwise it is "NO".
low_congestion <- autoput$Congestion407 < percent
no_comment <- autoput$Comments == ""
autoput$Take407All <- factor(ifelse(low_congestion & no_comment, "YES", "NO"))
head(autoput$Take407All)

# The columns the outcome was derived from are removed from the data set.
autoput$Congestion407 <- NULL
autoput$Comments <- NULL
# Training and test sets ----
library(caret)
set.seed(10)

# 80% of the rows (partitioned on the outcome Take407All) go to training,
# the remaining rows to testing.
train_idx <- createDataPartition(y = autoput$Take407All, p = 0.8, list = FALSE)
train.data <- autoput[train_idx, ]
test.data <- autoput[-train_idx, ]
library(e1071)
library(caret)
set.seed(10)

# 10-fold cross-validation over the odd neighbourhood sizes k = 3, 5, ..., 25.
cv_control <- trainControl(method = "cv", number = 10)
k_grid <- expand.grid(.k = seq(from = 3, to = 25, by = 2))

cross.val <- train(
  Take407All ~ .,
  data = train.data,
  method = "knn",
  trControl = cv_control,
  tuneGrid = k_grid
)
cross.val
plot(cross.val)
# Cross-validation gave k = 23 as the optimal value.
# Model ----
str(autoput)
library(class)

# class::knn() converts its inputs to numeric matrices, but DayOfWeek and
# GoingTo are factors. Expand them into dummy columns with model.matrix() and
# drop the intercept column. The formula excludes the outcome by name, so no
# positional column index (previously -10) is needed.
knn_train <- model.matrix(Take407All ~ ., data = train.data)
knn_train <- knn_train[, -1, drop = FALSE]
knn_test <- model.matrix(Take407All ~ ., data = test.data)
knn_test <- knn_test[, -1, drop = FALSE]

# Use the k selected by cross-validation (reported as 23) rather than a
# hard-coded constant.
best_k <- cross.val$bestTune$k

# knn() breaks ties at random, so fix the seed to keep predictions repeatable.
# NOTE(review): the predictors are not standardized before computing
# distances -- confirm this is intended.
set.seed(10)
knn1 <- knn(
  train = knn_train,
  test = knn_test,
  cl = train.data$Take407All,
  k = best_k
)
knn1
table(knn1)

# Confusion matrix of actual vs. predicted classes on the test set.
knn1.cm <- table(true = test.data$Take407All, predicted = knn1)
knn1.cm

# Class balance of the training outcome and structure of GoingTo.
table(train.data$Take407All)
str(train.data$GoingTo)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement