Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the churn data set. The path is machine-specific and must point at
# a local copy of abcChurn.csv.
# stringsAsFactors = FALSE makes the column types the same on every R
# version, so the explicit factor() conversions below behave predictably.
baza <- read.csv(
  "C:\\Users\\Aleksandar\\Desktop\\projekat-biostat\\abcChurn.csv",
  stringsAsFactors = FALSE
)

# Drop the first two columns (X and ID); they carry no information for
# predicting the churn variable.
baza <- baza[, -c(1, 2)]
summary(baza)
str(baza)

# Encode the city as integer codes (one code per distinct city).
# as.numeric() is applied to a factor on purpose: applied to the raw strings
# it would return NA for every row.
grad.factor <- factor(baza$grad)
levels(grad.factor)
baza$grad <- as.numeric(grad.factor)

# Gender: blank entries are treated as missing. This has to happen while the
# column is still text; after a numeric conversion the "" comparison can
# never match.
str(baza$pol)
baza$pol <- as.character(baza$pol)
baza$pol[baza$pol %in% ""] <- NA
sum(is.na(baza$pol))

# Missing values are replaced by the more frequent category, i.e. "male".
xtabs(~pol, data = baza)
baza$pol[is.na(baza$pol)] <- "male"

# Only now convert to numeric codes (levels are ordered alphabetically), so
# the column stays numeric for the correlation matrix computed later.
baza$pol <- as.numeric(factor(baza$pol))
library(corrplot)

# Build the correlation matrix to check which variables churn depends on
# strongly. cor() stops with an error if any column is non-numeric, so the
# numeric columns are selected explicitly first.
numeric.cols <- vapply(baza, is.numeric, logical(1))
corr.matrix <- cor(baza[, numeric.cols, drop = FALSE])
corrplot(
  corr.matrix,
  method = "number",
  type = "upper",
  diag = FALSE,
  number.cex = 0.5,
  tl.cex = 0.5
)
# The two predictors used by the model.
vars <- c("cena_izabranog_plana", "uplaceno")
baza1 <- baza[, vars]
summary(baza1)

library(dplyr)
set.seed(23)

# Standardise (centre and scale) the selected columns. This is done in base R
# rather than with the deprecated dplyr helpers mutate_each_() / funs().
# as.vector() drops the matrix attributes that scale() returns.
scale.vars <- c(vars, "datumIsticanjaClanstva")
data.st <- baza
data.st[scale.vars] <- lapply(
  data.st[scale.vars],
  function(x) as.vector(scale(x))
)

# Modelling data set: the standardised predictors plus churn as a factor.
# churn.st must stay a data frame here; replacing it with a bare factor would
# break both the column assignment below and the row indexing used for the
# train/test split.
churn.st <- as.data.frame(data.st[, vars])
churn.st$churn <- as.factor(baza$churn)
summary(churn.st)
boxplot(churn.st)
# Now we build the decision tree.
library(caret)
library(e1071)
library(rpart)
library(rpart.plot)

# 80/20 train/test split. The outcome is passed as a factor so that
# createDataPartition() samples within each churn class.
# NOTE(review): if baza$churn is numeric 0/1, passing it unconverted would
# presumably group by percentiles instead of by class - confirm against the
# caret documentation.
set.seed(11)
train.indices <- createDataPartition(
  as.factor(baza$churn),
  p = 0.8,
  list = FALSE
)
train.data <- churn.st[train.indices, ]
test.data <- churn.st[-train.indices, ]

# Class proportions in both parts, to compare them by eye.
prop.table(table(train.data$churn))
prop.table(table(test.data$churn))

# 10-fold cross-validation over a grid of complexity-parameter (cp) values.
numFolds <- trainControl(method = "cv", number = 10)
cpGrid <- expand.grid(.cp = seq(0.001, to = 0.05, by = 0.001))
set.seed(10)
dt.cv <- train(
  churn ~ cena_izabranog_plana + uplaceno,
  data = train.data,
  method = "rpart",
  control = rpart.control(minsplit = 10),
  trControl = numFolds,
  tuneGrid = cpGrid
)

# Take cp from the cross-validation result instead of hard-coding it
# (the original script noted cp = 0.001 here).
best.cp <- dt.cv$bestTune$cp

# Fit the final classification tree on the training data with the chosen cp.
tree <- rpart(
  churn ~ .,
  data = train.data,
  method = "class",
  control = rpart.control(minsplit = 10, cp = best.cp)
)
prp(tree)

# Predict on the held-out data and build the confusion matrix
# (rows = true class, columns = predicted class).
tree.pred <- predict(tree, newdata = test.data, type = "class")
tree.cm <- table(true = test.data$churn, predicted = tree.pred)
tree.cm
#' Compute accuracy, precision, recall and F1 from a 2x2 confusion matrix.
#'
#' @param cmatrix A 2x2 matrix or table of counts with true classes in rows
#'   and predicted classes in columns, both in the same class order.
#' @param positive Index (1 or 2) of the row/column holding the positive
#'   class. Defaults to 1, i.e. the first class is treated as positive.
#' @return A named numeric vector with elements `accuracy`, `precision`,
#'   `recall` and `F1`. Precision, recall and F1 are `NaN` when their
#'   denominator is zero.
compute.eval.metrics <- function(cmatrix, positive = 1) {
  # Anything other than a 2x2 input would otherwise give silently wrong
  # numbers, so fail early.
  stopifnot(
    length(dim(cmatrix)) == 2,
    all(dim(cmatrix) == c(2, 2)),
    length(positive) == 1,
    positive %in% c(1, 2)
  )
  negative <- 3 - positive

  TP <- cmatrix[positive, positive]
  FP <- cmatrix[negative, positive] # truly negative, predicted positive
  FN <- cmatrix[positive, negative] # truly positive, predicted negative

  acc <- sum(diag(cmatrix)) / sum(cmatrix)
  precision <- TP / (TP + FP)
  recall <- TP / (TP + FN)
  F1 <- 2 * precision * recall / (precision + recall)

  c(accuracy = acc, precision = precision, recall = recall, F1 = F1)
}
# Evaluation metrics of the decision tree, computed from its test-set
# confusion matrix, then displayed.
tree.eval <- compute.eval.metrics(tree.cm)
tree.eval
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement