Advertisement
Guest User

Untitled

a guest
Jun 18th, 2019
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.56 KB | None | 0 0
  # Churn analysis script: loads the abcChurn data, encodes the categorical
  # columns, inspects correlations, standardises the plan-price and paid-amount
  # columns, and fits/evaluates an rpart decision tree for `churn`.

  library(corrplot)
  library(dplyr) # NOTE(review): no longer used below; kept in case code
                 # outside this section relies on it.
  library(caret)
  library(e1071)
  library(rpart)
  library(rpart.plot)

  baza <- read.csv(
    "C:\\Users\\Aleksandar\\Desktop\\projekat-biostat\\abcChurn.csv"
  )

  # Drop the X and ID columns: they are not needed for determining churn.
  baza <- baza[, -c(1, 2)]

  summary(baza)
  str(baza)

  # Encode the city as integer codes. Going through factor() makes this work
  # whether read.csv() returned a factor or a character column.
  baza$grad <- factor(baza$grad)
  levels(baza$grad)
  baza$grad <- as.numeric(baza$grad)

  # Gender: blanks must become NA *before* any numeric conversion, otherwise
  # the empty string is silently encoded as just another category.
  str(baza$pol)
  baza$pol <- as.character(baza$pol)
  baza$pol[baza$pol %in% ""] <- NA
  sum(is.na(baza$pol))
  # Missing values are assigned to the more frequent category, i.e. "male"
  # (check the counts printed by xtabs()).
  xtabs(~pol, data = baza)
  baza$pol[is.na(baza$pol)] <- "male"
  baza$pol <- as.numeric(factor(baza$pol))

  # Build the correlation matrix to see which variables churn depends on
  # strongly. cor() needs every column of `baza` to be numeric at this point.
  corr.matrix <- cor(baza)
  corrplot(
    corr.matrix,
    method = "number", type = "upper", diag = FALSE,
    number.cex = 0.5, tl.cex = 0.5
  )

  vars <- c("cena_izabranog_plana", "uplaceno")
  baza1 <- baza[, vars]
  summary(baza1)

  set.seed(23)

  # Standardise (centre and scale) the selected columns in a copy of `baza`.
  scale.vars <- c(vars, "datumIsticanjaClanstva")
  data.st <- baza
  data.st[scale.vars] <- lapply(
    data.st[scale.vars],
    function(x) as.vector(scale(x))
  )

  # Modelling frame: the two standardised predictors plus churn as a factor.
  churn.st <- data.st[, vars]
  churn.st$churn <- as.factor(baza$churn)
  summary(churn.st)
  boxplot(churn.st)

  # Decision tree ----
  set.seed(11)
  train.indices <- createDataPartition(baza$churn, p = 0.8, list = FALSE)
  train.data <- churn.st[train.indices, ]
  test.data <- churn.st[-train.indices, ]
  prop.table(table(train.data$churn))
  prop.table(table(test.data$churn))

  # 10-fold cross-validation over a grid of complexity-parameter values.
  numFolds <- trainControl(method = "cv", number = 10)
  cpGrid <- expand.grid(.cp = seq(0.001, to = 0.05, by = 0.001))
  set.seed(10)
  dt.cv <- train(
    churn ~ cena_izabranog_plana + uplaceno,
    data = train.data,
    method = "rpart",
    control = rpart.control(minsplit = 10),
    trControl = numFolds,
    tuneGrid = cpGrid
  )

  # Use the cp selected by cross-validation instead of copying it by hand
  # (the original run recorded cp = 0.001).
  best.cp <- dt.cv$bestTune$cp
  best.cp

  tree <- rpart(
    churn ~ .,
    data = train.data,
    method = "class",
    control = rpart.control(minsplit = 10, cp = best.cp)
  )
  prp(tree)

  # Confusion matrix on the held-out data: rows are the true classes,
  # columns the predicted ones.
  tree.pred <- predict(tree, newdata = test.data, type = "class")
  tree.cm <- table(true = test.data$churn, predicted = tree.pred)
  tree.cm
  # Compute accuracy, precision, recall and F1 from a 2x2 confusion matrix.
  #
  # The matrix is read with true classes in rows and predicted classes in
  # columns, and the FIRST row/column is treated as the positive class:
  #   [1, 1] = TP, [1, 2] = FN, [2, 1] = FP.
  #
  # Args:
  #   cmatrix: a 2x2 matrix or table of counts.
  # Returns:
  #   A named numeric vector with elements accuracy, precision, recall, F1.
  #   Elements are NaN when their denominator is zero (e.g. precision when
  #   nothing was predicted as positive).
  # Raises:
  #   An error if `cmatrix` is not 2x2.
  compute.eval.metrics <- function(cmatrix) {
    # Anything other than 2x2 would silently report metrics for the wrong
    # cells, so fail loudly instead.
    if (length(dim(cmatrix)) != 2L || any(dim(cmatrix) != 2L)) {
      stop(
        "compute.eval.metrics() expects a 2x2 confusion matrix",
        call. = FALSE
      )
    }

    TP <- cmatrix[1, 1]
    FP <- cmatrix[2, 1]
    FN <- cmatrix[1, 2]

    acc <- sum(diag(cmatrix)) / sum(cmatrix)
    precision <- TP / (TP + FP)
    recall <- TP / (TP + FN)
    F1 <- 2 * precision * recall / (precision + recall)

    c(accuracy = acc, precision = precision, recall = recall, F1 = F1)
  }
  # Score the tree's confusion matrix and show the resulting metrics; the
  # surrounding parentheses make the assigned value print.
  (tree.eval <- compute.eval.metrics(tree.cm))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement