Advertisement
Guest User

Untitled

a guest
Dec 19th, 2018
89
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.96 KB | None | 0 0
  1. library(editrules)
  # Load the Adult census data set.
  # Later steps call as.numeric() on the text columns and therefore need them
  # to be factors. read.csv() stopped converting strings to factors by default
  # in R 4.0.0, so the conversion is requested explicitly here.
  baza <- read.csv("E:\\ug\\IO\\praca_domowa2\\adult.csv", header = TRUE,
                   sep = ",", stringsAsFactors = TRUE)


  # Data preprocessing (missing values are encoded as "?")
  # Print a summary of every column, each followed by a separator line, so the
  # "?" placeholders and the most frequent values can be inspected.
  for (column in baza) {
    print(summary(column))
    print("-------------------------------------------------------------------")
  }
  11.  
  # Replace every occurrence of a placeholder in a vector with a fixed value.
  #
  # values      - vector (numeric, character or factor) to clean
  # replacement - value written in place of the placeholder
  # placeholder - marker that denotes a missing entry, "?" by default
  #
  # Returns `values` with the placeholder entries overwritten. %in% is used
  # instead of == so that NA entries are simply left alone rather than causing
  # an error, and the vector is only touched when something actually matches.
  replace_placeholder <- function(values, replacement, placeholder = "?") {
    is_missing <- values %in% placeholder
    if (any(is_missing)) {
      values[is_missing] <- replacement
    }
    values
  }

  # Fill the "?" entries column by column with the fixed values chosen for
  # this data set.
  baza$age <- replace_placeholder(baza$age, 38.58)
  baza$workclass <- replace_placeholder(baza$workclass, "Private")
  baza$fnlwgt <- replace_placeholder(baza$fnlwgt, 189778)
  baza$occupation <- replace_placeholder(baza$occupation, "Craft-repair")
  baza$native.country <- replace_placeholder(
    baza$native.country, "United-States"
  )
  34.  
  35.  
  36. #3a
  37. library("party")
  # Reproducible random split: each row is labelled 1 (about 67 %) or
  # 2 (about 33 %); label 1 rows form the training set, label 2 rows the
  # test set. Only the first 15 columns are kept.
  set.seed(1234)
  split_probs <- c(0.67, 0.33)
  ind <- sample(2, nrow(baza), replace = TRUE, prob = split_probs)
  in_training <- ind == 1
  in_test <- ind == 2
  baza.training <- baza[in_training, 1:15]
  baza.test <- baza[in_test, 1:15]
  42.  
  43. #3b
  # Decision tree
  library("party")
  # Fit a conditional inference tree that predicts income from the fourteen
  # remaining columns of the training set.
  baza.ctree <- ctree(
    income ~ age + workclass + fnlwgt + education + education.num +
      marital.status + occupation + relationship + race + sex +
      capital.gain + capital.loss + hours.per.week + native.country,
    data = baza.training
  )
  # Show the tree in both the default and the compact layout, then as text.
  plot(baza.ctree)
  plot(baza.ctree, type = "simple")
  print(baza.ctree)

  # Predict on the test features (columns 1-14) and compare with the true
  # labels stored in column 15.
  test_features <- baza.test[, 1:14]
  predicted <- predict(baza.ctree, test_features)
  real <- baza.test[, 15]
  # Confusion matrix: predictions in rows, true labels in columns.
  cm_tree <- table(predicted, real)
  cm_tree
  # Accuracy is the share of observations on the diagonal.
  correct_tree <- sum(diag(cm_tree))
  accuracy_tree <- correct_tree / sum(cm_tree)
  accuracy_tree
  57.  
  #3-KNN
  # Build an all-numeric copy of the data: numeric columns are kept as they
  # are, every other column is replaced by its integer factor code. Going
  # through as.factor() keeps this working when a column is plain character.
  # Column order matters: later code addresses daneN by position
  # (1-14 features, 15 = income).
  knn_columns <- c(
    "age", "workclass", "fnlwgt", "education", "education.num",
    "marital.status", "occupation", "relationship", "race", "sex",
    "capital.gain", "capital.loss", "hours.per.week", "native.country",
    "income"
  )
  daneN <- as.data.frame(lapply(baza[knn_columns], function(column) {
    if (is.numeric(column)) column else as.numeric(as.factor(column))
  }))

  # Min-max scaling to [0, 1].
  #
  # x - numeric vector
  #
  # Returns x rescaled so that its minimum maps to 0 and its maximum to 1.
  # When all values are equal there is no spread to scale by, so zeros are
  # returned instead of dividing by zero.
  normalize <- function(x) {
    lowest <- min(x)
    span <- max(x) - lowest
    if (span == 0) {
      return(x - lowest)
    }
    (x - lowest) / span
  }

  # Scale each feature column on its own; scaling the whole data frame in one
  # call would use a single global min/max and let the largest-valued column
  # dominate the distances. The class column (15) is appended unscaled.
  daneN.norm <- cbind(as.data.frame(lapply(daneN[1:14], normalize)), daneN[15])
  dane.norm <- daneN.norm  # same object under the name used originally

  # Same seed and proportions as the earlier split, now on the scaled data.
  set.seed(1234)
  ind <- sample(2, nrow(daneN.norm), replace = TRUE, prob = c(0.67, 0.33))
  baza.training <- daneN.norm[ind == 1, 1:15]
  baza.test <- daneN.norm[ind == 2, 1:15]

  library(class)

  # 3-nearest-neighbour classification of the test rows.
  knn.3 <- knn(baza.training[, 1:14], baza.test[, 1:14],
               cl = baza.training[, 15], k = 3, prob = FALSE)
  predicted <- knn.3
  real <- baza.test[, 15]
  # Confusion matrix: predictions in rows, true labels in columns.
  conf.matrix <- table(predicted, real)
  accuracy_KNN <- sum(diag(conf.matrix)) / sum(conf.matrix)
  accuracy_KNN
  77.  
  #NaiveBayes
  library(e1071)
  # Score the model on rows it has not seen. The split is redone here from
  # `baza` with the same seed and proportions as the decision-tree split,
  # because baza.training / baza.test may by now hold the rescaled kNN data.
  set.seed(1234)
  nb_ind <- sample(2, nrow(baza), replace = TRUE, prob = c(0.67, 0.33))
  nb_train <- baza[nb_ind == 1, ]
  nb_test <- baza[nb_ind == 2, ]

  # Column 15 is the class (income); every other column is a predictor.
  m <- naiveBayes(nb_train[, -15], as.factor(nb_train[, 15]))
  m

  # Confusion matrix: predictions in rows, true labels in columns.
  table_NaibeBayes <- table(predict(m, nb_test[, -15]), nb_test[, 15])
  accuracy_nb <- sum(diag(table_NaibeBayes)) / sum(table_NaibeBayes)
  accuracy_nb
  86.  
  87.  
  88.  
  # True/false positive rates taken from a 2x2 confusion matrix.
  #
  # cm - table with predictions in rows and true labels in columns
  #
  # The first class level is treated as "positive":
  #   TPR = correctly predicted positives / all true positives (column 1)
  #   FPR = wrongly predicted positives   / all true negatives (column 2)
  # Returns a named numeric vector c(TPR = ..., FPR = ...).
  # NOTE(review): this is the cell layout the previously hard-coded counts
  # appear to follow - confirm against the printed matrices.
  positive_rates <- function(cm) {
    c(TPR = cm[1, 1] / sum(cm[, 1]), FPR = cm[1, 2] / sum(cm[, 2]))
  }

  # Derive the rates from the confusion matrices computed above instead of
  # typing the counts in by hand (the hand-typed kNN figures were a copy of
  # the Naive Bayes ones).
  rates_tree <- positive_rates(cm_tree)
  TPR_tree <- rates_tree[["TPR"]]
  FPR_tree <- rates_tree[["FPR"]]

  rates_knn <- positive_rates(conf.matrix)
  TPR_knn <- rates_knn[["TPR"]]
  FPR_knn <- rates_knn[["FPR"]]

  rates_nb <- positive_rates(table_NaibeBayes)
  TPR_nb <- rates_nb[["TPR"]]
  FPR_nb <- rates_nb[["FPR"]]

  library("plotROC")
  # Bar chart comparing the accuracy of the three classifiers, using the
  # values computed in this run.
  wykres <- c(Drzewo_Decyzyjne = accuracy_tree, kNN = accuracy_KNN,
              NaiveBayes = accuracy_nb)
  barplot(wykres)
  101.  
  #4
  # Log-transform the 14 numeric-encoded features.
  dane.log <- log(daneN[, 1:14])
  # Columns 11 and 12 (capital gain / capital loss) contain zeros, so their
  # logarithm is -Inf and prcomp() cannot use them. Both are removed in a
  # single step: dropping 11 first and then 12 would shift the positions and
  # remove the wrong column.
  dane.stand <- scale(dane.log[, -c(11, 12)], center = TRUE)
  dane.pca <- prcomp(dane.stand)
  # Keep the scores on the first two principal components.
  dane.final <- predict(dane.pca)[, 1:2]

  # k-means with three clusters, coloured by cluster.
  cl <- kmeans(dane.final, 3, nstart = 25)
  plot(dane.final, col = cl$cluster)

  # k-means with two clusters, with the cluster centres marked.
  cl <- kmeans(dane.final, 2, nstart = 25)
  plot(dane.final, col = cl$cluster)
  points(cl$centers, col = seq_len(nrow(cl$centers)), pch = 8, cex = 3)

  # Attach the class column. It is taken by position (15) because its name in
  # daneN depends on how that data frame was built.
  income <- daneN[[15]]
  dane.final2 <- cbind(dane.final, income)
  118.  
  119.  
  120. #5
  121. library(arules)
  # Turn every column into a factor so that each distinct value becomes an
  # item. The argument expressions are left exactly as they were because
  # data.frame() derives the column names (and with them the item labels
  # shown in the rules) from them.
  danefactor <- data.frame(
    as.factor(baza$age),
    as.factor(baza$workclass),
    as.factor(baza$fnlwgt),
    (as.factor(baza$education)),
    as.factor(baza$education.num),
    as.factor(baza$marital.status),
    as.factor(baza$occupation),
    as.factor(baza$relationship),
    as.factor(baza$race),
    as.factor(baza$sex),
    as.factor(baza$capital.gain),
    as.factor(baza$capital.loss),
    as.factor(baza$hours.per.week),
    as.factor(baza$native.country),
    as.factor(baza$income)
  )
  # Mine association rules with apriori()'s default settings and list them.
  rules <- apriori(danefactor)
  inspect(rules)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement