Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
library(editrules)

# Load the census data set.
# Text columns are read as factors explicitly: later steps call as.numeric()
# on the categorical columns, which only yields category codes for factors
# (since R 4.0.0 read.csv() keeps text as character unless told otherwise).
baza <- read.csv(
  "E:\\ug\\IO\\praca_domowa2\\adult.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Data cleaning (missing values are encoded as "?"):
# print a summary of every column so the affected ones can be spotted.
# Iterating over the columns directly is also safe for a zero-column table,
# where 1:ncol(baza) would count 1, 0.
for (column in baza) {
  print(summary(column))
  print("-------------------------------------------------------------------")
}
# Replace the "?" placeholder in a single column with a fixed value.
#
# column       a vector or factor taken from the data set
# replacement  value written wherever the placeholder occurs
# marker       text that marks a missing entry (default "?")
#
# Returns the column with every placeholder replaced. The whole column is
# handled at once instead of row by row, NA entries are left untouched, and a
# column without placeholders is returned unchanged. For factors the
# placeholder level is removed and the replacement level is added when it is
# not present yet, so the assignment cannot silently produce NA. When the
# replacement is numeric, the result is converted to numeric.
replace_unknown <- function(column, replacement, marker = "?") {
  text <- as.character(column)
  is_unknown <- !is.na(text) & text == marker
  if (!any(is_unknown)) {
    return(column)
  }
  text[is_unknown] <- as.character(replacement)
  if (is.numeric(replacement)) {
    return(as.numeric(text))
  }
  if (is.factor(column)) {
    kept_levels <- setdiff(levels(column), marker)
    return(factor(text, levels = union(kept_levels, as.character(replacement))))
  }
  text
}

# Fill-in values are fixed constants chosen by the author
# (presumably the column mean / most frequent category - TODO confirm).
baza$age <- replace_unknown(baza$age, 38.58)
baza$workclass <- replace_unknown(baza$workclass, "Private")
baza$fnlwgt <- replace_unknown(baza$fnlwgt, 189778)
baza$occupation <- replace_unknown(baza$occupation, "Craft-repair")
baza$native.country <- replace_unknown(baza$native.country, "United-States")
# 3a
# Random split of the rows into two groups: label 1 (probability 0.67)
# becomes the training set, label 2 (probability 0.33) the test set.
# The seed is fixed so the same split is drawn on every run.
library("party")
set.seed(1234)
ind <- sample(
  2,
  nrow(baza),
  replace = TRUE,
  prob = c(0.67, 0.33)
)
baza.training <- baza[which(ind == 1), 1:15]
baza.test <- baza[which(ind == 2), 1:15]
# 3b
# Decision tree
library("party")

# Fit a conditional inference tree for income on the 14 predictors,
# using the training rows only.
baza.ctree <- ctree(
  income ~ age + workclass + fnlwgt + education + education.num +
    marital.status + occupation + relationship + race + sex +
    capital.gain + capital.loss + hours.per.week + native.country,
  data = baza.training
)

# Show the tree: full plot, simplified plot, then the text form.
plot(baza.ctree)
plot(baza.ctree, type = "simple")
print(baza.ctree)

# Predict the test rows (columns 1-14 are the predictors, column 15 holds
# the true class) and build the confusion matrix: rows = predicted,
# columns = real.
predicted <- predict(baza.ctree, baza.test[, 1:14])
real <- baza.test[, 15]
cm_tree <- table(predicted, real)
cm_tree

# Accuracy = correctly classified (diagonal) / all test rows.
accuracy_tree <- sum(diag(cm_tree)) / sum(cm_tree)
accuracy_tree
# 3-KNN
# Numeric copy of the data set for distance-based methods.
# Numeric columns are kept as they are; every other column is turned into
# integer category codes. Going through as.factor() for all of them (not only
# education) makes this work for character columns too, where a bare
# as.numeric() would return NA. Columns are selected by name, so the result
# carries plain names (age, ..., income) and `daneN$income` resolves.
encode_numeric <- function(column) {
  if (is.numeric(column)) {
    return(column)
  }
  as.numeric(as.factor(column))
}

# Column order matters: positions 1-14 are the predictors, 15 is the class.
dane_columns <- c(
  "age", "workclass", "fnlwgt", "education", "education.num",
  "marital.status", "occupation", "relationship", "race", "sex",
  "capital.gain", "capital.loss", "hours.per.week", "native.country",
  "income"
)
daneN <- as.data.frame(lapply(baza[dane_columns], encode_numeric))
# Min-max rescaling: the smallest value of `x` maps to 0, the largest to 1.
#
# x      numeric vector (or another numeric object that range() accepts)
# na.rm  if TRUE, missing values are ignored when the range is determined;
#        they remain NA in the result (default FALSE, as before)
#
# Returns an object shaped like `x`. When all values are equal the range is
# zero; zeros are returned in that case instead of the NaN that 0 / 0 gives.
normalize <- function(x, na.rm = FALSE) {
  bounds <- range(x, na.rm = na.rm)
  span <- bounds[2] - bounds[1]
  if (isTRUE(span == 0)) {
    return(x - bounds[1])
  }
  (x - bounds[1]) / span
}
# Rescale each predictor column separately and keep the class column (15)
# as it is. normalize() is applied per column: called on the whole data frame
# it would use one global minimum/maximum, so the column with the largest
# values would dominate the distances.
daneN.norm <- cbind(as.data.frame(lapply(daneN[1:14], normalize)), daneN[15])
# The table used to be stored as `dane.norm` while the following lines read
# `daneN.norm`; both names now refer to the same data.
dane.norm <- daneN.norm

# Same seeded 67% / 33% random split as used for the decision tree.
# Note: this overwrites baza.training / baza.test with the numeric version.
set.seed(1234)
ind <- sample(2, nrow(daneN.norm), replace = TRUE, prob = c(0.67, 0.33))
baza.training <- daneN.norm[ind == 1, 1:15]
baza.test <- daneN.norm[ind == 2, 1:15]

# Classify every test row by its 3 nearest training rows.
library(class)
knn.3 <- knn(
  baza.training[, 1:14],
  baza.test[, 1:14],
  cl = baza.training[, 15],
  k = 3,
  prob = FALSE
)

# Confusion matrix (rows = predicted, columns = real) and accuracy.
predicted <- knn.3
real <- baza.test[, 15]
conf.matrix <- table(predicted, real)
accuracy_KNN <- sum(diag(conf.matrix)) / sum(conf.matrix)
# NaiveBayes
library(e1071)

# Fit the classifier on columns 1-14 with column 15 as the class.
# The model was previously fitted twice (formula call, then this call) and
# the first result was overwritten right away; only the fit that is actually
# used is kept.
m <- naiveBayes(baza[, -15], baza[, 15])
m

# Confusion matrix: rows = predicted class, columns = real class.
# NOTE(review): the model is evaluated on the same rows it was fitted on,
# unlike the tree and kNN, which use a held-out test set.
table_NaibeBayes <- table(predict(m, baza[, -15]), baza[, 15])

# Accuracy = diagonal / total, written the same way as for the other models.
accuracy_nb <- sum(diag(table_NaibeBayes)) / sum(table_NaibeBayes)
accuracy_nb
# True/false positive rates taken from a 2x2 confusion matrix built as
# table(predicted, real): rows = predicted class, columns = real class.
# The first class is treated as "positive", matching the previously
# hard-coded fractions:
#   TPR = cm[1, 1] / (cm[1, 1] + cm[2, 1])
#   FPR = cm[1, 2] / (cm[1, 2] + cm[2, 2])
confusion_rates <- function(cm) {
  c(
    TPR = cm[1, 1] / sum(cm[, 1]),
    FPR = cm[1, 2] / sum(cm[, 2])
  )
}

# The rates are computed from the confusion matrices of the three models
# instead of being typed in by hand; the hand-typed kNN values were a copy of
# the Naive Bayes values.
rates_tree <- confusion_rates(cm_tree)
rates_knn <- confusion_rates(conf.matrix)
rates_nb <- confusion_rates(table_NaibeBayes)

TPR_tree <- rates_tree[["TPR"]]
FPR_tree <- rates_tree[["FPR"]]
TPR_knn <- rates_knn[["TPR"]]
FPR_knn <- rates_knn[["FPR"]]
TPR_nb <- rates_nb[["TPR"]]
FPR_nb <- rates_nb[["FPR"]]

library("plotROC")

# Bar chart comparing the accuracy of the three classifiers, using the values
# computed above so the chart always matches the current run.
wykres <- c(
  Drzewo_Decyzyjne = accuracy_tree,
  kNN = accuracy_KNN,
  NaiveBayes = accuracy_nb
)
barplot(wykres)
# 4
# Log-transform and standardise the 14 predictors.
dane.log <- log(daneN[, 1:14])
dane.stand <- scale(dane.log, center = TRUE)

# Drop columns 11 and 12 (capital.gain, capital.loss) before the PCA.
# Presumably they contain zeros, for which log() gives -Inf - TODO confirm.
# Both are removed in one step: removing 11 and then 12 one after another
# shifts the positions, so the second removal hits the wrong column.
dane.stand <- dane.stand[, -c(11, 12)]

# Principal components; keep the scores on the first two.
dane.pca <- prcomp(dane.stand)
dane.final <- predict(dane.pca)[, 1:2]

# k-means uses random starting centres; fix the seed (same value as the
# other sections) so cluster numbering and colours repeat between runs.
set.seed(1234)

# Clustering with 3 groups, points coloured by cluster.
cl <- kmeans(dane.final, 3, nstart = 25)
plot(dane.final, col = cl$cluster)

# Clustering with 2 groups, with the cluster centres marked by stars.
cl <- kmeans(dane.final, 2, nstart = 25)
plot(dane.final, col = cl$cluster)
points(cl$centers, col = seq_len(nrow(cl$centers)), pch = 8, cex = 3)

# Attach the class column to the two component scores. It is taken by
# position (column 15 of daneN), which works whatever the column is named.
income <- daneN[[15]]
dane.final2 <- cbind(dane.final, income)
# 5
# Association rules: every column is converted to a factor so that each
# distinct value becomes an item, then rules are mined with the default
# apriori() settings and listed.
library(arules)
danefactor <- data.frame(
  as.factor(baza$age),
  as.factor(baza$workclass),
  as.factor(baza$fnlwgt),
  (as.factor(baza$education)),
  as.factor(baza$education.num),
  as.factor(baza$marital.status),
  as.factor(baza$occupation),
  as.factor(baza$relationship),
  as.factor(baza$race),
  as.factor(baza$sex),
  as.factor(baza$capital.gain),
  as.factor(baza$capital.loss),
  as.factor(baza$hours.per.week),
  as.factor(baza$native.country),
  as.factor(baza$income)
)
rules <- apriori(danefactor)
inspect(rules)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement