Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #PROJEKTNI ZADATAK - Sanja Bozic
- library(corrplot)
- library(RColorBrewer)
- library(ggplot2)
- library(ggpubr)
- library(class)
- library(Metrics)
- library(caret)
- library(cluster)
- library(fpc)
- library(BBmisc)
- # Read both CSV files (semicolon-separated, comma as decimal mark)
- df1 <- read.csv("data1.csv", sep = ";", dec = ",")
- df2 <- read.csv("data2.csv", sep = ";", dec = ",")
- # Drop the attributes that are not needed from df1 in a single step.
- # Positions refer to the freshly read frame: columns 1-3, plus the columns
- # that would be 20th and then 2nd once those three are gone (i.e. 23 and 5).
- df1 <- df1[-c(1:3, 5, 23)]
- # Correct the wrong column names
- names(df1)[18] <- "Theta"
- names(df2)[c(1, 4, 18)] <- c("avg.height", "Height.width", "Theta")
- # Order the columns of both frames by header name
- df1 <- df1[order(names(df1))]
- df2 <- df2[order(names(df2))]
- # Stack the two data frames
- df3 <- rbind(df1, df2)
- # str(df3)
- # Keep only the main parameters (picked by position in the sorted frame)
- df4 <- df3[c(4:9, 14:16, 18)]
- # attach() is kept: later statements refer to df4's columns by bare name
- attach(df4)
- # DATA VISUALISATION
- head(df4, n = 5)
- # Upper-triangle correlation plots, first for all attributes (df3) and then
- # for the selected ones (df4), with variables ordered by hierarchical
- # clustering
- invisible(lapply(list(df3, df4), function(frame) {
-   corrplot(cor(frame), type = "upper", order = "hclust",
-            col = brewer.pal(n = 8, name = "RdYlBu"))
- }))
- #' Scatter plot of Zs against one column of df4, with a linear fit
- #'
- #' @param Firmness Unquoted name of the df4 column to put on the x axis
- #'   (e.g. `FirmnessAv`, `firmness1`).
- #' @return A ggplot object built from the global data frame `df4`: red
- #'   points plus an `lm` smoothing line without a confidence band.
- plot_graph <- function(Firmness) {
-   # {{ }} forwards the caller's column name into aes(), so the column is
-   # looked up in df4 itself (no dependence on attach(df4)) and the x axis is
-   # labelled with the real column name instead of the literal "Firmness".
-   ggplot(data = df4, aes(x = {{ Firmness }}, y = Zs)) +
-     geom_point(colour = "firebrick") +
-     geom_smooth(method = "lm", se = FALSE)
- }
- # Zs against the average firmness. The title is Croatian for "Dependence
- # between the peach's electrical resistance and the peach's average
- # ripeness"; the letter "đ" is written as \u0111 so it survives re-encoding
- # of the file (it had been lost to a replacement character).
- plot_graph(FirmnessAv) +
-   labs(title = "Ovisnost izme\u0111u otpora el. struje breskve i prosjeka zrelosti breskve")
- # One panel per individual firmness measurement
- f1 <- plot_graph(firmness1)
- f2 <- plot_graph(firmness2)
- f3 <- plot_graph(firmness3)
- f4 <- plot_graph(firmness4)
- # Arrange the four panels in a 2 x 2 grid labelled A-D
- ggarrange(f1, f2, f3, f4,
-           labels = c("A", "B", "C", "D"),
-           ncol = 2, nrow = 2)
- # DATA SUMMARY
- # Mean of each reported variable
- meanCol_nazivi <- c("FirmnessAV", "SSC.TA", "density.g.cm3", "volume.cm3", "Theta", "Zs")
- mean_vrijednosti <- with(
-   df4,
-   vapply(list(FirmnessAv, SSC.TA, density.g.cm3, volume.cm3, Theta, Zs),
-          mean, numeric(1))
- )
- names(mean_vrijednosti) <- meanCol_nazivi
- mean_vrijednosti
- # Standard deviation of the same variables
- sdCol_nazivi <- c("FirmnessAV", "SSC.TA", "density.g.cm3", "volume.cm3", "Theta", "Zs")
- sd_vrijednosti <- with(
-   df4,
-   vapply(list(FirmnessAv, SSC.TA, density.g.cm3, volume.cm3, Theta, Zs),
-          sd, numeric(1))
- )
- names(sd_vrijednosti) <- sdCol_nazivi
- sd_vrijednosti
- # Number of observations
- length_vrijednosti <- length(df4[["Zs"]])
- length_vrijednosti
- summary(df4)
- # Correlation table, rounded to two decimals
- round(cor(df4), digits = 2)
- # KNN
- # Range-normalise df4 to [0, 1] and print the result
- df4 <- normalize(df4, method = "range", range = c(0, 1), margin = 1L, on.constant = "quiet")
- df4
- # Reproducible 80 % / 20 % split into training and test rows
- set.seed(72)
- ran <- sample(seq_len(nrow(df4)), 0.8 * nrow(df4))
- # Predictors: every column except the 6th, which is the value to predict
- x_train <- df4[ran, -6]
- x_test <- df4[-ran, -6]
- # Target: only the 6th column
- y_train <- df4[[6]][ran]
- y_test <- df4[[6]][-ran]
- # Fit k-NN through caret and predict the held-out rows
- knn_model <- train(x = x_train, y = y_train, method = "knn")
- knn_model
- predictions <- predict(knn_model, newdata = x_test)
- predictions
- # Error metrics on the test set
- # mean absolute error - author's note: the model is off by 3.59 % on average
- mae(actual = y_test, predicted = predictions)
- # mean squared error
- mse(actual = y_test, predicted = predictions)
- # relative absolute error
- rae(actual = y_test, predicted = predictions)
- # root mean squared error
- rmse(actual = y_test, predicted = predictions)
- # median absolute error
- mdae(actual = y_test, predicted = predictions)
- # Trying other values of k.
- # The target is numeric, so k-NN regression (caret::knnreg) is used: it
- # predicts from the neighbours' outcome values and returns a numeric vector
- # directly. class::knn() is a classifier - it turns the target values into
- # class labels and returns one of them by vote - so it is not suitable here
- # and also needed a factor -> character -> numeric conversion afterwards.
- # NOTE(review): the MAE figures noted for the classifier-based version
- # (3.90 % for k = 3, 5.45 % for k = 7) must be re-measured.
- # k = 3
- knn.3 <- predict(knnreg(x_train, y_train, k = 3), x_test)
- mae(y_test, knn.3)
- # k = 7
- knn.7 <- predict(knnreg(x_train, y_train, k = 7), x_test)
- mae(y_test, knn.7)
- # CLUSTERS
- # Choosing the number of clusters (elbow plot).
- # Entry 1 is the total sum of squares (the "one cluster" case); entries
- # 2..15 are the summed within-cluster sums of squares from k-means with that
- # many centres. Built in one go instead of growing the vector in a loop.
- wss <- c(
-   (nrow(df4) - 1) * sum(vapply(df4, var, numeric(1))),
-   vapply(2:15, function(k) sum(kmeans(df4, centers = k)$withinss), numeric(1))
- )
- # Axis labels/title are Croatian: "Number of clusters", "Within-group sum of
- # squares", "Determining the number of clusters" (\u0111 is the letter "đ",
- # escaped so it survives re-encoding of the file).
- plot(1:15, wss, type = "b", xlab = "Broj clustera",
-      ylab = "Unutar skupina zbroj kvadrata",
-      col = "red",
-      main = "Odre\u0111ivanje broja clustera")
- # K-Means Cluster Analysis with 3 clusters.
- # kmeans() starts from random centres, so the outcome depends on the RNG
- # state left by the preceding code.
- fit <- kmeans(df4, centers = 3)
- fit
- # cluster means
- aggregate(df4, by = list(fit$cluster), FUN = mean)
- # append cluster assignment
- data_with_cluster <- data.frame(df4, fit$cluster)
- data_with_cluster
- # Cluster visualisation. plotcluster() and title() are base-graphics calls,
- # so they are issued as two statements; joining them with `+` (ggplot2
- # style) only added their return values together.
- plotcluster(df4, fit$cluster, xlab = "Theta", ylab = "")
- title(main = "Prikaz clustera")
- # Author's run: 69 rows in the first cluster, 72 in the second, 59 in the third
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement