SHARE
TWEET

Untitled

a guest Jan 24th, 2020 77 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #PROJEKTNI ZADATAK - Sanja Bozic
  2. library(corrplot)
  3. library(RColorBrewer)
  4. library(ggplot2)
  5. library(ggpubr)
  6. library(class)
  7. library(Metrics)
  8. library(caret)
  9. library(cluster)
  10. library(fpc)
  11. library(BBmisc)
  12.  
  # Read both CSV exports (semicolon-separated, decimal comma).
  df1 <- read.csv("data1.csv", sep = ";", dec = ",")
  df2 <- read.csv("data2.csv", sep = ";", dec = ",")

  # Drop the unneeded attributes from df1 in one step. Counted in the freshly
  # read frame, positions 1, 2, 3, 5 and 23 are exactly the columns that a
  # sequence of single drops -1, -1, -1, -20, -2 would remove.
  df1 <- df1[-c(1, 2, 3, 5, 23)]

  # Correct the wrong column names.
  names(df1)[18] <- "Theta"
  names(df2)[c(1, 4, 18)] <- c("avg.height", "Height.width", "Theta")

  # Sort the columns of each frame by header name.
  # NOTE(review): order() on character names follows the session's collation
  # locale, and the positional selection further down depends on the resulting
  # order - confirm the indices if this runs under a different locale.
  df1 <- df1[order(names(df1))]
  df2 <- df2[order(names(df2))]

  # Stack the two data frames into one.
  df3 <- rbind(df1, df2)
  # str(df3)

  # Keep only the main parameters, selected by position in the sorted frame.
  df4 <- df3[c(4:9, 14:16, 18)]

  # Put the df4 columns on the search path; code further down refers to them
  # by bare name. attach() exposes a copy, so later changes to df4 are not
  # reflected in those bare names.
  attach(df4)
  41.  
  # DATA VISUALISATION
  # Show the first five rows of the reduced data set.
  head(df4, n = 5)

  # Upper-triangle correlation plots, variables ordered by hierarchical
  # clustering, drawn with an 8-step RdYlBu palette: first for the full
  # frame (df3), then for the reduced one (df4).
  invisible(lapply(list(df3, df4), function(tablica) {
    corrplot(
      cor(tablica),
      type = "upper",
      order = "hclust",
      col = brewer.pal(n = 8, name = "RdYlBu")
    )
  }))
  50.  
  #' Scatter plot of Zs against a firmness measurement, with a linear fit
  #'
  #' @param Firmness Vector of values plotted on the x axis. Because it is
  #'   mapped under this argument name, the x axis is labelled "Firmness".
  #' @param data Data frame handed to `ggplot()`; `Zs` (the y axis) is looked
  #'   up in it. Defaults to the script-level `df4`, so existing one-argument
  #'   calls behave as before.
  #' @return A ggplot object: firebrick-coloured points plus an `lm` smoothing
  #'   line drawn without a confidence band.
  plot_graph <- function(Firmness, data = df4) {
    ggplot(data = data, aes(x = Firmness, y = Zs)) +
      geom_point(col = "firebrick") +
      # FALSE rather than F: F is an ordinary variable that can be reassigned.
      geom_smooth(method = "lm", se = FALSE)
  }
  56.  
  # Zs against the average firmness. The column is taken from df4 explicitly
  # instead of through a bare name that only resolves while df4 is attached.
  # The title uses a \u escape for the Croatian letter "d with stroke" so the
  # text survives any source-file encoding.
  plot_graph(df4[["FirmnessAv"]]) +
    labs(title = paste0(
      "Ovisnost izme\u0111u otpora el. struje breskve ",
      "i prosjeka zrelosti breskve"
    ))

  # One plot per individual firmness measurement.
  f1 <- plot_graph(df4[["firmness1"]])
  f2 <- plot_graph(df4[["firmness2"]])
  f3 <- plot_graph(df4[["firmness3"]])
  f4 <- plot_graph(df4[["firmness4"]])

  # Arrange the four plots in a 2 x 2 grid labelled A-D.
  ggarrange(
    f1, f2, f3, f4,
    labels = c("A", "B", "C", "D"),
    ncol = 2, nrow = 2
  )
  67.  
  # DATA SUMMARY
  # Columns to summarise, read from df4 by name so the block does not depend
  # on df4 being attached to the search path.
  stat_cols <- c(
    "FirmnessAv", "SSC.TA", "density.g.cm3", "volume.cm3", "Theta", "Zs"
  )

  # Mean of each selected column, labelled for printing.
  meanCol_nazivi <- c(
    "FirmnessAV", "SSC.TA", "density.g.cm3", "volume.cm3", "Theta", "Zs"
  )
  mean_vrijednosti <- vapply(df4[stat_cols], mean, numeric(1))
  names(mean_vrijednosti) <- meanCol_nazivi
  mean_vrijednosti

  # Standard deviation of each selected column, labelled the same way.
  sdCol_nazivi <- c(
    "FirmnessAV", "SSC.TA", "density.g.cm3", "volume.cm3", "Theta", "Zs"
  )
  sd_vrijednosti <- vapply(df4[stat_cols], sd, numeric(1))
  names(sd_vrijednosti) <- sdCol_nazivi
  sd_vrijednosti

  # Number of observations (length of the Zs column).
  length_vrijednosti <- length(df4[["Zs"]])
  length_vrijednosti

  summary(df4)
  round(cor(df4), 2) # correlation table
  86.  
  # KNN
  # Rescale the data to the range [0, 1]; the result replaces df4.
  df4 <- normalize(df4, method = "range", range = c(0, 1), margin = 1L,
                   on.constant = "quiet")
  df4

  # Reproducible 80/20 train/test split. seq_len() avoids the 1:0 trap on an
  # empty frame and floor() makes the truncation of the sample size explicit.
  set.seed(72)
  ran <- sample(seq_len(nrow(df4)), floor(0.8 * nrow(df4)))

  # Position of the column being predicted.
  # NOTE(review): this is a position in the name-sorted frame - confirm it
  # still points at the intended target if the column set changes.
  target_col <- 6L

  # Predictors: every column except the one being predicted.
  x_train <- df4[ran, -target_col, drop = FALSE]
  x_test <- df4[-ran, -target_col, drop = FALSE]

  # Outcome: only the column being predicted.
  y_train <- df4[ran, target_col]
  y_test <- df4[-ran, target_col]

  # caret picks k by resampling.
  knn_model <- train(x_train, y_train, method = "knn")
  knn_model
  predictions <- predict(knn_model, x_test)
  predictions

  # Error metrics on the [0, 1]-scaled target (the original note reported a
  # mean absolute error of about 3.59% for the author's run).
  mae(y_test, predictions)  # mean absolute error
  mse(y_test, predictions)  # mean squared error
  rae(y_test, predictions)  # relative absolute error
  rmse(y_test, predictions) # root mean squared error
  mdae(y_test, predictions) # median absolute error

  # Trying other values of k. The outcome is continuous, so k-nearest-
  # neighbour *regression* (caret::knnreg) is used here. class::knn() is a
  # classifier: it would treat every distinct outcome value as its own class
  # and return the label that wins a (randomly tie-broken) vote, not an
  # estimate averaged over the neighbours.
  knn.3 <- predict(knnreg(x_train, y_train, k = 3), x_test)
  mae(y_test, knn.3)

  knn.7 <- predict(knnreg(x_train, y_train, k = 7), x_test)
  mae(y_test, knn.7)
  126.  
  # CLUSTERS
  # Choosing the number of clusters: total within-cluster sum of squares for
  # 1..max_k clusters (elbow plot). The vector is preallocated rather than
  # grown inside the loop.
  max_k <- 15L
  wss <- numeric(max_k)
  # One cluster: (n - 1) times the sum of the per-column variances.
  wss[1] <- (nrow(df4) - 1) * sum(vapply(df4, var, numeric(1)))
  for (i in 2:max_k) {
    wss[i] <- sum(kmeans(df4, centers = i)$withinss)
  }
  # The title uses a \u escape for the Croatian letter "d with stroke" so the
  # text survives any source-file encoding.
  plot(seq_len(max_k), wss,
       type = "b",
       xlab = "Broj clustera",
       ylab = "Unutar skupina zbroj kvadrata",
       col = "red",
       main = "Odre\u0111ivanje broja clustera")

  # K-means cluster analysis with three clusters.
  fit <- kmeans(df4, 3)
  fit

  # Cluster means.
  aggregate(df4, by = list(fit$cluster), FUN = mean)

  # Append the cluster assignment to the data.
  data_with_cluster <- data.frame(df4, fit$cluster)
  data_with_cluster

  # Cluster visualisation. plotcluster() and title() are base-graphics calls,
  # so they are issued as two statements: joining them with `+` would apply
  # arithmetic to their return values instead of composing a plot.
  plotcluster(df4, fit$cluster, xlab = "Theta", ylab = "")
  title(main = "Prikaz clustera")

  # Original note from the author's run: 69 observations in the first
  # cluster, 72 in the second and 59 in the third.
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Top