Advertisement
Guest User

Untitled

a guest
Jan 24th, 2020
196
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #PROJEKTNI ZADATAK - Sanja Bozic
  2. library(corrplot)
  3. library(RColorBrewer)
  4. library(ggplot2)
  5. library(ggpubr)
  6. library(class)
  7. library(Metrics)
  8. library(caret)
  9. library(cluster)
  10. library(fpc)
  11. library(BBmisc)
  12.  
  # Read the two input CSV files. read.csv2() is read.csv() with
  # sep = ";" and dec = "," as its defaults.
  df1 <- read.csv2("data1.csv")
  df2 <- read.csv2("data2.csv")

  # Drop unneeded attributes from df1: the first three columns, then
  # column 20 and column 2 of what is left (positions shift after each
  # removal, so the order of these steps matters).
  df1 <- df1[-(1:3)]
  df1 <- df1[-20]
  df1 <- df1[-2]

  # Correct wrongly named columns.
  names(df1)[18] <- "Theta"
  names(df2)[c(1, 4, 18)] <- c("avg.height", "Height.width", "Theta")

  # Sort the columns of each frame by name.
  # Presumably this aligns both frames before stacking - confirm.
  df1 <- df1[order(names(df1))]
  df2 <- df2[order(names(df2))]

  # Stack the two data frames row-wise.
  df3 <- rbind(df1, df2)
  # str(df3)

  # Keep only the main parameters (selected by column position).
  df4 <- df3[c(4:9, 14:16, 18)]
  # Put df4's columns on the search path; later sections refer to them by
  # bare name (e.g. FirmnessAv, Zs). The attached copy is a snapshot: later
  # reassignments of df4 do not change it.
  attach(df4)
  41.  
  # DATA VISUALISATION
  # Print the first five rows of the selected columns.
  head(df4, n = 5)

  # Correlation plots (upper triangle, variables ordered by hierarchical
  # clustering): first for the full merged table, then for the selected
  # subset. Both use the same 8-colour "RdYlBu" palette.
  corr_palette <- brewer.pal(n = 8, name = "RdYlBu")
  corrplot(cor(df3), type = "upper", order = "hclust", col = corr_palette)
  corrplot(cor(df4), type = "upper", order = "hclust", col = corr_palette)
  50.  
  # Scatter plot of Zs against a firmness measurement, with a fitted
  # linear regression line (no confidence band).
  #
  # Firmness: vector of firmness values used as the x aesthetic; assumed to
  #           have one value per row of df4 - confirm at the call site.
  # Returns:  a ggplot object. The plot is built on the global data frame
  #           df4, which supplies the Zs column for the y aesthetic.
  plot_graph <- function(Firmness) {
    ggplot(data = df4, aes(x = Firmness, y = Zs)) +
      geom_point(col = "firebrick") +
      # FALSE instead of F: F is an ordinary variable that can be reassigned.
      geom_smooth(method = "lm", se = FALSE)
  }
  56.  
  # Zs against the average firmness. The title is Croatian, roughly:
  # "Dependence between the electrical resistance of the peach and the
  # average ripeness of the peach". The letter "d with stroke" is written
  # as the escape \u0111 so the title survives a change of file encoding
  # (it was mis-encoded in the original text).
  plot_graph(FirmnessAv) +
    labs(title = "Ovisnost izme\u0111u otpora el. struje breskve i prosjeka zrelosti breskve")

  # One plot per individual firmness measurement.
  f1 <- plot_graph(firmness1)
  f2 <- plot_graph(firmness2)
  f3 <- plot_graph(firmness3)
  f4 <- plot_graph(firmness4)

  # Arrange the four plots in a 2 x 2 grid labelled A-D.
  ggarrange(f1, f2, f3, f4,
            labels = c("A", "B", "C", "D"),
            ncol = 2, nrow = 2)
  67.  
  # SUMMARY STATISTICS
  # Columns of df4 that are summarised, in output order.
  stat_cols <- c("FirmnessAv", "SSC.TA", "density.g.cm3",
                 "volume.cm3", "Theta", "Zs")

  # Mean of each summarised column, labelled for printing.
  meanCol_nazivi <- c("FirmnessAV", "SSC.TA", "density.g.cm3",
                      "volume.cm3", "Theta", "Zs")
  mean_vrijednosti <- vapply(df4[stat_cols], mean, numeric(1))
  names(mean_vrijednosti) <- meanCol_nazivi
  mean_vrijednosti

  # Standard deviation of each summarised column, labelled the same way.
  sdCol_nazivi <- c("FirmnessAV", "SSC.TA", "density.g.cm3",
                    "volume.cm3", "Theta", "Zs")
  sd_vrijednosti <- vapply(df4[stat_cols], sd, numeric(1))
  names(sd_vrijednosti) <- sdCol_nazivi
  sd_vrijednosti

  # Number of observations.
  length_vrijednosti <- nrow(df4)
  length_vrijednosti

  # Per-column summary and the correlation table rounded to two decimals.
  summary(df4)
  round(cor(df4), 2)
  86.  
  # KNN
  # Rescale df4 to the range [0, 1] with BBmisc::normalize() and print it.
  # From here on df4 holds the rescaled values.
  df4 <- normalize(df4, method = "range", range = c(0, 1),
                   margin = 1L, on.constant = "quiet")
  df4

  # Reproducible 80/20 train/test split by row index.
  set.seed(72)
  n_rows <- nrow(df4)
  # seq_len() instead of 1:n so an empty table cannot yield the indices 1, 0.
  ran <- sample(seq_len(n_rows), 0.8 * n_rows)
  x_train <- df4[ran, ]
  x_test <- df4[-ran, ]

  # Remove the column that is being predicted (column 6 of df4) from the
  # predictors.
  x_train <- x_train[-6]
  x_test <- x_test[-6]

  # The response vectors contain only the column that is being predicted.
  y_train <- df4[ran, 6]
  y_test <- df4[-ran, 6]

  # Fit a k-nearest-neighbours model with caret (k is tuned by train())
  # and predict the held-out rows.
  knn_model <- train(x_train, y_train, method = "knn")
  knn_model
  predictions <- predict(knn_model, x_test)
  predictions

  # Error metrics on the test set (target is on the rescaled [0, 1] scale).
  # Author's note on MAE: the model is off by 3.59% on average
  # (presumably an MAE of about 0.0359 on the rescaled target - confirm).
  mae(y_test, predictions)  # mean absolute error
  mse(y_test, predictions)  # mean squared error
  rae(y_test, predictions)  # relative absolute error
  rmse(y_test, predictions) # root mean squared error
  mdae(y_test, predictions) # median absolute error
  # Try other values of k.
  # The target is numeric, so this uses k-NN regression (caret::knnreg),
  # which predicts from the targets of the k nearest training rows.
  # class::knn() is a classifier: it treated every distinct target value as
  # a class label and returned a factor, which then had to be converted
  # back to numbers.
  # NOTE(review): the MAE figures recorded by the author for this section
  # (3.90% for k = 3, 5.45% for k = 7) came from the classifier-based
  # version and need to be re-measured.

  # k = 3
  knn.3 <- predict(knnreg(x_train, y_train, k = 3), x_test)
  mae(y_test, knn.3)

  # k = 7
  knn.7 <- predict(knnreg(x_train, y_train, k = 7), x_test)
  mae(y_test, knn.7)
  126.  
  # CLUSTERS
  # Determine the number of clusters: total within-cluster sum of squares
  # for k = 1..15, plotted against k.
  max_k <- 15
  wss <- numeric(max_k)  # preallocated instead of grown inside the loop
  # k = 1: a single cluster, so the within sum of squares equals
  # (n - 1) * sum of the column variances. vapply() works column by column
  # without first coercing the data frame to a matrix as apply() does.
  wss[1] <- (nrow(df4) - 1) * sum(vapply(df4, var, numeric(1)))
  for (k in 2:max_k) {
    # tot.withinss is documented as sum(withinss).
    wss[k] <- kmeans(df4, centers = k)$tot.withinss
  }
  # Axis labels and title are Croatian: "Number of clusters",
  # "Within-groups sum of squares", "Determining the number of clusters".
  # \u0111 restores the letter that was mis-encoded in the original title.
  plot(seq_len(max_k), wss, type = "b", xlab = "Broj clustera",
       ylab = "Unutar skupina zbroj kvadrata",
       col = "red",
       main = "Odre\u0111ivanje broja clustera")
  136.  
  # K-means cluster analysis with three clusters; print the fitted object.
  fit <- kmeans(df4, centers = 3)
  fit

  # Mean of every column within each cluster.
  aggregate(df4, by = list(fit$cluster), FUN = mean)

  # Append each row's cluster assignment as the column fit.cluster and
  # print the result.
  data_with_cluster <- data.frame(df4, fit.cluster = fit$cluster)
  data_with_cluster
  147.  
  # Visualise the clusters.
  # plotcluster() and title() are base-graphics calls run for their side
  # effects; they are not ggplot layers, so they are called one after the
  # other instead of being chained with `+` (the sum of their return values
  # is meaningless).
  # NOTE(review): plotcluster() draws a projection of the data rather than a
  # raw column, so the x-axis label "Theta" may be misleading - confirm.
  plotcluster(df4, fit$cluster, xlab = "Theta", ylab = "")
  title(main = "Prikaz clustera")

  # Author's recorded result: 69 rows in the first cluster, 72 in the second
  # and 59 in the third.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement