Advertisement
Guest User

Untitled

a guest
Sep 17th, 2019
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.32 KB | None | 0 0
  1. OptimalClustering <- function(distance, df, FUN = kmeans, kmax = NULL, dist_order = TRUE, seed = 1789){
  2. set.seed(seed)
  3. if (is.null(kmax)) kmax <- nrow(df) %/% 2
  4. p_dist <- fviz_dist(distance, gradient = list(low = "steelblue", high = "white"), order = dist_order)
  5. p_wss <- fviz_nbclust(df, FUN, method = "wss", k.max = kmax)
  6. p_sil <- fviz_nbclust(df, FUN, method = "silhouette", k.max = kmax)
  7. gap_stat <- clusGap(df, FUN = FUN, nstart = 25, K.max = kmax, B = 50)
  8. p_gap <- fviz_gap_stat(gap_stat)
  9. lay <- rbind(c(1, 2), c(3, 4))
  10. gridExtra::grid.arrange(grobs = list(p_dist, p_wss, p_sil, p_gap), layout_matrix = lay)
  11. if (identical(FUN,kmeans)){
  12. cl_Methods <- "kmeans"
  13. } else if (identical(FUN,hcut)){
  14. cl_Methods <- "hierarchical"
  15. }
  16. stab <- clValid(df, nClust = 2:kmax, clMethods = cl_Methods,
  17. validation = "stability")
  18. summary(stab)
  19. print("Optimal Number of Cluster under One Std Rule")
  20. sd_measures <- apply(stab@measures, 1, sd)
  21. min_measures <- apply(stab@measures, 1, min)
  22. one_sd <- t(sapply(seq_along(sd_measures), function(i) min_measures[i] + sd_measures[i] > stab@measures[i, ,]))
  23. optim_df <- data.frame(measure = stab@measNames, cluster = colnames(stab@measures)[apply(one_sd, 1, which.max)])
  24. print(optim_df)
  25. }
  26.  
  27. kmeansOptimal <- function(df, optik = 2, seed = 1789, do_plot = TRUE){
  28. set.seed(seed)
  29. clustering_result <- kmeans(df, centers = optik, nstart = 25)
  30. pkm <- fviz_cluster(clustering_result, geom = "text", data = df) + theme_cowplot()
  31. if (do_plot) plot(pkm)
  32. return(clustering_result)
  33. }
  34.  
  35. hcutOptimal <- function(df, optik = 2, do_plot = TRUE){
  36. clustering_result <- eclust(df, "hclust", k = optik, graph = FALSE)
  37. c_phc <- fviz_cluster(clustering_result, geom = "text", data = df) + theme_cowplot()
  38. q <- ggplot_build(c_phc)
  39. k_cols <- q$data[[2]]$colour
  40. k_cols <- k_cols[unique(clustering_result$cluster[clustering_result$order])]
  41. phc <- fviz_dend(clustering_result, rect = TRUE, show_labels = TRUE, k = optik, k_colors = k_cols) + theme_cowplot()
  42. if (do_plot)
  43. plot(phc)
  44. return(clustering_result)
  45. }
  46.  
  47. clustCompare <- function(df, kmax = NULL, clmethods = c("hierarchical","kmeans"), seed = 1789){
  48. set.seed(seed)
  49. if (is.null(kmax)) kmax <- nrow(df) %/% 2
  50. stab <- clValid(df, nClust = 2:kmax, clMethods = clmethods,
  51. validation = "stability")
  52. # Display only optimal Scores
  53. return(stab)
  54. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement