SHARE
TWEET

Untitled

a guest Sep 17th, 2019 80 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. OptimalClustering <- function(distance, df, FUN = kmeans, kmax = NULL, dist_order = TRUE, seed = 1789){
  2.     set.seed(seed)
  3.     if (is.null(kmax)) kmax <- nrow(df) %/% 2
  4.     p_dist <- fviz_dist(distance, gradient = list(low = "steelblue",  high = "white"), order = dist_order)
  5.     p_wss <- fviz_nbclust(df, FUN, method = "wss", k.max = kmax)
  6.     p_sil <- fviz_nbclust(df, FUN, method = "silhouette", k.max = kmax)
  7.     gap_stat <- clusGap(df, FUN = FUN, nstart = 25, K.max = kmax, B = 50)
  8.     p_gap <- fviz_gap_stat(gap_stat)
  9.     lay <-  rbind(c(1, 2), c(3, 4))
  10.     gridExtra::grid.arrange(grobs = list(p_dist, p_wss, p_sil, p_gap), layout_matrix = lay)
  11.     if (identical(FUN,kmeans)){
  12.         cl_Methods <- "kmeans"
  13.     } else if (identical(FUN,hcut)){
  14.         cl_Methods <- "hierarchical"
  15.     }
  16.     stab <- clValid(df, nClust = 2:kmax, clMethods = cl_Methods,
  17.                     validation = "stability")
  18.     summary(stab)
  19.     print("Optimal Number of Cluster under One Std Rule")
  20.     sd_measures <- apply(stab@measures, 1, sd)
  21.     min_measures <- apply(stab@measures, 1, min)
  22.     one_sd <- t(sapply(seq_along(sd_measures), function(i) min_measures[i] + sd_measures[i] > stab@measures[i, ,]))
  23.     optim_df <- data.frame(measure = stab@measNames, cluster = colnames(stab@measures)[apply(one_sd, 1, which.max)])
  24.     print(optim_df)
  25. }
  26.  
  27. kmeansOptimal <- function(df, optik = 2, seed = 1789, do_plot = TRUE){
  28.     set.seed(seed)
  29.     clustering_result <- kmeans(df, centers = optik, nstart = 25)
  30.     pkm <- fviz_cluster(clustering_result, geom = "text",  data = df) + theme_cowplot()
  31.     if (do_plot) plot(pkm)
  32.     return(clustering_result)
  33. }
  34.  
  35. hcutOptimal <- function(df, optik = 2, do_plot = TRUE){
  36.     clustering_result <- eclust(df, "hclust", k = optik, graph = FALSE)
  37.     c_phc <- fviz_cluster(clustering_result, geom = "text",  data = df) + theme_cowplot()
  38.     q <- ggplot_build(c_phc)
  39.     k_cols <- q$data[[2]]$colour
  40.     k_cols <- k_cols[unique(clustering_result$cluster[clustering_result$order])]
  41.     phc <- fviz_dend(clustering_result, rect = TRUE, show_labels = TRUE, k = optik, k_colors = k_cols) + theme_cowplot()
  42.     if (do_plot)
  43.         plot(phc)
  44.     return(clustering_result)
  45. }
  46.  
  47. clustCompare <- function(df, kmax = NULL, clmethods = c("hierarchical","kmeans"), seed = 1789){
  48.     set.seed(seed)
  49.     if (is.null(kmax)) kmax <- nrow(df) %/% 2
  50.     stab <- clValid(df, nClust = 2:kmax, clMethods = clmethods,
  51.                     validation = "stability")
  52.     # Display only optimal Scores
  53.     return(stab)
  54. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top