Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- OptimalClustering <- function(distance, df, FUN = kmeans, kmax = NULL, dist_order = TRUE, seed = 1789){
- set.seed(seed)
- if (is.null(kmax)) kmax <- nrow(df) %/% 2
- p_dist <- fviz_dist(distance, gradient = list(low = "steelblue", high = "white"), order = dist_order)
- p_wss <- fviz_nbclust(df, FUN, method = "wss", k.max = kmax)
- p_sil <- fviz_nbclust(df, FUN, method = "silhouette", k.max = kmax)
- gap_stat <- clusGap(df, FUN = FUN, nstart = 25, K.max = kmax, B = 50)
- p_gap <- fviz_gap_stat(gap_stat)
- lay <- rbind(c(1, 2), c(3, 4))
- gridExtra::grid.arrange(grobs = list(p_dist, p_wss, p_sil, p_gap), layout_matrix = lay)
- if (identical(FUN,kmeans)){
- cl_Methods <- "kmeans"
- } else if (identical(FUN,hcut)){
- cl_Methods <- "hierarchical"
- }
- stab <- clValid(df, nClust = 2:kmax, clMethods = cl_Methods,
- validation = "stability")
- summary(stab)
- print("Optimal Number of Cluster under One Std Rule")
- sd_measures <- apply(stab@measures, 1, sd)
- min_measures <- apply(stab@measures, 1, min)
- one_sd <- t(sapply(seq_along(sd_measures), function(i) min_measures[i] + sd_measures[i] > stab@measures[i, ,]))
- optim_df <- data.frame(measure = stab@measNames, cluster = colnames(stab@measures)[apply(one_sd, 1, which.max)])
- print(optim_df)
- }
- kmeansOptimal <- function(df, optik = 2, seed = 1789, do_plot = TRUE){
- set.seed(seed)
- clustering_result <- kmeans(df, centers = optik, nstart = 25)
- pkm <- fviz_cluster(clustering_result, geom = "text", data = df) + theme_cowplot()
- if (do_plot) plot(pkm)
- return(clustering_result)
- }
- hcutOptimal <- function(df, optik = 2, do_plot = TRUE){
- clustering_result <- eclust(df, "hclust", k = optik, graph = FALSE)
- c_phc <- fviz_cluster(clustering_result, geom = "text", data = df) + theme_cowplot()
- q <- ggplot_build(c_phc)
- k_cols <- q$data[[2]]$colour
- k_cols <- k_cols[unique(clustering_result$cluster[clustering_result$order])]
- phc <- fviz_dend(clustering_result, rect = TRUE, show_labels = TRUE, k = optik, k_colors = k_cols) + theme_cowplot()
- if (do_plot)
- plot(phc)
- return(clustering_result)
- }
- clustCompare <- function(df, kmax = NULL, clmethods = c("hierarchical","kmeans"), seed = 1789){
- set.seed(seed)
- if (is.null(kmax)) kmax <- nrow(df) %/% 2
- stab <- clValid(df, nClust = 2:kmax, clMethods = clmethods,
- validation = "stability")
- # Display only optimal Scores
- return(stab)
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement