Untitled

OptimalClustering <- function(distance, df, FUN = kmeans, kmax = NULL, dist_order = TRUE, seed = 1789){
	set.seed(seed)
	if (is.null(kmax)) kmax <- nrow(df) %/% 2
	p_dist <- fviz_dist(distance, gradient = list(low = "steelblue",  high = "white"), order = dist_order)
	p_wss <- fviz_nbclust(df, FUN, method = "wss", k.max = kmax)
	p_sil <- fviz_nbclust(df, FUN, method = "silhouette", k.max = kmax)
	gap_stat <- clusGap(df, FUN = FUN, nstart = 25, K.max = kmax, B = 50)
	p_gap <- fviz_gap_stat(gap_stat)
	lay <-  rbind(c(1, 2), c(3, 4))
	gridExtra::grid.arrange(grobs = list(p_dist, p_wss, p_sil, p_gap), layout_matrix = lay)
	if (identical(FUN,kmeans)){
		cl_Methods <- "kmeans"
	} else if (identical(FUN,hcut)){
		cl_Methods <- "hierarchical"
	}
	stab <- clValid(df, nClust = 2:kmax, clMethods = cl_Methods,
	                validation = "stability")
	summary(stab)
	print("Optimal Number of Cluster under One Std Rule")
	sd_measures <- apply(stab@measures, 1, sd)
	min_measures <- apply(stab@measures, 1, min)
	one_sd <- t(sapply(seq_along(sd_measures), function(i) min_measures[i] + sd_measures[i] > stab@measures[i, ,]))
	optim_df <- data.frame(measure = stab@measNames, cluster = colnames(stab@measures)[apply(one_sd, 1, which.max)])
	print(optim_df)
}

kmeansOptimal <- function(df, optik = 2, seed = 1789, do_plot = TRUE){
	set.seed(seed)
	clustering_result <- kmeans(df, centers = optik, nstart = 25)
	pkm <- fviz_cluster(clustering_result, geom = "text",  data = df) + theme_cowplot()
	if (do_plot) plot(pkm)
	return(clustering_result)
}

hcutOptimal <- function(df, optik = 2, do_plot = TRUE){
	clustering_result <- eclust(df, "hclust", k = optik, graph = FALSE)
	c_phc <- fviz_cluster(clustering_result, geom = "text",  data = df) + theme_cowplot()
	q <- ggplot_build(c_phc)
	k_cols <- q$data[[2]]$colour
	k_cols <- k_cols[unique(clustering_result$cluster[clustering_result$order])]
	phc <- fviz_dend(clustering_result, rect = TRUE, show_labels = TRUE, k = optik, k_colors = k_cols) + theme_cowplot()
	if (do_plot)
		plot(phc)
	return(clustering_result)
}

clustCompare <- function(df, kmax = NULL, clmethods = c("hierarchical","kmeans"), seed = 1789){
	set.seed(seed)
	if (is.null(kmax)) kmax <- nrow(df) %/% 2
	stab <- clValid(df, nClust = 2:kmax, clMethods = clmethods,
	                validation = "stability")
	# Display only optimal Scores
	return(stab)
}