Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- NbCluster_copy <- function (data = NULL, diss = NULL, distance = "euclidean",
- min.nc = 2, max.nc = 15, method = NULL, index = "all", alphaBeale = 0.1)
- {
- x <- 0
- min_nc <- min.nc
- max_nc <- max.nc
- if (is.null(method))
- stop("method is NULL")
- method <- pmatch(method, c("ward.D2", "single", "complete",
- "average", "mcquitty", "median", "centroid", "kmeans",
- "ward.D"))
- indice <- pmatch(index, c("kl", "ch", "hartigan", "ccc",
- "scott", "marriot", "trcovw", "tracew", "friedman",
- "rubin", "cindex", "db", "silhouette", "duda", "pseudot2",
- "beale", "ratkowsky", "ball", "ptbiserial", "gap", "frey",
- "mcclain", "gamma", "gplus", "tau", "dunn", "hubert",
- "sdindex", "dindex", "sdbw", "all", "alllong"))
- if (is.na(indice))
- stop("invalid clustering index")
- if (indice == -1)
- stop("ambiguous index")
- if ((indice == 3) || (indice == 5) || (indice == 6) || (indice ==
- 7) || (indice == 8) || (indice == 9) || (indice == 10) ||
- (indice == 11) || (indice == 18) || (indice == 27) ||
- (indice == 29) || (indice == 31) || (indice == 32)) {
- if ((max.nc - min.nc) < 2)
- stop("The difference between the minimum and the maximum number of clusters must be at least equal to 2")
- }
- if (is.null(data)) {
- if (method == 8) {
- stop("\n", "method = kmeans, data matrix is needed")
- }else {
- if ((indice == 1) || (indice == 2) || (indice ==
- 3) || (indice == 4) || (indice == 5) || (indice ==
- 6) || (indice == 7) || (indice == 8) || (indice ==
- 9) || (indice == 10) || (indice == 12) || (indice ==
- 14) || (indice == 15) || (indice == 16) || (indice ==
- 17) || (indice == 18) || (indice == 19) || (indice ==
- 20) || (indice == 23) || (indice == 24) || (indice ==
- 25) || (indice == 27) || (indice == 28) || (indice ==
- 29) || (indice == 30) || (indice == 31) || (indice ==
- 32))
- stop("\n", "Data matrix is needed. Only frey, mcclain, cindex, sihouette and dunn can be computed.",
- "\n")
- if (is.null(diss))
- stop("data matrix and dissimilarity matrix are both null")
- else cat("\n", "Only frey, mcclain, cindex, sihouette and dunn can be computed. To compute the other indices, data matrix is needed",
- "\n")
- }
- }else {
- jeu1 <- as.matrix(data)
- numberObsBefore <- dim(jeu1)[1]
- jeu <- na.omit(jeu1)
- nn <- numberObsAfter <- dim(jeu)[1]
- pp <- dim(jeu)[2]
- TT <- t(jeu) %*% jeu
- sizeEigenTT <- length(eigen(TT)$value)
- eigenValues <- eigen(TT/(nn - 1))$value
- if (any(indice == 4) || (indice == 5) || (indice ==
- 6) || (indice == 7) || (indice == 8) || (indice ==
- 9) || (indice == 10) || (indice == 31) || (indice ==
- 32)) {
- for (i in 1:sizeEigenTT) {
- if (eigenValues[i] < 0) {
- stop("The TSS matrix is indefinite. There must be too many missing values. The index cannot be calculated.")
- }
- }
- s1 <- sqrt(eigenValues)
- ss <- rep(1, sizeEigenTT)
- for (i in 1:sizeEigenTT) {
- if (s1[i] != 0)
- ss[i] = s1[i]
- }
- vv <- prod(ss)
- }
- }
- if (is.null(distance))
- distanceM <- 7
- if (!is.null(distance))
- distanceM <- pmatch(distance, c("euclidean", "maximum",
- "manhattan", "canberra", "binary", "minkowski"))
- if (is.na(distanceM)) {
- stop("invalid distance")
- }
- if (is.null(diss)) {
- if (distanceM == 1) {
- md <- dist(jeu, method = "euclidean")
- }
- if (distanceM == 2) {
- md <- dist(jeu, method = "maximum")
- }
- if (distanceM == 3) {
- md <- dist(jeu, method = "manhattan")
- }
- if (distanceM == 4) {
- md <- dist(jeu, method = "canberra")
- }
- if (distanceM == 5) {
- md <- dist(jeu, method = "binary")
- }
- if (distanceM == 6) {
- md <- dist(jeu, method = "minkowski")
- }
- if (distanceM == 7) {
- stop("dissimilarity matrix and distance are both NULL")
- }
- }
- if (!is.null(diss)) {
- if ((distanceM == 1) || (distanceM == 2) || (distanceM ==
- 3) || (distanceM == 4) || (distanceM == 5) || (distanceM ==
- 6))
- stop("dissimilarity matrix and distance are both not null")
- else md <- diss
- }
- res <- array(0, c(max_nc - min_nc + 1, 30))
- x_axis <- min_nc:max_nc
- resCritical <- array(0, c(max_nc - min_nc + 1, 4))
- rownames(resCritical) <- min_nc:max_nc
- colnames(resCritical) <- c("CritValue_Duda", "CritValue_PseudoT2",
- "Fvalue_Beale", "CritValue_Gap")
- rownames(res) <- min_nc:max_nc
- colnames(res) <- c("KL", "CH", "Hartigan", "CCC", "Scott",
- "Marriot", "TrCovW", "TraceW", "Friedman", "Rubin",
- "Cindex", "DB", "Silhouette", "Duda", "Pseudot2", "Beale",
- "Ratkowsky", "Ball", "Ptbiserial", "Gap", "Frey", "McClain",
- "Gamma", "Gplus", "Tau", "Dunn", "Hubert", "SDindex",
- "Dindex", "SDbw")
- if (is.na(method))
- stop("invalid clustering method")
- if (method == -1)
- stop("ambiguous method")
- if (method == 1) {
- hc <- hclust(md, method = "ward.D2")
- }
- if (method == 2) {
- hc <- hclust(md, method = "single")
- }
- if (method == 3) {
- hc <- hclust(md, method = "complete")
- }
- if (method == 4) {
- hc <- hclust(md, method = "average")
- }
- if (method == 5) {
- hc <- hclust(md, method = "mcquitty")
- }
- if (method == 6) {
- hc <- hclust(md, method = "median")
- }
- if (method == 7) {
- hc <- hclust(md, method = "centroid")
- }
- if (method == 9) {
- hc <- hclust(md, method = "ward.D")
- }
- centers <- function(cl, x) {
- x <- as.matrix(x)
- n <- length(cl)
- k <- max(cl)
- centers <- matrix(nrow = k, ncol = ncol(x))
- {
- for (i in 1:k) {
- for (j in 1:ncol(x)) {
- centers[i, j] <- mean(x[cl == i, j])
- }
- }
- }
- return(centers)
- }
- Average.scattering <- function(cl, x) {
- x <- as.matrix(x)
- n <- length(cl)
- k <- max(cl)
- centers.matrix <- centers(cl, x)
- cluster.size <- numeric(0)
- variance.clusters <- matrix(0, ncol = ncol(x), nrow = k)
- var <- matrix(0, ncol = ncol(x), nrow = k)
- for (u in 1:k) cluster.size[u] <- sum(cl == u)
- for (u in 1:k) {
- for (j in 1:ncol(x)) {
- for (i in 1:n) {
- if (cl[i] == u)
- variance.clusters[u, j] <- variance.clusters[u,
- j] + (x[i, j] - centers.matrix[u, j])^2
- }
- }
- }
- for (u in 1:k) {
- for (j in 1:ncol(x)) variance.clusters[u, j] = variance.clusters[u,
- j]/cluster.size[u]
- }
- variance.matrix <- numeric(0)
- for (j in 1:ncol(x)) variance.matrix[j] = var(x[, j]) *
- (n - 1)/n
- Somme.variance.clusters <- 0
- for (u in 1:k) Somme.variance.clusters <- Somme.variance.clusters +
- sqrt((variance.clusters[u, ] %*% (variance.clusters[u,
- ])))
- stdev <- (1/k) * sqrt(Somme.variance.clusters)
- scat <- (1/k) * (Somme.variance.clusters/sqrt(variance.matrix %*%
- variance.matrix))
- scat <- list(stdev = stdev, centers = centers.matrix,
- variance.intraclusters = variance.clusters, scatt = scat)
- return(scat)
- }
- density.clusters <- function(cl, x) {
- x <- as.matrix(x)
- k <- max(cl)
- n <- length(cl)
- distance <- matrix(0, ncol = 1, nrow = n)
- density <- matrix(0, ncol = 1, nrow = k)
- centers.matrix <- centers(cl, x)
- stdev <- Average.scattering(cl, x)$stdev
- for (i in 1:n) {
- u = 1
- while (cl[i] != u) u <- u + 1
- for (j in 1:ncol(x)) {
- distance[i] <- distance[i] + (x[i, j] - centers.matrix[u,
- j])^2
- }
- distance[i] <- sqrt(distance[i])
- if (distance[i] <= stdev)
- density[u] = density[u] + 1
- }
- dens <- list(distance = distance, density = density)
- return(dens)
- }
- density.bw <- function(cl, x) {
- x <- as.matrix(x)
- k <- max(cl)
- n <- length(cl)
- centers.matrix <- centers(cl, x)
- stdev <- Average.scattering(cl, x)$stdev
- density.bw <- matrix(0, ncol = k, nrow = k)
- u <- 1
- for (u in 1:k) {
- for (v in 1:k) {
- if (v != u) {
- distance <- matrix(0, ncol = 1, nrow = n)
- moy <- (centers.matrix[u, ] + centers.matrix[v,
- ])/2
- for (i in 1:n) {
- if ((cl[i] == u) || (cl[i] == v)) {
- for (j in 1:ncol(x)) {
- distance[i] <- distance[i] + (x[i, j] -
- moy[j])^2
- }
- distance[i] <- sqrt(distance[i])
- if (distance[i] <= stdev) {
- density.bw[u, v] <- density.bw[u, v] +
- 1
- }
- }
- }
- }
- }
- }
- density.clust <- density.clusters(cl, x)$density
- S <- 0
- for (u in 1:k) for (v in 1:k) {
- if (max(density.clust[u], density.clust[v]) != 0)
- S = S + (density.bw[u, v]/max(density.clust[u],
- density.clust[v]))
- }
- density.bw <- S/(k * (k - 1))
- return(density.bw)
- }
- Dis <- function(cl, x) {
- x <- as.matrix(x)
- k <- max(cl)
- centers.matrix <- centers(cl, x)
- Distance.centers <- dist(centers.matrix)
- Dmin <- min(Distance.centers)
- Dmax <- max(Distance.centers)
- Distance.centers <- as.matrix(Distance.centers)
- s2 <- 0
- for (u in 1:k) {
- s1 = 0
- for (j in 1:ncol(Distance.centers)) {
- s1 <- s1 + Distance.centers[u, j]
- }
- s2 <- s2 + 1/s1
- }
- Dis <- (Dmax/Dmin) * s2
- return(Dis)
- }
- Index.Hubert <- function(x, cl) {
- k <- max(cl)
- n <- dim(x)[1]
- y <- matrix(0, ncol = dim(x)[2], nrow = n)
- P <- as.matrix(md)
- meanP <- mean(P)
- variance.matrix <- numeric(0)
- md <- dist(x, method = "euclidean")
- for (j in 1:n) variance.matrix[j] = var(P[, j]) * (n -
- 1)/n
- varP <- sqrt(variance.matrix %*% variance.matrix)
- centers.clusters <- centers(cl, x)
- for (i in 1:n) {
- for (u in 1:k) {
- if (cl[i] == u)
- y[i, ] <- centers.clusters[u, ]
- }
- }
- Q <- as.matrix(dist(y, method = "euclidean"))
- meanQ <- mean(Q)
- for (j in 1:n) variance.matrix[j] = var(Q[, j]) * (n -
- 1)/n
- varQ <- sqrt(variance.matrix %*% variance.matrix)
- M <- n * (n - 1)/2
- S <- 0
- n1 <- n - 1
- for (i in 1:n1) {
- j <- i + 1
- while (j <= n) {
- S <- S + (P[i, j] - meanP) * (Q[i, j] - meanQ)
- j <- j + 1
- }
- }
- gamma <- S/(M * varP * varQ)
- return(gamma)
- }
- Index.sPlussMoins <- function(cl1, md) {
- cn1 <- max(cl1)
- n1 <- length(cl1)
- dmat <- as.matrix(md)
- average.distance <- median.distance <- separation <- cluster.size <- within.dist1 <- between.dist1 <- numeric(0)
- separation.matrix <- matrix(0, ncol = cn1, nrow = cn1)
- di <- list()
- for (u in 1:cn1) {
- cluster.size[u] <- sum(cl1 == u)
- du <- as.dist(dmat[cl1 == u, cl1 == u])
- within.dist1 <- c(within.dist1, du)
- average.distance[u] <- mean(du)
- median.distance[u] <- median(du)
- bv <- numeric(0)
- for (v in 1:cn1) {
- if (v != u) {
- suv <- dmat[cl1 == u, cl1 == v]
- bv <- c(bv, suv)
- if (u < v) {
- separation.matrix[u, v] <- separation.matrix[v,
- u] <- min(suv)
- between.dist1 <- c(between.dist1, suv)
- }
- }
- }
- }
- nwithin1 <- length(within.dist1)
- nbetween1 <- length(between.dist1)
- meanwithin1 <- mean(within.dist1)
- meanbetween1 <- mean(between.dist1)
- s.plus <- s.moins <- 0
- for (k in 1:nwithin1) {
- s.plus <- s.plus + (colSums(outer(between.dist1,
- within.dist1[k], ">")))
- s.moins <- s.moins + (colSums(outer(between.dist1,
- within.dist1[k], "<")))
- }
- Index.Gamma <- (s.plus - s.moins)/(s.plus + s.moins)
- Index.Gplus <- (2 * s.moins)/(n1 * (n1 - 1))
- t.tau <- (nwithin1 * nbetween1) - (s.plus + s.moins)
- Index.Tau <- (s.plus - s.moins)/(((n1 * (n1 - 1)/2 -
- t.tau) * (n1 * (n1 - 1)/2))^(1/2))
- results <- list(gamma = Index.Gamma, gplus = Index.Gplus,
- tau = Index.Tau)
- return(results)
- }
- Index.15and28 <- function(cl1, cl2, md) {
- cn1 <- max(cl1)
- n1 <- length(cl1)
- dmat <- as.matrix(md)
- average.distance <- median.distance <- separation <- cluster.size <- within.dist1 <- between.dist1 <- numeric(0)
- separation.matrix <- matrix(0, ncol = cn1, nrow = cn1)
- di <- list()
- for (u in 1:cn1) {
- cluster.size[u] <- sum(cl1 == u)
- du <- as.dist(dmat[cl1 == u, cl1 == u])
- within.dist1 <- c(within.dist1, du)
- for (v in 1:cn1) {
- if (v != u) {
- suv <- dmat[cl1 == u, cl1 == v]
- if (u < v) {
- separation.matrix[u, v] <- separation.matrix[v,
- u] <- min(suv)
- between.dist1 <- c(between.dist1, suv)
- }
- }
- }
- }
- cn2 <- max(cl2)
- n2 <- length(cl2)
- dmat <- as.matrix(md)
- average.distance <- median.distance <- separation <- cluster.size <- within.dist2 <- between.dist2 <- numeric(0)
- separation.matrix <- matrix(0, ncol = cn2, nrow = cn2)
- di <- list()
- for (w in 1:cn2) {
- cluster.size[w] <- sum(cl2 == w)
- dw <- as.dist(dmat[cl2 == w, cl2 == w])
- within.dist2 <- c(within.dist2, dw)
- bx <- numeric(0)
- for (x in 1:cn2) {
- if (x != w) {
- swx <- dmat[cl2 == w, cl2 == x]
- bx <- c(bx, swx)
- if (w < x) {
- separation.matrix[w, x] <- separation.matrix[x,
- w] <- min(swx)
- between.dist2 <- c(between.dist2, swx)
- }
- }
- }
- }
- nwithin1 <- length(within.dist1)
- nbetween1 <- length(between.dist1)
- meanwithin1 <- mean(within.dist1)
- meanbetween1 <- mean(between.dist1)
- meanwithin2 <- mean(within.dist2)
- meanbetween2 <- mean(between.dist2)
- Index.15 <- (meanbetween2 - meanbetween1)/(meanwithin2 -
- meanwithin1)
- Index.28 <- (meanwithin1/nwithin1)/(meanbetween1/nbetween1)
- results <- list(frey = Index.15, mcclain = Index.28)
- return(results)
- }
- Indice.ptbiserial <- function(x, md, cl1) {
- nn <- dim(x)[1]
- pp <- dim(x)[2]
- md2 <- as.matrix(md)
- m01 <- array(NA, c(nn, nn))
- nbr <- (nn * (nn - 1))/2
- pb <- array(0, c(nbr, 2))
- m3 <- 1
- for (m1 in 2:nn) {
- m12 <- m1 - 1
- for (m2 in 1:m12) {
- if (cl1[m1] == cl1[m2])
- m01[m1, m2] <- 0
- if (cl1[m1] != cl1[m2])
- m01[m1, m2] <- 1
- pb[m3, 1] <- m01[m1, m2]
- pb[m3, 2] <- md2[m1, m2]
- m3 <- m3 + 1
- }
- }
- y <- pb[, 1]
- x <- pb[, 2]
- biserial.cor <- function(x, y, use = c("all.obs", "complete.obs"),
- level = 1) {
- if (!is.numeric(x))
- stop("'x' must be a numeric variable.\n")
- y <- as.factor(y)
- if (length(levs <- levels(y)) > 2)
- stop("'y' must be a dichotomous variable.\n")
- if (length(x) != length(y))
- stop("'x' and 'y' do not have the same length")
- use <- match.arg(use)
- if (use == "complete.obs") {
- cc.ind <- complete.cases(x, y)
- x <- x[cc.ind]
- y <- y[cc.ind]
- }
- ind <- y == levs[level]
- diff.mu <- mean(x[ind]) - mean(x[!ind])
- prob <- mean(ind)
- diff.mu * sqrt(prob * (1 - prob))/sd(x)
- }
- ptbiserial <- biserial.cor(x = pb[, 2], y = pb[, 1],
- level = 2)
- return(ptbiserial)
- }
- Indices.WKWL <- function(x, cl1 = cl1, cl2 = cl2) {
- dim2 <- dim(x)[2]
- wss <- function(x) {
- x <- as.matrix(x)
- n <- length(x)
- centers <- matrix(nrow = 1, ncol = ncol(x))
- if (ncol(x) == 1) {
- centers[1, ] <- mean(x)
- }
- if (is.null(dim(x))) {
- bb <- matrix(x, byrow = FALSE, nrow = 1, ncol = ncol(x))
- centers[1, ] <- apply(bb, 2, mean)
- }else {
- centers[1, ] <- apply(x, 2, mean)
- }
- x.2 <- sweep(x, 2, centers[1, ], "-")
- withins <- sum(x.2^2)
- wss <- sum(withins)
- return(wss)
- }
- ncg1 <- 1
- ncg1max <- max(cl1)
- while ((sum(cl1 == ncg1) == sum(cl2 == ncg1)) && ncg1 <=
- ncg1max) {
- ncg1 <- ncg1 + 1
- }
- g1 <- ncg1
- ncg2 <- max(cl2)
- nc2g2 <- ncg2 - 1
- while ((sum(cl1 == nc2g2) == sum(cl2 == ncg2)) && nc2g2 >=
- 1) {
- ncg2 <- ncg2 - 1
- nc2g2 <- nc2g2 - 1
- }
- g2 <- ncg2
- NK <- sum(cl2 == g1)
- WK.x <- x[cl2 == g1, ]
- WK <- wss(x = WK.x)
- NL <- sum(cl2 == g2)
- WL.x <- x[cl2 == g2, ]
- WL <- wss(x = WL.x)
- NM <- sum(cl1 == g1)
- WM.x <- x[cl1 == g1, ]
- WM <- wss(x = WM.x)
- duda <- (WK + WL)/WM
- BKL <- WM - WK - WL
- pseudot2 <- BKL/((WK + WL)/(NK + NL - 2))
- beale <- (BKL/(WK + WL))/(((NM - 1)/(NM - 2)) * (2^(2/dim2) -
- 1))
- results <- list(duda = duda, pseudot2 = pseudot2, NM = NM,
- NK = NK, NL = NL, beale = beale)
- return(results)
- }
- Indices.WBT <- function(x, cl, P, s, vv) {
- n <- dim(x)[1]
- pp <- dim(x)[2]
- qq <- max(cl)
- z <- matrix(0, ncol = qq, nrow = n)
- clX <- as.matrix(cl)
- for (i in 1:n) for (j in 1:qq) {
- z[i, j] == 0
- if (clX[i, 1] == j) {
- z[i, j] = 1
- }
- }
- xbar <- solve(t(z) %*% z) %*% t(z) %*% x
- B <- t(xbar) %*% t(z) %*% z %*% xbar
- W <- P - B
- marriot <- (qq^2) * det(W)
- trcovw <- sum(diag(cov(W)))
- tracew <- sum(diag(W))
- if (det(W) != 0)
- scott <- n * log(det(P)/det(W))
- else {
- cat("Error: division by zero!")
- }
- friedman <- sum(diag(solve(W) * B))
- rubin <- sum(diag(P))/sum(diag(W))
- R2 <- 1 - sum(diag(W))/sum(diag(P))
- v1 <- 1
- u <- rep(0, pp)
- c <- (vv/(qq))^(1/pp)
- u <- s/c
- k1 <- sum((u >= 1) == TRUE)
- p1 <- min(k1, qq - 1)
- if (all(p1 > 0, p1 < pp)) {
- for (i in 1:p1) v1 <- v1 * s[i]
- c <- (v1/(qq))^(1/p1)
- u <- s/c
- b1 <- sum(1/(n + u[1:p1]))
- b2 <- sum(u[p1 + 1:pp]^2/(n + u[p1 + 1:pp]), na.rm = TRUE)
- E_R2 <- 1 - ((b1 + b2)/sum(u^2)) * ((n - qq)^2/n) *
- (1 + 4/n)
- ccc <- log((1 - E_R2)/(1 - R2)) * (sqrt(n * p1/2)/((0.001 +
- E_R2)^1.2))
- }else {
- b1 <- sum(1/(n + u))
- E_R2 <- 1 - (b1/sum(u^2)) * ((n - qq)^2/n) * (1 +
- 4/n)
- ccc <- log((1 - E_R2)/(1 - R2)) * (sqrt(n * pp/2)/((0.001 +
- E_R2)^1.2))
- }
- results <- list(ccc = ccc, scott = scott, marriot = marriot,
- trcovw = trcovw, tracew = tracew, friedman = friedman,
- rubin = rubin)
- return(results)
- }
- Indices.Traces <- function(data, d, clall, index = "all") {
- x <- data
- cl0 <- clall[, 1]
- cl1 <- clall[, 2]
- cl2 <- clall[, 3]
- clall <- clall
- nb.cl0 <- table(cl0)
- nb.cl1 <- table(cl1)
- nb.cl2 <- table(cl2)
- nb1.cl0 <- sum(nb.cl0 == 1)
- nb1.cl1 <- sum(nb.cl1 == 1)
- nb1.cl2 <- sum(nb.cl2 == 1)
- gss <- function(x, cl, d) {
- n <- length(cl)
- k <- max(cl)
- centers <- matrix(nrow = k, ncol = ncol(x))
- for (i in 1:k) {
- if (ncol(x) == 1) {
- centers[i, ] <- mean(x[cl == i, ])
- }
- if (is.null(dim(x[cl == i, ]))) {
- bb <- matrix(x[cl == i, ], byrow = FALSE,
- nrow = 1, ncol = ncol(x))
- centers[i, ] <- apply(bb, 2, mean)
- }else {
- centers[i, ] <- apply(x[cl == i, ], 2, mean)
- }
- }
- allmean <- apply(x, 2, mean)
- dmean <- sweep(x, 2, allmean, "-")
- allmeandist <- sum(dmean^2)
- withins <- rep(0, k)
- x.2 <- (x - centers[cl, ])^2
- for (i in 1:k) {
- withins[i] <- sum(x.2[cl == i, ])
- }
- wgss <- sum(withins)
- bgss <- allmeandist - wgss
- results <- list(wgss = wgss, bgss = bgss, centers = centers)
- return(results)
- }
- vargss <- function(x, clsize, varwithins) {
- nvar <- dim(x)[2]
- n <- sum(clsize)
- k <- length(clsize)
- varallmean <- rep(0, nvar)
- varallmeandist <- rep(0, nvar)
- varwgss <- rep(0, nvar)
- for (l in 1:nvar) varallmean[l] <- mean(x[, l])
- vardmean <- sweep(x, 2, varallmean, "-")
- for (l in 1:nvar) {
- varallmeandist[l] <- sum((vardmean[, l])^2)
- varwgss[l] <- sum(varwithins[, l])
- }
- varbgss <- varallmeandist - varwgss
- vartss <- varbgss + varwgss
- zvargss <- list(vartss = vartss, varbgss = varbgss)
- return(zvargss)
- }
- varwithinss <- function(x, centers, cluster) {
- nrow <- dim(centers)[1]
- nvar <- dim(x)[2]
- varwithins <- matrix(0, nrow, nvar)
- x <- (x - centers[cluster, ])^2
- for (l in 1:nvar) {
- for (k in 1:nrow) {
- varwithins[k, l] <- sum(x[cluster == k, l])
- }
- }
- return(varwithins)
- }
- indice.kl <- function(x, clall, d = NULL, centrotypes = "centroids") {
- if (nb1.cl1 > 0) {
- KL <- NA
- }
- if (sum(c("centroids", "medoids") == centrotypes) ==
- 0)
- stop("Wrong centrotypes argument")
- if ("medoids" == centrotypes && is.null(d))
- stop("For argument centrotypes = 'medoids' d cannot be null")
- if (!is.null(d)) {
- if (!is.matrix(d)) {
- d <- as.matrix(d)
- }
- row.names(d) <- row.names(x)
- }
- m <- ncol(x)
- g <- k <- max(clall[, 2])
- KL <- abs((g - 1)^(2/m) * gss(x, clall[, 1], d)$wgss -
- g^(2/m) * gss(x, clall[, 2], d)$wgss)/abs((g)^(2/m) *
- gss(x, clall[, 2], d)$wgss - (g + 1)^(2/m) *
- gss(x, clall[, 3], d)$wgss)
- return(KL)
- }
- indice.ch <- function(x, cl, d = NULL, centrotypes = "centroids") {
- if (nb1.cl1 > 0) {
- CH <- NA
- }
- if (sum(c("centroids", "medoids") == centrotypes) ==
- 0)
- stop("Wrong centrotypes argument")
- if ("medoids" == centrotypes && is.null(d))
- stop("For argument centrotypes = 'medoids' d cannot be null")
- if (!is.null(d)) {
- if (!is.matrix(d)) {
- d <- as.matrix(d)
- }
- row.names(d) <- row.names(x)
- }
- n <- length(cl)
- k <- max(cl)
- CH <- (gss(x, cl, d)$bgss/(k - 1))/(gss(x, cl, d)$wgss/(n -
- k))
- return(CH)
- }
- indice.hart <- function(x, clall, d = NULL, centrotypes = "centroids") {
- if (sum(c("centroids", "medoids") == centrotypes) ==
- 0)
- stop("Wrong centrotypes argument")
- if ("medoids" == centrotypes && is.null(d))
- stop("For argument centrotypes = 'medoids' d cannot be null")
- if (!is.null(d)) {
- if (!is.matrix(d)) {
- d <- as.matrix(d)
- }
- row.names(d) <- row.names(x)
- }
- n <- nrow(x)
- g <- max(clall[, 1])
- HART <- (gss(x, clall[, 2], d)$wgss/gss(x, clall[,
- 3], d)$wgss - 1) * (n - g - 1)
- return(HART)
- }
- indice.ball <- function(x, cl, d = NULL, centrotypes = "centroids") {
- wgssB <- gss(x, cl, d)$wgss
- qq <- max(cl)
- ball <- wgssB/qq
- return(ball)
- }
- indice.ratkowsky <- function(x, cl, d, centrotypes = "centroids") {
- qq <- max(cl)
- clsize <- table(cl)
- centers <- gss(x, cl, d)$centers
- varwithins <- varwithinss(x, centers, cl)
- zvargss <- vargss(x, clsize, varwithins)
- ratio <- mean(sqrt(zvargss$varbgss/zvargss$vartss))
- ratkowsky <- ratio/sqrt(qq)
- return(ratkowsky)
- }
- indice <- pmatch(index, c("kl", "ch", "hart", "ratkowsky",
- "ball", "all"))
- if (is.na(indice))
- stop("invalid clustering index")
- if (indice == -1)
- stop("ambiguous index")
- vecallindex <- numeric(5)
- if (any(indice == 1) || (indice == 6))
- vecallindex[1] <- indice.kl(x, clall, d)
- if (any(indice == 2) || (indice == 6))
- vecallindex[2] <- indice.ch(x, cl = clall[, 2],
- d)
- if (any(indice == 3) || (indice == 6))
- vecallindex[3] <- indice.hart(x, clall, d)
- if (any(indice == 4) || (indice == 6))
- vecallindex[4] <- indice.ratkowsky(x, cl = cl1,
- d)
- if (any(indice == 5) || (indice == 6))
- vecallindex[5] <- indice.ball(x, cl = cl1, d)
- names(vecallindex) <- c("kl", "ch", "hart", "ratkowsky",
- "ball")
- if (indice < 6)
- vecallindex <- vecallindex[indice]
- return(vecallindex)
- }
- Indice.cindex <- function(d, cl) {
- d <- data.matrix(d)
- DU <- 0
- r <- 0
- v_max <- array(1, max(cl))
- v_min <- array(1, max(cl))
- for (i in 1:max(cl)) {
- n <- sum(cl == i)
- if (n > 1) {
- t <- d[cl == i, cl == i]
- DU = DU + sum(t)/2
- v_max[i] = max(t)
- if (sum(t == 0) == n)
- v_min[i] <- min(t[t != 0])
- else v_min[i] <- 0
- r <- r + n * (n - 1)/2
- }
- }
- Dmin = min(v_min)
- Dmax = max(v_max)
- if (Dmin == Dmax)
- result <- NA
- else result <- (DU - r * Dmin)/(Dmax * r - Dmin * r)
- result
- }
- Indice.DB <- function(x, cl, d = NULL, centrotypes = "centroids",
- p = 2, q = 2) {
- if (sum(c("centroids") == centrotypes) == 0)
- stop("Wrong centrotypes argument")
- if (!is.null(d)) {
- if (!is.matrix(d)) {
- d <- as.matrix(d)
- }
- row.names(d) <- row.names(x)
- }
- if (is.null(dim(x))) {
- dim(x) <- c(length(x), 1)
- }
- x <- as.matrix(x)
- n <- length(cl)
- k <- max(cl)
- dAm <- d
- centers <- matrix(nrow = k, ncol = ncol(x))
- if (centrotypes == "centroids") {
- for (i in 1:k) {
- for (j in 1:ncol(x)) {
- centers[i, j] <- mean(x[cl == i, j])
- }
- }
- }else {
- stop("wrong centrotypes argument")
- }
- S <- rep(0, k)
- for (i in 1:k) {
- ind <- (cl == i)
- if (sum(ind) > 1) {
- centerI <- centers[i, ]
- centerI <- rep(centerI, sum(ind))
- centerI <- matrix(centerI, nrow = sum(ind),
- ncol = ncol(x), byrow = TRUE)
- S[i] <- mean(sqrt(apply((x[ind, ] - centerI)^2,
- 1, sum))^q)^(1/q)
- }else S[i] <- 0
- }
- M <- as.matrix(dist(centers, p = p))
- R <- array(Inf, c(k, k))
- r = rep(0, k)
- for (i in 1:k) {
- for (j in 1:k) {
- R[i, j] = (S[i] + S[j])/M[i, j]
- }
- r[i] = max(R[i, ][is.finite(R[i, ])])
- }
- DB = mean(r[is.finite(r)])
- resul <- list(DB = DB, r = r, R = R, d = M, S = S, centers = centers)
- resul
- }
- Indice.S <- function(d, cl) {
- d <- as.matrix(d)
- Si <- 0
- for (k in 1:max(cl)) {
- if ((sum(cl == k)) <= 1)
- Sil <- 1
- else {
- Sil <- 0
- for (i in 1:length(cl)) {
- if (cl[i] == k) {
- ai <- sum(d[i, cl == k])/(sum(cl == k) -
- 1)
- dips <- NULL
- for (j in 1:max(cl)) if (cl[i] != j)
- if (sum(cl == j) != 1)
- dips <- cbind(dips, c((sum(d[i, cl ==
- j]))/(sum(cl == j))))
- else dips <- cbind(dips, c((sum(d[i, cl ==
- j]))))
- bi <- min(dips)
- Sil <- Sil + (bi - ai)/max(c(ai, bi))
- }
- }
- }
- Si <- Si + Sil
- }
- Si/length(cl)
- }
- Indice.Gap <- function(x, clall, reference.distribution = "unif",
- B = 10, method = "ward.D2", d = NULL, centrotypes = "centroids") {
- GAP <- function(X, cl, referenceDistribution, B, method,
- d, centrotypes) {
- set.seed(1)
- simgap <- function(Xvec) {
- ma <- max(Xvec)
- mi <- min(Xvec)
- set.seed(1)
- Xout <- runif(length(Xvec), min = mi, max = ma)
- return(Xout)
- }
- pcsim <- function(X, d, centrotypes) {
- if (centrotypes == "centroids") {
- Xmm <- apply(X, 2, mean)
- }
- for (k in (1:dim(X)[2])) {
- X[, k] <- X[, k] - Xmm[k]
- }
- ss <- svd(X)
- Xs <- X %*% ss$v
- Xnew <- apply(Xs, 2, simgap)
- Xt <- Xnew %*% t(ss$v)
- for (k in (1:dim(X)[2])) {
- Xt[, k] <- Xt[, k] + Xmm[k]
- }
- return(Xt)
- }
- if (is.null(dim(x))) {
- dim(x) <- c(length(x), 1)
- }
- ClassNr <- max(cl)
- Wk0 <- 0
- WkB <- matrix(0, 1, B)
- for (bb in (1:B)) {
- if (reference.distribution == "unif")
- Xnew <- apply(X, 2, simgap)
- else if (reference.distribution == "pc")
- Xnew <- pcsim(X, d, centrotypes)
- else stop("Wrong reference distribution type")
- if (bb == 1) {
- pp <- cl
- if (ClassNr == length(cl))
- pp2 <- 1:ClassNr
- else if (method == "k-means") {
- set.seed(1)
- pp2 <- kmeans(Xnew, ClassNr, 100)$cluster
- }else if (method == "single" || method == "complete" ||
- method == "average" || method == "ward.D2" ||
- method == "mcquitty" || method == "median" ||
- method == "centroid" || method == "ward.D")
- pp2 <- cutree(hclust(dist(Xnew), method = method),
- ClassNr)
- else stop("Wrong clustering method")
- if (ClassNr > 1) {
- for (zz in (1:ClassNr)) {
- Xuse <- X[pp == zz, ]
- Wk0 <- Wk0 + sum(diag(var(Xuse))) * (length(pp[pp ==
- zz]) - 1)/(dim(X)[1] - ClassNr)
- Xuse2 <- Xnew[pp2 == zz, ]
- WkB[1, bb] <- WkB[1, bb] + sum(diag(var(Xuse2))) *
- (length(pp2[pp2 == zz]) - 1)/(dim(X)[1] -
- ClassNr)
- }
- }
- if (ClassNr == 1) {
- Wk0 <- sum(diag(var(X)))
- WkB[1, bb] <- sum(diag(var(Xnew)))
- }
- }
- if (bb > 1) {
- if (ClassNr == length(cl))
- pp2 <- 1:ClassNr
- else if (method == "k-means") {
- set.seed(1)
- pp2 <- kmeans(Xnew, ClassNr, 100)$cluster
- }else if (method == "single" || method == "complete" ||
- method == "average" || method == "ward.D2" ||
- method == "mcquitty" || method == "median" ||
- method == "centroid" || method == "ward.D")
- pp2 <- cutree(hclust(dist(Xnew), method = method),
- ClassNr)
- else stop("Wrong clustering method")
- if (ClassNr > 1) {
- for (zz in (1:ClassNr)) {
- Xuse2 <- Xnew[pp2 == zz, ]
- WkB[1, bb] <- WkB[1, bb] + sum(diag(var(Xuse2))) *
- length(pp2[pp2 == zz])/(dim(X)[1] -
- ClassNr)
- }
- }
- if (ClassNr == 1) {
- WkB[1, bb] <- sum(diag(var(Xnew)))
- }
- }
- }
- Sgap <- mean(log(WkB[1, ])) - log(Wk0)
- Sdgap <- sqrt(1 + 1/B) * sqrt(var(log(WkB[1, ]))) *
- sqrt((B - 1)/B)
- resul <- list(Sgap = Sgap, Sdgap = Sdgap)
- resul
- }
- if (sum(c("centroids", "medoids") == centrotypes) ==
- 0)
- stop("Wrong centrotypes argument")
- if ("medoids" == centrotypes && is.null(d))
- stop("For argument centrotypes = 'medoids' d can not be null")
- if (!is.null(d)) {
- if (!is.matrix(d)) {
- d <- as.matrix(d)
- }
- row.names(d) <- row.names(x)
- }
- X <- as.matrix(x)
- gap1 <- GAP(X, clall[, 1], reference.distribution, B,
- method, d, centrotypes)
- gap <- gap1$Sgap
- gap2 <- GAP(X, clall[, 2], reference.distribution, B,
- method, d, centrotypes)
- diffu <- gap - (gap2$Sgap - gap2$Sdgap)
- resul <- list(gap = gap, diffu = diffu)
- resul
- }
- Index.sdindex <- function(x, clmax, cl) {
- x <- as.matrix(x)
- Alpha <- Dis(clmax, x)
- Scatt <- Average.scattering(cl, x)$scatt
- Dis0 <- Dis(cl, x)
- SD.indice <- Alpha * Scatt + Dis0
- return(SD.indice)
- }
- Index.SDbw <- function(x, cl) {
- x <- as.matrix(x)
- Scatt <- Average.scattering(cl, x)$scatt
- Dens.bw <- density.bw(cl, x)
- SDbw <- Scatt + Dens.bw
- return(SDbw)
- }
- Index.Dindex <- function(cl, x) {
- x <- as.matrix(x)
- distance <- density.clusters(cl, x)$distance
- n <- length(distance)
- S <- 0
- for (i in 1:n) S <- S + distance[i]
- inertieIntra <- S/n
- return(inertieIntra)
- }
- Index.dunn <- function(md, clusters, Data = NULL, method = "euclidean") {
- distance <- as.matrix(md)
- nc <- max(clusters)
- interClust <- matrix(NA, nc, nc)
- intraClust <- rep(NA, nc)
- for (i in 1:nc) {
- c1 <- which(clusters == i)
- for (j in i:nc) {
- if (j == i)
- intraClust[i] <- max(distance[c1, c1])
- if (j > i) {
- c2 <- which(clusters == j)
- interClust[i, j] <- min(distance[c1, c2])
- }
- }
- }
- dunn <- min(interClust, na.rm = TRUE)/max(intraClust)
- return(dunn)
- }
- for (nc in min_nc:max_nc) {
- if (any(method == 1) || (method == 2) || (method ==
- 3) || (method == 4) || (method == 5) || (method ==
- 6) || (method == 7) || (method == 9)) {
- cl1 <- cutree(hc, k = nc)
- cl2 <- cutree(hc, k = nc + 1)
- clall <- cbind(cl1, cl2)
- clmax <- cutree(hc, k = max_nc)
- if (nc >= 2) {
- cl0 <- cutree(hc, k = nc - 1)
- clall1 <- cbind(cl0, cl1, cl2)
- }
- if (nc == 1) {
- cl0 <- rep(NA, nn)
- clall1 <- cbind(cl0, cl1, cl2)
- }
- }
- if (method == 8) {
- set.seed(1)
- cl2 <- kmeans(jeu, nc + 1)$cluster
- set.seed(1)
- clmax <- kmeans(jeu, max_nc)$cluster
- if (nc > 2) {
- set.seed(1)
- cl1 <- kmeans(jeu, nc)$cluster
- clall <- cbind(cl1, cl2)
- set.seed(1)
- cl0 <- kmeans(jeu, nc - 1)$cluster
- clall1 <- cbind(cl0, cl1, cl2)
- }
- if (nc == 2) {
- set.seed(1)
- cl1 <- kmeans(jeu, nc)$cluster
- clall <- cbind(cl1, cl2)
- cl0 <- rep(1, nn)
- clall1 <- cbind(cl0, cl1, cl2)
- }
- if (nc == 1) {
- stop("Number of clusters must be higher than 2")
- }
- }
- j <- table(cl1)
- s <- sum(j == 1)
- j2 <- table(cl2)
- s2 <- sum(j2 == 1)
- if (any(indice == 3) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 3] <- Indices.Traces(jeu, md,
- clall1, index = "hart")
- }
- if (any(indice == 4) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 4] <- Indices.WBT(x = jeu,
- cl = cl1, P = TT, s = ss, vv = vv)$ccc
- }
- if (any(indice == 5) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 5] <- Indices.WBT(x = jeu,
- cl = cl1, P = TT, s = ss, vv = vv)$scott
- }
- if (any(indice == 6) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 6] <- Indices.WBT(x = jeu,
- cl = cl1, P = TT, s = ss, vv = vv)$marriot
- }
- if (any(indice == 7) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 7] <- Indices.WBT(x = jeu,
- cl = cl1, P = TT, s = ss, vv = vv)$trcovw
- }
- if (any(indice == 8) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 8] <- Indices.WBT(x = jeu,
- cl = cl1, P = TT, s = ss, vv = vv)$tracew
- }
- if (any(indice == 9) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 9] <- Indices.WBT(x = jeu,
- cl = cl1, P = TT, s = ss, vv = vv)$friedman
- }
- if (any(indice == 10) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 10] <- Indices.WBT(x = jeu,
- cl = cl1, P = TT, s = ss, vv = vv)$rubin
- }
- if (any(indice == 14) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 14] <- Indices.WKWL(x = jeu,
- cl1 = cl1, cl2 = cl2)$duda
- }
- if (any(indice == 15) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 15] <- Indices.WKWL(x = jeu,
- cl1 = cl1, cl2 = cl2)$pseudot2
- }
- if (any(indice == 16) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 16] <- beale <- Indices.WKWL(x = jeu,
- cl1 = cl1, cl2 = cl2)$beale
- }
- if (any(indice == 14) || (indice == 15) || (indice ==
- 16) || (indice == 31) || (indice == 32)) {
- NM <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)$NM
- NK <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)$NK
- NL <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)$NL
- zz <- 3.2
- zzz <- zz * sqrt(2 * (1 - 8/((pi^2) * pp))/(NM *
- pp))
- if (any(indice == 14) || (indice == 31) || (indice ==
- 32)) {
- resCritical[nc - min_nc + 1, 1] <- critValue <- 1 -
- (2/(pi * pp)) - zzz
- }
- if ((indice == 15) || (indice == 31) || (indice ==
- 32)) {
- critValue <- 1 - (2/(pi * pp)) - zzz
- resCritical[nc - min_nc + 1, 2] <- ((1 - critValue)/critValue) *
- (NK + NL - 2)
- }
- if (any(indice == 16) || (indice == 31) || (indice ==
- 32)) {
- df2 <- (NM - 2) * pp
- resCritical[nc - min_nc + 1, 3] <- 1 - pf(beale,
- pp, df2)
- }
- }
- if (any(indice == 18) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 18] <- Indices.Traces(jeu,
- md, clall1, index = "ball")
- }
- if (any(indice == 19) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 19] <- Indice.ptbiserial(x = jeu,
- md = md, cl1 = cl1)
- }
- if (any(indice == 20) || (indice == 32)) {
- if (method == 1) {
- if(exists("Indice.Gap")){
- print("Indice.Gap exists")
- }
- debug(fun = "Indice.Gap")
- resultSGAP <- Indice.Gap(x = jeu, clall = clall,
- reference.distribution = "unif", B = 10, method = "ward.D2",
- d = NULL, centrotypes = "centroids")
- }
- if (method == 2) {
- resultSGAP <- Indice.Gap(x = jeu, clall = clall,
- reference.distribution = "unif", B = 10, method = "single",
- d = NULL, centrotypes = "centroids")
- }
- if (method == 3) {
- resultSGAP <- Indice.Gap(x = jeu, clall = clall,
- reference.distribution = "unif", B = 10, method = "complete",
- d = NULL, centrotypes = "centroids")
- }
- if (method == 4) {
- resultSGAP <- Indice.Gap(x = jeu, clall = clall,
- reference.distribution = "unif", B = 10, method = "average",
- d = NULL, centrotypes = "centroids")
- }
- if (method == 5) {
- resultSGAP <- Indice.Gap(x = jeu, clall = clall,
- reference.distribution = "unif", B = 10, method = "mcquitty",
- d = NULL, centrotypes = "centroids")
- }
- if (method == 6) {
- resultSGAP <- Indice.Gap(x = jeu, clall = clall,
- reference.distribution = "unif", B = 10, method = "median",
- d = NULL, centrotypes = "centroids")
- }
- if (method == 7) {
- resultSGAP <- Indice.Gap(x = jeu, clall = clall,
- reference.distribution = "unif", B = 10, method = "centroid",
- d = NULL, centrotypes = "centroids")
- }
- if (method == 9) {
- resultSGAP <- Indice.Gap(x = jeu, clall = clall,
- reference.distribution = "unif", B = 10, method = "ward.D",
- d = NULL, centrotypes = "centroids")
- }
- if (method == 8) {
- resultSGAP <- Indice.Gap(x = jeu, clall = clall,
- reference.distribution = "unif", B = 10, method = "k-means",
- d = NULL, centrotypes = "centroids")
- }
- res[nc - min_nc + 1, 20] <- resultSGAP$gap
- resCritical[nc - min_nc + 1, 4] <- resultSGAP$diffu
- }
- if (nc >= 2) {
- if (any(indice == 1) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 1] <- Indices.Traces(jeu,
- md, clall1, index = "kl")
- }
- if (any(indice == 2) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 2] <- Indices.Traces(jeu,
- md, clall1, index = "ch")
- }
- if (any(indice == 11) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 11] <- Indice.cindex(d = md,
- cl = cl1)
- }
- if (any(indice == 12) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 12] <- Indice.DB(x = jeu,
- cl = cl1, d = NULL, centrotypes = "centroids",
- p = 2, q = 2)$DB
- }
- if (any(indice == 13) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 13] <- Indice.S(d = md,
- cl = cl1)
- }
- if (any(indice == 17) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 17] <- Indices.Traces(jeu,
- md, clall1, index = "ratkowsky")
- }
- if (any(indice == 21) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 21] <- Index.15and28(cl1 = cl1,
- cl2 = cl2, md = md)$frey
- }
- if (any(indice == 22) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 22] <- Index.15and28(cl1 = cl1,
- cl2 = cl2, md = md)$mcclain
- }
- if (any(indice == 23) || (indice == 32)) {
- res[nc - min_nc + 1, 23] <- Index.sPlussMoins(cl1 = cl1,
- md = md)$gamma
- }
- if (any(indice == 24) || (indice == 32)) {
- res[nc - min_nc + 1, 24] <- Index.sPlussMoins(cl1 = cl1,
- md = md)$gplus
- }
- if (any(indice == 25) || (indice == 32)) {
- res[nc - min_nc + 1, 25] <- Index.sPlussMoins(cl1 = cl1,
- md = md)$tau
- }
- if (any(indice == 26) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 26] <- Index.dunn(md, cl1,
- Data = jeu, method = NULL)
- }
- if (any(indice == 27) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 27] <- Index.Hubert(jeu,
- cl1)
- }
- if (any(indice == 28) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 28] <- Index.sdindex(jeu,
- clmax, cl1)
- }
- if (any(indice == 29) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 29] <- Index.Dindex(cl1,
- jeu)
- }
- if (any(indice == 30) || (indice == 31) || (indice ==
- 32)) {
- res[nc - min_nc + 1, 30] <- Index.SDbw(jeu,
- cl1)
- }
- }else {
- res[nc - min_nc + 1, 1] <- NA
- res[nc - min_nc + 1, 2] <- NA
- res[nc - min_nc + 1, 11] <- NA
- res[nc - min_nc + 1, 12] <- NA
- res[nc - min_nc + 1, 13] <- NA
- res[nc - min_nc + 1, 17] <- NA
- res[nc - min_nc + 1, 21] <- NA
- res[nc - min_nc + 1, 22] <- NA
- res[nc - min_nc + 1, 23] <- NA
- res[nc - min_nc + 1, 24] <- NA
- res[nc - min_nc + 1, 25] <- NA
- res[nc - min_nc + 1, 26] <- NA
- res[nc - min_nc + 1, 27] <- NA
- res[nc - min_nc + 1, 28] <- NA
- res[nc - min_nc + 1, 29] <- NA
- res[nc - min_nc + 1, 30] <- NA
- }
- }
- nc.KL <- indice.KL <- 0
- if (any(indice == 1) || (indice == 31) || (indice == 32)) {
- nc.KL <- (min_nc:max_nc)[which.max(res[, 1])]
- indice.KL <- max(res[, 1], na.rm = TRUE)
- best.nc <- nc.KL
- }
- nc.CH <- indice.CH <- 0
- if (any(indice == 2) || (indice == 31) || (indice == 32)) {
- nc.CH <- (min_nc:max_nc)[which.max(res[, 2])]
- indice.CH <- max(res[, 2], na.rm = TRUE)
- best.nc <- nc.CH
- }
- nc.CCC <- indice.CCC <- 0
- if (any(indice == 4) || (indice == 31) || (indice == 32)) {
- nc.CCC <- (min_nc:max_nc)[which.max(res[, 4])]
- indice.CCC <- max(res[, 4], na.rm = TRUE)
- best.nc <- nc.CCC
- }
- nc.DB <- indice.DB <- 0
- if (any(indice == 12) || (indice == 31) || (indice == 32)) {
- nc.DB <- (min_nc:max_nc)[which.min(res[, 12])]
- indice.DB <- min(res[, 12], na.rm = TRUE)
- best.nc <- nc.DB
- }
- nc.Silhouette <- indice.Silhouette <- 0
- if (any(indice == 13) || (indice == 31) || (indice == 32)) {
- nc.Silhouette <- (min_nc:max_nc)[which.max(res[, 13])]
- indice.Silhouette <- max(res[, 13], na.rm = TRUE)
- best.nc <- nc.Silhouette
- }
- nc.Gap <- indice.Gap <- 0
- if (any(indice == 20) || (indice == 32)) {
- found <- FALSE
- for (ncG in min_nc:max_nc) {
- if ((resCritical[ncG - min_nc + 1, 4] >= 0) && (!found)) {
- ncGap <- ncG
- indiceGap <- res[ncG - min_nc + 1, 20]
- found <- TRUE
- }
- }
- if (found) {
- nc.Gap <- ncGap
- indice.Gap <- indiceGap
- best.nc <- nc.Gap
- }else {
- nc.Gap <- NA
- indice.Gap <- NA
- }
- }
- nc.Duda <- indice.Duda <- 0
- if (any(indice == 14) || (indice == 31) || (indice == 32)) {
- foundDuda <- FALSE
- for (ncD in min_nc:max_nc) {
- if ((res[ncD - min_nc + 1, 14] >= resCritical[ncD -
- min_nc + 1, 1]) && (!foundDuda)) {
- ncDuda <- ncD
- indiceDuda <- res[ncD - min_nc + 1, 14]
- foundDuda <- TRUE
- }
- }
- if (foundDuda) {
- nc.Duda <- ncDuda
- indice.Duda <- indiceDuda
- best.nc <- nc.Duda
- }else {
- nc.Duda <- NA
- indice.Duda <- NA
- }
- }
- nc.Pseudo <- indice.Pseudo <- 0
- if (any(indice == 15) || (indice == 31) || (indice == 32)) {
- foundPseudo <- FALSE
- for (ncP in min_nc:max_nc) {
- if ((res[ncP - min_nc + 1, 15] <= resCritical[ncP -
- min_nc + 1, 2]) && (!foundPseudo)) {
- ncPseudo <- ncP
- indicePseudo <- res[ncP - min_nc + 1, 15]
- foundPseudo <- TRUE
- }
- }
- if (foundPseudo) {
- nc.Pseudo <- ncPseudo
- indice.Pseudo <- indicePseudo
- best.nc <- nc.Pseudo
- }else {
- nc.Pseudo <- NA
- indice.Pseudo <- NA
- }
- }
- nc.Beale <- indice.Beale <- 0
- if (any(indice == 16) || (indice == 31) || (indice == 32)) {
- foundBeale <- FALSE
- for (ncB in min_nc:max_nc) {
- if ((resCritical[ncB - min_nc + 1, 3] >= alphaBeale) &&
- (!foundBeale)) {
- ncBeale <- ncB
- indiceBeale <- res[ncB - min_nc + 1, 16]
- foundBeale <- TRUE
- }
- }
- if (foundBeale) {
- nc.Beale <- ncBeale
- indice.Beale <- indiceBeale
- best.nc <- nc.Beale
- }else {
- nc.Beale <- NA
- indice.Beale <- NA
- }
- }
- nc.ptbiserial <- indice.ptbiserial <- 0
- if (any(indice == 19) || (indice == 31) || (indice == 32)) {
- nc.ptbiserial <- (min_nc:max_nc)[which.max(res[, 19])]
- indice.ptbiserial <- max(res[, 19], na.rm = TRUE)
- best.nc <- nc.ptbiserial
- }
- foundNC <- foundIndice <- numeric(0)
- nc.Frey <- indice.Frey <- 0
- if (any(indice == 21) || (indice == 31) || (indice == 32)) {
- foundFrey <- FALSE
- i <- 1
- for (ncF in min_nc:max_nc) {
- if (res[ncF - min_nc + 1, 21] < 1) {
- ncFrey <- ncF - 1
- indiceFrey <- res[ncF - 1 - min_nc + 1, 21]
- foundFrey <- TRUE
- foundNC[i] <- ncFrey
- foundIndice[i] <- indiceFrey
- i <- i + 1
- }
- }
- if (foundFrey) {
- nc.Frey <- foundNC[1]
- indice.Frey <- foundIndice[1]
- best.nc <- nc.Frey
- }else {
- nc.Frey <- NA
- indice.Frey <- NA
- print(paste("Frey index : No clustering structure in this data set"))
- }
- }
- nc.McClain <- indice.McClain <- 0
- if (any(indice == 22) || (indice == 31) || (indice == 32)) {
- nc.McClain <- (min_nc:max_nc)[which.min(res[, 22])]
- indice.McClain <- min(res[, 22], na.rm = TRUE)
- best.nc <- nc.McClain
- }
- nc.Gamma <- indice.Gamma <- 0
- if (any(indice == 23) || (indice == 31) || (indice == 32)) {
- nc.Gamma <- (min_nc:max_nc)[which.max(res[, 23])]
- indice.Gamma <- max(res[, 23], na.rm = TRUE)
- best.nc <- nc.Gamma
- }
- nc.Gplus <- indice.Gplus <- 0
- if (any(indice == 24) || (indice == 31) || (indice == 32)) {
- nc.Gplus <- (min_nc:max_nc)[which.min(res[, 24])]
- indice.Gplus <- min(res[, 24], na.rm = TRUE)
- best.nc <- nc.Gplus
- }
- nc.Tau <- indice.Tau <- 0
- if (any(indice == 25) || (indice == 31) || (indice == 32)) {
- nc.Tau <- (min_nc:max_nc)[which.max(res[, 25])]
- indice.Tau <- max(res[, 25], na.rm = TRUE)
- best.nc <- nc.Tau
- }
- if ((indice == 3) || (indice == 5) || (indice == 6) || (indice ==
- 7) || (indice == 8) || (indice == 9) || (indice == 10) ||
- (indice == 18) || (indice == 27) || (indice == 11) ||
- (indice == 29) || (indice == 31) || (indice == 32)) {
- DiffLev <- array(0, c(max_nc - min_nc + 1, 12))
- DiffLev[, 1] <- min_nc:max_nc
- for (nc3 in min_nc:max_nc) {
- if (nc3 == min_nc) {
- DiffLev[nc3 - min_nc + 1, 2] <- abs(res[nc3 -
- min_nc + 1, 3] - NA)
- DiffLev[nc3 - min_nc + 1, 3] <- abs(res[nc3 -
- min_nc + 1, 5] - NA)
- DiffLev[nc3 - min_nc + 1, 4] <- abs(res[nc3 -
- min_nc + 1, 6] - NA)
- DiffLev[nc3 - min_nc + 1, 5] <- abs(res[nc3 -
- min_nc + 1, 7] - NA)
- DiffLev[nc3 - min_nc + 1, 6] <- abs(res[nc3 -
- min_nc + 1, 8] - NA)
- DiffLev[nc3 - min_nc + 1, 7] <- abs(res[nc3 -
- min_nc + 1, 9] - NA)
- DiffLev[nc3 - min_nc + 1, 8] <- abs(res[nc3 -
- min_nc + 1, 10] - NA)
- DiffLev[nc3 - min_nc + 1, 9] <- abs(res[nc3 -
- min_nc + 1, 18] - NA)
- DiffLev[nc3 - min_nc + 1, 10] <- abs(res[nc3 -
- min_nc + 1, 27] - NA)
- DiffLev[nc3 - min_nc + 1, 12] <- abs(res[nc3 -
- min_nc + 1, 29] - NA)
- }else {
- if (nc3 == max_nc) {
- DiffLev[nc3 - min_nc + 1, 2] <- abs(res[nc3 -
- min_nc + 1, 3] - res[nc3 - min_nc, 3])
- DiffLev[nc3 - min_nc + 1, 3] <- abs(res[nc3 -
- min_nc + 1, 5] - res[nc3 - min_nc, 5])
- DiffLev[nc3 - min_nc + 1, 4] <- abs(res[nc3 -
- min_nc + 1, 6] - NA)
- DiffLev[nc3 - min_nc + 1, 5] <- abs(res[nc3 -
- min_nc + 1, 7] - res[nc3 - min_nc, 7])
- DiffLev[nc3 - min_nc + 1, 6] <- abs(res[nc3 -
- min_nc + 1, 8] - NA)
- DiffLev[nc3 - min_nc + 1, 7] <- abs(res[nc3 -
- min_nc + 1, 9] - res[nc3 - min_nc, 9])
- DiffLev[nc3 - min_nc + 1, 8] <- abs(res[nc3 -
- min_nc + 1, 10] - NA)
- DiffLev[nc3 - min_nc + 1, 9] <- abs(res[nc3 -
- min_nc + 1, 18] - res[nc3 - min_nc, 18])
- DiffLev[nc3 - min_nc + 1, 10] <- abs(res[nc3 -
- min_nc + 1, 27] - NA)
- DiffLev[nc3 - min_nc + 1, 12] <- abs(res[nc3 -
- min_nc + 1, 29] - NA)
- }else {
- DiffLev[nc3 - min_nc + 1, 2] <- abs(res[nc3 -
- min_nc + 1, 3] - res[nc3 - min_nc, 3])
- DiffLev[nc3 - min_nc + 1, 3] <- abs(res[nc3 -
- min_nc + 1, 5] - res[nc3 - min_nc, 5])
- DiffLev[nc3 - min_nc + 1, 4] <- ((res[nc3 -
- min_nc + 2, 6] - res[nc3 - min_nc + 1, 6]) -
- (res[nc3 - min_nc + 1, 6] - res[nc3 - min_nc,
- 6]))
- DiffLev[nc3 - min_nc + 1, 5] <- abs(res[nc3 -
- min_nc + 1, 7] - res[nc3 - min_nc, 7])
- DiffLev[nc3 - min_nc + 1, 6] <- ((res[nc3 -
- min_nc + 2, 8] - res[nc3 - min_nc + 1, 8]) -
- (res[nc3 - min_nc + 1, 8] - res[nc3 - min_nc,
- 8]))
- DiffLev[nc3 - min_nc + 1, 7] <- abs(res[nc3 -
- min_nc + 1, 9] - res[nc3 - min_nc, 9])
- DiffLev[nc3 - min_nc + 1, 8] <- ((res[nc3 -
- min_nc + 2, 10] - res[nc3 - min_nc + 1,
- 10]) - (res[nc3 - min_nc + 1, 10] - res[nc3 -
- min_nc, 10]))
- DiffLev[nc3 - min_nc + 1, 9] <- abs(res[nc3 -
- min_nc + 1, 18] - res[nc3 - min_nc, 18])
- DiffLev[nc3 - min_nc + 1, 10] <- abs((res[nc3 -
- min_nc + 1, 27] - res[nc3 - min_nc, 27]))
- DiffLev[nc3 - min_nc + 1, 12] <- ((res[nc3 -
- min_nc + 2, 29] - res[nc3 - min_nc + 1,
- 29]) - (res[nc3 - min_nc + 1, 29] - res[nc3 -
- min_nc, 29]))
- }
- }
- }
- }
- nc.Hartigan <- indice.Hartigan <- 0
- if (any(indice == 3) || (indice == 31) || (indice == 32)) {
- nc.Hartigan <- DiffLev[, 1][which.max(DiffLev[, 2])]
- indice.Hartigan <- max(DiffLev[, 2], na.rm = TRUE)
- best.nc <- nc.Hartigan
- }
- nc.Ratkowsky <- indice.Ratkowsky <- 0
- if (any(indice == 17) || (indice == 31) || (indice == 32)) {
- nc.Ratkowsky <- (min_nc:max_nc)[which.max(res[, 17])]
- indice.Ratkowsky <- max(res[, 17], na.rm = TRUE)
- best.nc <- nc.Ratkowsky
- }
- nc.cindex <- indice.cindex <- 0
- if (any(indice == 11) || (indice == 31) || (indice == 32)) {
- nc.cindex <- (min_nc:max_nc)[which.min(res[, 11])]
- indice.cindex <- min(res[, 11], na.rm = TRUE)
- best.nc <- nc.cindex
- }
- nc.Scott <- indice.Scott <- 0
- if (any(indice == 5) || (indice == 31) || (indice == 32)) {
- nc.Scott <- DiffLev[, 1][which.max(DiffLev[, 3])]
- indice.Scott <- max(DiffLev[, 3], na.rm = TRUE)
- best.nc <- nc.Scott
- }
- nc.Marriot <- indice.Marriot <- 0
- if (any(indice == 6) || (indice == 31) || (indice == 32)) {
- nc.Marriot <- DiffLev[, 1][which.max(DiffLev[, 4])]
- round(nc.Marriot, digits = 1)
- indice.Marriot <- max(DiffLev[, 4], na.rm = TRUE)
- best.nc <- nc.Marriot
- }
- nc.TrCovW <- indice.TrCovW <- 0
- if (any(indice == 7) || (indice == 31) || (indice == 32)) {
- nc.TrCovW <- DiffLev[, 1][which.max(DiffLev[, 5])]
- indice.TrCovW <- max(DiffLev[, 5], na.rm = TRUE)
- best.nc <- nc.TrCovW
- }
- nc.TraceW <- indice.TraceW <- 0
- if (any(indice == 8) || (indice == 31) || (indice == 32)) {
- nc.TraceW <- DiffLev[, 1][which.max(DiffLev[, 6])]
- indice.TraceW <- max(DiffLev[, 6], na.rm = TRUE)
- best.nc <- nc.TraceW
- }
- nc.Friedman <- indice.Friedman <- 0
- if (any(indice == 9) || (indice == 31) || (indice == 32)) {
- nc.Friedman <- DiffLev[, 1][which.max(DiffLev[, 7])]
- indice.Friedman <- max(DiffLev[, 7], na.rm = TRUE)
- best.nc <- nc.Friedman
- }
- nc.Rubin <- indice.Rubin <- 0
- if (any(indice == 10) || (indice == 31) || (indice == 32)) {
- nc.Rubin <- DiffLev[, 1][which.min(DiffLev[, 8])]
- indice.Rubin <- min(DiffLev[, 8], na.rm = TRUE)
- best.nc <- nc.Rubin
- }
- nc.Ball <- indice.Ball <- 0
- if (any(indice == 18) || (indice == 31) || (indice == 32)) {
- nc.Ball <- DiffLev[, 1][which.max(DiffLev[, 9])]
- indice.Ball <- max(DiffLev[, 9], na.rm = TRUE)
- best.nc <- nc.Ball
- }
- nc.Dunn <- indice.Dunn <- 0
- if (any(indice == 26) || (indice == 31) || (indice == 32)) {
- nc.Dunn <- (min_nc:max_nc)[which.max(res[, 26])]
- indice.Dunn <- max(res[, 26], na.rm = TRUE)
- best.nc <- nc.Dunn
- }
- nc.Hubert <- indice.Hubert <- 0
- if (any(indice == 27) || (indice == 31) || (indice == 32)) {
- nc.Hubert <- 0
- indice.Hubert <- 0
- par(mfrow = c(1, 2))
- plot(x_axis, res[, 27], tck = 0, type = "b", col = "red",
- xlab = expression(paste("Number of clusters ")),
- ylab = expression(paste("Hubert Statistic values")))
- plot(DiffLev[, 1], DiffLev[, 10], tck = 0, type = "b",
- col = "blue", xlab = expression(paste("Number of clusters ")),
- ylab = expression(paste("Hubert statistic second differences")))
- cat(paste("*** : The Hubert index is a graphical method of determining the number of clusters.\n In the plot of Hubert index, we seek a significant knee that corresponds to a \n significant increase of the value of the measure i.e the significant peak in Hubert\n index second differences plot.",
- "\n", "\n"))
- }
- nc.sdindex <- indice.sdindex <- 0
- if (any(indice == 28) || (indice == 31) || (indice == 32)) {
- nc.sdindex <- (min_nc:max_nc)[which.min(res[, 28])]
- indice.sdindex <- min(res[, 28], na.rm = TRUE)
- best.nc <- nc.sdindex
- }
- nc.Dindex <- indice.Dindex <- 0
- if (any(indice == 29) || (indice == 31) || (indice == 32)) {
- nc.Dindex <- 0
- indice.Dindex <- 0
- par(mfrow = c(1, 2))
- plot(x_axis, res[, 29], tck = 0, type = "b", col = "red",
- xlab = expression(paste("Number of clusters ")),
- ylab = expression(paste("Dindex Values")))
- plot(DiffLev[, 1], DiffLev[, 12], tck = 0, type = "b",
- col = "blue", xlab = expression(paste("Number of clusters ")),
- ylab = expression(paste("Second differences Dindex Values")))
- cat(paste("*** : The D index is a graphical method of determining the number of clusters. \n In the plot of D index, we seek a significant knee (the significant peak in Dindex\n second differences plot) that corresponds to a significant increase of the value of\n the measure.",
- "\n", "\n"))
- }
- nc.SDbw <- indice.SDbw <- 0
- if (any(indice == 30) || (indice == 31) || (indice == 32)) {
- nc.SDbw <- (min_nc:max_nc)[which.min(res[, 30])]
- indice.SDbw <- min(res[, 30], na.rm = TRUE)
- best.nc <- nc.SDbw
- }
- if (indice < 31) {
- res <- res[, c(indice)]
- if (indice == 14) {
- resCritical <- resCritical[, 1]
- }
- if (indice == 15) {
- resCritical <- resCritical[, 2]
- }
- if (indice == 16) {
- resCritical <- resCritical[, 3]
- }
- if (indice == 20) {
- resCritical <- resCritical[, 4]
- }
- }
- if (indice == 31) {
- res <- res[, c(1:19, 21:22, 26:30)]
- resCritical <- resCritical[, c(1:3)]
- }
- if (any(indice == 20) || (indice == 23) || (indice == 24) ||
- (indice == 25) || (indice == 32)) {
- results <- c(nc.KL, indice.KL, nc.CH, indice.CH, nc.Hartigan,
- indice.Hartigan, nc.CCC, indice.CCC, nc.Scott, indice.Scott,
- nc.Marriot, indice.Marriot, nc.TrCovW, indice.TrCovW,
- nc.TraceW, indice.TraceW, nc.Friedman, indice.Friedman,
- nc.Rubin, indice.Rubin, nc.cindex, indice.cindex,
- nc.DB, indice.DB, nc.Silhouette, indice.Silhouette,
- nc.Duda, indice.Duda, nc.Pseudo, indice.Pseudo,
- nc.Beale, indice.Beale, nc.Ratkowsky, indice.Ratkowsky,
- nc.Ball, indice.Ball, nc.ptbiserial, indice.ptbiserial,
- nc.Gap, indice.Gap, nc.Frey, indice.Frey, nc.McClain,
- indice.McClain, nc.Gamma, indice.Gamma, nc.Gplus,
- indice.Gplus, nc.Tau, indice.Tau, nc.Dunn, indice.Dunn,
- nc.Hubert, indice.Hubert, nc.sdindex, indice.sdindex,
- nc.Dindex, indice.Dindex, nc.SDbw, indice.SDbw)
- results1 <- matrix(c(results), nrow = 2, ncol = 30)
- resultats <- matrix(c(results), nrow = 2, ncol = 30,
- dimnames = list(c("Number_clusters", "Value_Index"),
- c("KL", "CH", "Hartigan", "CCC", "Scott", "Marriot",
- "TrCovW", "TraceW", "Friedman", "Rubin", "Cindex",
- "DB", "Silhouette", "Duda", "PseudoT2", "Beale",
- "Ratkowsky", "Ball", "PtBiserial", "Gap",
- "Frey", "McClain", "Gamma", "Gplus", "Tau",
- "Dunn", "Hubert", "SDindex", "Dindex", "SDbw")))
- }else {
- results <- c(nc.KL, indice.KL, nc.CH, indice.CH, nc.Hartigan,
- indice.Hartigan, nc.CCC, indice.CCC, nc.Scott, indice.Scott,
- nc.Marriot, indice.Marriot, nc.TrCovW, indice.TrCovW,
- nc.TraceW, indice.TraceW, nc.Friedman, indice.Friedman,
- nc.Rubin, indice.Rubin, nc.cindex, indice.cindex,
- nc.DB, indice.DB, nc.Silhouette, indice.Silhouette,
- nc.Duda, indice.Duda, nc.Pseudo, indice.Pseudo,
- nc.Beale, indice.Beale, nc.Ratkowsky, indice.Ratkowsky,
- nc.Ball, indice.Ball, nc.ptbiserial, indice.ptbiserial,
- nc.Frey, indice.Frey, nc.McClain, indice.McClain,
- nc.Dunn, indice.Dunn, nc.Hubert, indice.Hubert,
- nc.sdindex, indice.sdindex, nc.Dindex, indice.Dindex,
- nc.SDbw, indice.SDbw)
- results1 <- matrix(c(results), nrow = 2, ncol = 26)
- resultats <- matrix(c(results), nrow = 2, ncol = 26,
- dimnames = list(c("Number_clusters", "Value_Index"),
- c("KL", "CH", "Hartigan", "CCC", "Scott", "Marriot",
- "TrCovW", "TraceW", "Friedman", "Rubin", "Cindex",
- "DB", "Silhouette", "Duda", "PseudoT2", "Beale",
- "Ratkowsky", "Ball", "PtBiserial", "Frey",
- "McClain", "Dunn", "Hubert", "SDindex", "Dindex",
- "SDbw")))
- }
- if (any(indice <= 20) || (indice == 23) || (indice == 24) ||
- (indice == 25)) {
- resultats <- resultats[, c(indice)]
- }
- if (any(indice == 21) || (indice == 22)) {
- indice3 <- indice - 1
- resultats <- resultats[, c(indice3)]
- }
- if (any(indice == 26) || (indice == 27) || (indice == 28) ||
- (indice == 29) || (indice == 30)) {
- indice4 <- indice - 4
- resultats <- resultats[, c(indice4)]
- }
- resultats <- round(resultats, digits = 4)
- res <- round(res, digits = 4)
- resCritical <- round(resCritical, digits = 4)
- if (any(indice == 31) || (indice == 32)) {
- cat("*******************************************************************",
- "\n")
- cat("* Among all indices: ",
- "\n")
- BestCluster <- results1[1, ]
- c = 0
- for (i in min.nc:max.nc) {
- vect <- which(BestCluster == i)
- if (length(vect) > 0)
- cat("*", length(vect), "proposed", i, "as the best number of clusters",
- "\n")
- if (c < length(vect)) {
- j = i
- c <- length(vect)
- }
- }
- cat("\n", " ***** Conclusion ***** ",
- "\n", "\n")
- cat("* According to the majority rule, the best number of clusters is ",
- j, "\n", "\n", "\n")
- cat("*******************************************************************",
- "\n")
- if (any(method == 1) || (method == 2) || (method ==
- 3) || (method == 4) || (method == 5) || (method ==
- 6) || (method == 7) || (method == 9))
- partition <- cutree(hc, k = j)
- else {
- set.seed(1)
- partition <- kmeans(jeu, j)$cluster
- }
- }
- if (any(indice == 1) || (indice == 2) || (indice == 3) ||
- (indice == 4) || (indice == 5) || (indice == 6) || (indice ==
- 7) || (indice == 8) || (indice == 9) || (indice == 10) ||
- (indice == 11) || (indice == 12) || (indice == 13) ||
- (indice == 14) || (indice == 15) || (indice == 16) ||
- (indice == 17) || (indice == 18) || (indice == 19) ||
- (indice == 20) || (indice == 21) || (indice == 22) ||
- (indice == 23) || (indice == 24) || (indice == 25) ||
- (indice == 26) || (indice == 28) || (indice == 30)) {
- if (any(method == 1) || (method == 2) || (method ==
- 3) || (method == 4) || (method == 5) || (method ==
- 6) || (method == 7) || (method == 9))
- partition <- cutree(hc, k = best.nc)
- else {
- set.seed(1)
- partition <- kmeans(jeu, best.nc)$cluster
- }
- }
- if ((indice == 14) || (indice == 15) || (indice == 16) ||
- (indice == 20) || (indice == 31) || (indice == 32)) {
- results.final <- list(All.index = res, All.CriticalValues = resCritical,
- Best.nc = resultats, Best.partition = partition)
- }
- if ((indice == 27) || (indice == 29))
- results.final <- list(All.index = res)
- if (any(indice == 1) || (indice == 2) || (indice == 3) ||
- (indice == 4) || (indice == 5) || (indice == 6) || (indice ==
- 7) || (indice == 8) || (indice == 9) || (indice == 10) ||
- (indice == 11) || (indice == 12) || (indice == 13) ||
- (indice == 17) || (indice == 18) || (indice == 19) ||
- (indice == 21) || (indice == 22) || (indice == 23) ||
- (indice == 24) || (indice == 25) || (indice == 26) ||
- (indice == 28) || (indice == 30))
- results.final <- list(All.index = res, Best.nc = resultats,
- Best.partition = partition)
- return(results.final)
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement