Advertisement
Guest User

Untitled

a guest
Sep 6th, 2019
327
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 76.64 KB | None | 0 0
  1. NbCluster_copy <- function (data = NULL, diss = NULL, distance = "euclidean",
  2. min.nc = 2, max.nc = 15, method = NULL, index = "all", alphaBeale = 0.1)
  3. {
  4. x <- 0
  5. min_nc <- min.nc
  6. max_nc <- max.nc
  7. if (is.null(method))
  8. stop("method is NULL")
  9. method <- pmatch(method, c("ward.D2", "single", "complete",
  10. "average", "mcquitty", "median", "centroid", "kmeans",
  11. "ward.D"))
  12. indice <- pmatch(index, c("kl", "ch", "hartigan", "ccc",
  13. "scott", "marriot", "trcovw", "tracew", "friedman",
  14. "rubin", "cindex", "db", "silhouette", "duda", "pseudot2",
  15. "beale", "ratkowsky", "ball", "ptbiserial", "gap", "frey",
  16. "mcclain", "gamma", "gplus", "tau", "dunn", "hubert",
  17. "sdindex", "dindex", "sdbw", "all", "alllong"))
  18. if (is.na(indice))
  19. stop("invalid clustering index")
  20. if (indice == -1)
  21. stop("ambiguous index")
  22. if ((indice == 3) || (indice == 5) || (indice == 6) || (indice ==
  23. 7) || (indice == 8) || (indice == 9) || (indice == 10) ||
  24. (indice == 11) || (indice == 18) || (indice == 27) ||
  25. (indice == 29) || (indice == 31) || (indice == 32)) {
  26. if ((max.nc - min.nc) < 2)
  27. stop("The difference between the minimum and the maximum number of clusters must be at least equal to 2")
  28. }
  29. if (is.null(data)) {
  30. if (method == 8) {
  31. stop("\n", "method = kmeans, data matrix is needed")
  32. }else {
  33. if ((indice == 1) || (indice == 2) || (indice ==
  34. 3) || (indice == 4) || (indice == 5) || (indice ==
  35. 6) || (indice == 7) || (indice == 8) || (indice ==
  36. 9) || (indice == 10) || (indice == 12) || (indice ==
  37. 14) || (indice == 15) || (indice == 16) || (indice ==
  38. 17) || (indice == 18) || (indice == 19) || (indice ==
  39. 20) || (indice == 23) || (indice == 24) || (indice ==
  40. 25) || (indice == 27) || (indice == 28) || (indice ==
  41. 29) || (indice == 30) || (indice == 31) || (indice ==
  42. 32))
  43. stop("\n", "Data matrix is needed. Only frey, mcclain, cindex, sihouette and dunn can be computed.",
  44. "\n")
  45. if (is.null(diss))
  46. stop("data matrix and dissimilarity matrix are both null")
  47. else cat("\n", "Only frey, mcclain, cindex, sihouette and dunn can be computed. To compute the other indices, data matrix is needed",
  48. "\n")
  49. }
  50. }else {
  51. jeu1 <- as.matrix(data)
  52. numberObsBefore <- dim(jeu1)[1]
  53. jeu <- na.omit(jeu1)
  54. nn <- numberObsAfter <- dim(jeu)[1]
  55. pp <- dim(jeu)[2]
  56. TT <- t(jeu) %*% jeu
  57. sizeEigenTT <- length(eigen(TT)$value)
  58. eigenValues <- eigen(TT/(nn - 1))$value
  59. if (any(indice == 4) || (indice == 5) || (indice ==
  60. 6) || (indice == 7) || (indice == 8) || (indice ==
  61. 9) || (indice == 10) || (indice == 31) || (indice ==
  62. 32)) {
  63. for (i in 1:sizeEigenTT) {
  64. if (eigenValues[i] < 0) {
  65. stop("The TSS matrix is indefinite. There must be too many missing values. The index cannot be calculated.")
  66. }
  67. }
  68. s1 <- sqrt(eigenValues)
  69. ss <- rep(1, sizeEigenTT)
  70. for (i in 1:sizeEigenTT) {
  71. if (s1[i] != 0)
  72. ss[i] = s1[i]
  73. }
  74. vv <- prod(ss)
  75. }
  76. }
  77. if (is.null(distance))
  78. distanceM <- 7
  79. if (!is.null(distance))
  80. distanceM <- pmatch(distance, c("euclidean", "maximum",
  81. "manhattan", "canberra", "binary", "minkowski"))
  82. if (is.na(distanceM)) {
  83. stop("invalid distance")
  84. }
  85. if (is.null(diss)) {
  86. if (distanceM == 1) {
  87. md <- dist(jeu, method = "euclidean")
  88. }
  89. if (distanceM == 2) {
  90. md <- dist(jeu, method = "maximum")
  91. }
  92. if (distanceM == 3) {
  93. md <- dist(jeu, method = "manhattan")
  94. }
  95. if (distanceM == 4) {
  96. md <- dist(jeu, method = "canberra")
  97. }
  98. if (distanceM == 5) {
  99. md <- dist(jeu, method = "binary")
  100. }
  101. if (distanceM == 6) {
  102. md <- dist(jeu, method = "minkowski")
  103. }
  104. if (distanceM == 7) {
  105. stop("dissimilarity matrix and distance are both NULL")
  106. }
  107. }
  108. if (!is.null(diss)) {
  109. if ((distanceM == 1) || (distanceM == 2) || (distanceM ==
  110. 3) || (distanceM == 4) || (distanceM == 5) || (distanceM ==
  111. 6))
  112. stop("dissimilarity matrix and distance are both not null")
  113. else md <- diss
  114. }
  115. res <- array(0, c(max_nc - min_nc + 1, 30))
  116. x_axis <- min_nc:max_nc
  117. resCritical <- array(0, c(max_nc - min_nc + 1, 4))
  118. rownames(resCritical) <- min_nc:max_nc
  119. colnames(resCritical) <- c("CritValue_Duda", "CritValue_PseudoT2",
  120. "Fvalue_Beale", "CritValue_Gap")
  121. rownames(res) <- min_nc:max_nc
  122. colnames(res) <- c("KL", "CH", "Hartigan", "CCC", "Scott",
  123. "Marriot", "TrCovW", "TraceW", "Friedman", "Rubin",
  124. "Cindex", "DB", "Silhouette", "Duda", "Pseudot2", "Beale",
  125. "Ratkowsky", "Ball", "Ptbiserial", "Gap", "Frey", "McClain",
  126. "Gamma", "Gplus", "Tau", "Dunn", "Hubert", "SDindex",
  127. "Dindex", "SDbw")
  128. if (is.na(method))
  129. stop("invalid clustering method")
  130. if (method == -1)
  131. stop("ambiguous method")
  132. if (method == 1) {
  133. hc <- hclust(md, method = "ward.D2")
  134. }
  135. if (method == 2) {
  136. hc <- hclust(md, method = "single")
  137. }
  138. if (method == 3) {
  139. hc <- hclust(md, method = "complete")
  140. }
  141. if (method == 4) {
  142. hc <- hclust(md, method = "average")
  143. }
  144. if (method == 5) {
  145. hc <- hclust(md, method = "mcquitty")
  146. }
  147. if (method == 6) {
  148. hc <- hclust(md, method = "median")
  149. }
  150. if (method == 7) {
  151. hc <- hclust(md, method = "centroid")
  152. }
  153. if (method == 9) {
  154. hc <- hclust(md, method = "ward.D")
  155. }
  156. centers <- function(cl, x) {
  157. x <- as.matrix(x)
  158. n <- length(cl)
  159. k <- max(cl)
  160. centers <- matrix(nrow = k, ncol = ncol(x))
  161. {
  162. for (i in 1:k) {
  163. for (j in 1:ncol(x)) {
  164. centers[i, j] <- mean(x[cl == i, j])
  165. }
  166. }
  167. }
  168. return(centers)
  169. }
  170. Average.scattering <- function(cl, x) {
  171. x <- as.matrix(x)
  172. n <- length(cl)
  173. k <- max(cl)
  174. centers.matrix <- centers(cl, x)
  175. cluster.size <- numeric(0)
  176. variance.clusters <- matrix(0, ncol = ncol(x), nrow = k)
  177. var <- matrix(0, ncol = ncol(x), nrow = k)
  178. for (u in 1:k) cluster.size[u] <- sum(cl == u)
  179. for (u in 1:k) {
  180. for (j in 1:ncol(x)) {
  181. for (i in 1:n) {
  182. if (cl[i] == u)
  183. variance.clusters[u, j] <- variance.clusters[u,
  184. j] + (x[i, j] - centers.matrix[u, j])^2
  185. }
  186. }
  187. }
  188. for (u in 1:k) {
  189. for (j in 1:ncol(x)) variance.clusters[u, j] = variance.clusters[u,
  190. j]/cluster.size[u]
  191. }
  192. variance.matrix <- numeric(0)
  193. for (j in 1:ncol(x)) variance.matrix[j] = var(x[, j]) *
  194. (n - 1)/n
  195. Somme.variance.clusters <- 0
  196. for (u in 1:k) Somme.variance.clusters <- Somme.variance.clusters +
  197. sqrt((variance.clusters[u, ] %*% (variance.clusters[u,
  198. ])))
  199. stdev <- (1/k) * sqrt(Somme.variance.clusters)
  200. scat <- (1/k) * (Somme.variance.clusters/sqrt(variance.matrix %*%
  201. variance.matrix))
  202. scat <- list(stdev = stdev, centers = centers.matrix,
  203. variance.intraclusters = variance.clusters, scatt = scat)
  204. return(scat)
  205. }
  206. density.clusters <- function(cl, x) {
  207. x <- as.matrix(x)
  208. k <- max(cl)
  209. n <- length(cl)
  210. distance <- matrix(0, ncol = 1, nrow = n)
  211. density <- matrix(0, ncol = 1, nrow = k)
  212. centers.matrix <- centers(cl, x)
  213. stdev <- Average.scattering(cl, x)$stdev
  214. for (i in 1:n) {
  215. u = 1
  216. while (cl[i] != u) u <- u + 1
  217. for (j in 1:ncol(x)) {
  218. distance[i] <- distance[i] + (x[i, j] - centers.matrix[u,
  219. j])^2
  220. }
  221. distance[i] <- sqrt(distance[i])
  222. if (distance[i] <= stdev)
  223. density[u] = density[u] + 1
  224. }
  225. dens <- list(distance = distance, density = density)
  226. return(dens)
  227. }
  228. density.bw <- function(cl, x) {
  229. x <- as.matrix(x)
  230. k <- max(cl)
  231. n <- length(cl)
  232. centers.matrix <- centers(cl, x)
  233. stdev <- Average.scattering(cl, x)$stdev
  234. density.bw <- matrix(0, ncol = k, nrow = k)
  235. u <- 1
  236. for (u in 1:k) {
  237. for (v in 1:k) {
  238. if (v != u) {
  239. distance <- matrix(0, ncol = 1, nrow = n)
  240. moy <- (centers.matrix[u, ] + centers.matrix[v,
  241. ])/2
  242. for (i in 1:n) {
  243. if ((cl[i] == u) || (cl[i] == v)) {
  244. for (j in 1:ncol(x)) {
  245. distance[i] <- distance[i] + (x[i, j] -
  246. moy[j])^2
  247. }
  248. distance[i] <- sqrt(distance[i])
  249. if (distance[i] <= stdev) {
  250. density.bw[u, v] <- density.bw[u, v] +
  251. 1
  252. }
  253. }
  254. }
  255. }
  256. }
  257. }
  258. density.clust <- density.clusters(cl, x)$density
  259. S <- 0
  260. for (u in 1:k) for (v in 1:k) {
  261. if (max(density.clust[u], density.clust[v]) != 0)
  262. S = S + (density.bw[u, v]/max(density.clust[u],
  263. density.clust[v]))
  264. }
  265. density.bw <- S/(k * (k - 1))
  266. return(density.bw)
  267. }
  268. Dis <- function(cl, x) {
  269. x <- as.matrix(x)
  270. k <- max(cl)
  271. centers.matrix <- centers(cl, x)
  272. Distance.centers <- dist(centers.matrix)
  273. Dmin <- min(Distance.centers)
  274. Dmax <- max(Distance.centers)
  275. Distance.centers <- as.matrix(Distance.centers)
  276. s2 <- 0
  277. for (u in 1:k) {
  278. s1 = 0
  279. for (j in 1:ncol(Distance.centers)) {
  280. s1 <- s1 + Distance.centers[u, j]
  281. }
  282. s2 <- s2 + 1/s1
  283. }
  284. Dis <- (Dmax/Dmin) * s2
  285. return(Dis)
  286. }
  287. Index.Hubert <- function(x, cl) {
  288. k <- max(cl)
  289. n <- dim(x)[1]
  290. y <- matrix(0, ncol = dim(x)[2], nrow = n)
  291. P <- as.matrix(md)
  292. meanP <- mean(P)
  293. variance.matrix <- numeric(0)
  294. md <- dist(x, method = "euclidean")
  295. for (j in 1:n) variance.matrix[j] = var(P[, j]) * (n -
  296. 1)/n
  297. varP <- sqrt(variance.matrix %*% variance.matrix)
  298. centers.clusters <- centers(cl, x)
  299. for (i in 1:n) {
  300. for (u in 1:k) {
  301. if (cl[i] == u)
  302. y[i, ] <- centers.clusters[u, ]
  303. }
  304. }
  305. Q <- as.matrix(dist(y, method = "euclidean"))
  306. meanQ <- mean(Q)
  307. for (j in 1:n) variance.matrix[j] = var(Q[, j]) * (n -
  308. 1)/n
  309. varQ <- sqrt(variance.matrix %*% variance.matrix)
  310. M <- n * (n - 1)/2
  311. S <- 0
  312. n1 <- n - 1
  313. for (i in 1:n1) {
  314. j <- i + 1
  315. while (j <= n) {
  316. S <- S + (P[i, j] - meanP) * (Q[i, j] - meanQ)
  317. j <- j + 1
  318. }
  319. }
  320. gamma <- S/(M * varP * varQ)
  321. return(gamma)
  322. }
  323. Index.sPlussMoins <- function(cl1, md) {
  324. cn1 <- max(cl1)
  325. n1 <- length(cl1)
  326. dmat <- as.matrix(md)
  327. average.distance <- median.distance <- separation <- cluster.size <- within.dist1 <- between.dist1 <- numeric(0)
  328. separation.matrix <- matrix(0, ncol = cn1, nrow = cn1)
  329. di <- list()
  330. for (u in 1:cn1) {
  331. cluster.size[u] <- sum(cl1 == u)
  332. du <- as.dist(dmat[cl1 == u, cl1 == u])
  333. within.dist1 <- c(within.dist1, du)
  334. average.distance[u] <- mean(du)
  335. median.distance[u] <- median(du)
  336. bv <- numeric(0)
  337. for (v in 1:cn1) {
  338. if (v != u) {
  339. suv <- dmat[cl1 == u, cl1 == v]
  340. bv <- c(bv, suv)
  341. if (u < v) {
  342. separation.matrix[u, v] <- separation.matrix[v,
  343. u] <- min(suv)
  344. between.dist1 <- c(between.dist1, suv)
  345. }
  346. }
  347. }
  348. }
  349. nwithin1 <- length(within.dist1)
  350. nbetween1 <- length(between.dist1)
  351. meanwithin1 <- mean(within.dist1)
  352. meanbetween1 <- mean(between.dist1)
  353. s.plus <- s.moins <- 0
  354. for (k in 1:nwithin1) {
  355. s.plus <- s.plus + (colSums(outer(between.dist1,
  356. within.dist1[k], ">")))
  357. s.moins <- s.moins + (colSums(outer(between.dist1,
  358. within.dist1[k], "<")))
  359. }
  360. Index.Gamma <- (s.plus - s.moins)/(s.plus + s.moins)
  361. Index.Gplus <- (2 * s.moins)/(n1 * (n1 - 1))
  362. t.tau <- (nwithin1 * nbetween1) - (s.plus + s.moins)
  363. Index.Tau <- (s.plus - s.moins)/(((n1 * (n1 - 1)/2 -
  364. t.tau) * (n1 * (n1 - 1)/2))^(1/2))
  365. results <- list(gamma = Index.Gamma, gplus = Index.Gplus,
  366. tau = Index.Tau)
  367. return(results)
  368. }
  369. Index.15and28 <- function(cl1, cl2, md) {
  370. cn1 <- max(cl1)
  371. n1 <- length(cl1)
  372. dmat <- as.matrix(md)
  373. average.distance <- median.distance <- separation <- cluster.size <- within.dist1 <- between.dist1 <- numeric(0)
  374. separation.matrix <- matrix(0, ncol = cn1, nrow = cn1)
  375. di <- list()
  376. for (u in 1:cn1) {
  377. cluster.size[u] <- sum(cl1 == u)
  378. du <- as.dist(dmat[cl1 == u, cl1 == u])
  379. within.dist1 <- c(within.dist1, du)
  380. for (v in 1:cn1) {
  381. if (v != u) {
  382. suv <- dmat[cl1 == u, cl1 == v]
  383. if (u < v) {
  384. separation.matrix[u, v] <- separation.matrix[v,
  385. u] <- min(suv)
  386. between.dist1 <- c(between.dist1, suv)
  387. }
  388. }
  389. }
  390. }
  391. cn2 <- max(cl2)
  392. n2 <- length(cl2)
  393. dmat <- as.matrix(md)
  394. average.distance <- median.distance <- separation <- cluster.size <- within.dist2 <- between.dist2 <- numeric(0)
  395. separation.matrix <- matrix(0, ncol = cn2, nrow = cn2)
  396. di <- list()
  397. for (w in 1:cn2) {
  398. cluster.size[w] <- sum(cl2 == w)
  399. dw <- as.dist(dmat[cl2 == w, cl2 == w])
  400. within.dist2 <- c(within.dist2, dw)
  401. bx <- numeric(0)
  402. for (x in 1:cn2) {
  403. if (x != w) {
  404. swx <- dmat[cl2 == w, cl2 == x]
  405. bx <- c(bx, swx)
  406. if (w < x) {
  407. separation.matrix[w, x] <- separation.matrix[x,
  408. w] <- min(swx)
  409. between.dist2 <- c(between.dist2, swx)
  410. }
  411. }
  412. }
  413. }
  414. nwithin1 <- length(within.dist1)
  415. nbetween1 <- length(between.dist1)
  416. meanwithin1 <- mean(within.dist1)
  417. meanbetween1 <- mean(between.dist1)
  418. meanwithin2 <- mean(within.dist2)
  419. meanbetween2 <- mean(between.dist2)
  420. Index.15 <- (meanbetween2 - meanbetween1)/(meanwithin2 -
  421. meanwithin1)
  422. Index.28 <- (meanwithin1/nwithin1)/(meanbetween1/nbetween1)
  423. results <- list(frey = Index.15, mcclain = Index.28)
  424. return(results)
  425. }
  426. Indice.ptbiserial <- function(x, md, cl1) {
  427. nn <- dim(x)[1]
  428. pp <- dim(x)[2]
  429. md2 <- as.matrix(md)
  430. m01 <- array(NA, c(nn, nn))
  431. nbr <- (nn * (nn - 1))/2
  432. pb <- array(0, c(nbr, 2))
  433. m3 <- 1
  434. for (m1 in 2:nn) {
  435. m12 <- m1 - 1
  436. for (m2 in 1:m12) {
  437. if (cl1[m1] == cl1[m2])
  438. m01[m1, m2] <- 0
  439. if (cl1[m1] != cl1[m2])
  440. m01[m1, m2] <- 1
  441. pb[m3, 1] <- m01[m1, m2]
  442. pb[m3, 2] <- md2[m1, m2]
  443. m3 <- m3 + 1
  444. }
  445. }
  446. y <- pb[, 1]
  447. x <- pb[, 2]
  448. biserial.cor <- function(x, y, use = c("all.obs", "complete.obs"),
  449. level = 1) {
  450. if (!is.numeric(x))
  451. stop("'x' must be a numeric variable.\n")
  452. y <- as.factor(y)
  453. if (length(levs <- levels(y)) > 2)
  454. stop("'y' must be a dichotomous variable.\n")
  455. if (length(x) != length(y))
  456. stop("'x' and 'y' do not have the same length")
  457. use <- match.arg(use)
  458. if (use == "complete.obs") {
  459. cc.ind <- complete.cases(x, y)
  460. x <- x[cc.ind]
  461. y <- y[cc.ind]
  462. }
  463. ind <- y == levs[level]
  464. diff.mu <- mean(x[ind]) - mean(x[!ind])
  465. prob <- mean(ind)
  466. diff.mu * sqrt(prob * (1 - prob))/sd(x)
  467. }
  468. ptbiserial <- biserial.cor(x = pb[, 2], y = pb[, 1],
  469. level = 2)
  470. return(ptbiserial)
  471. }
  472. Indices.WKWL <- function(x, cl1 = cl1, cl2 = cl2) {
  473. dim2 <- dim(x)[2]
  474. wss <- function(x) {
  475. x <- as.matrix(x)
  476. n <- length(x)
  477. centers <- matrix(nrow = 1, ncol = ncol(x))
  478. if (ncol(x) == 1) {
  479. centers[1, ] <- mean(x)
  480. }
  481. if (is.null(dim(x))) {
  482. bb <- matrix(x, byrow = FALSE, nrow = 1, ncol = ncol(x))
  483. centers[1, ] <- apply(bb, 2, mean)
  484. }else {
  485. centers[1, ] <- apply(x, 2, mean)
  486. }
  487. x.2 <- sweep(x, 2, centers[1, ], "-")
  488. withins <- sum(x.2^2)
  489. wss <- sum(withins)
  490. return(wss)
  491. }
  492. ncg1 <- 1
  493. ncg1max <- max(cl1)
  494. while ((sum(cl1 == ncg1) == sum(cl2 == ncg1)) && ncg1 <=
  495. ncg1max) {
  496. ncg1 <- ncg1 + 1
  497. }
  498. g1 <- ncg1
  499. ncg2 <- max(cl2)
  500. nc2g2 <- ncg2 - 1
  501. while ((sum(cl1 == nc2g2) == sum(cl2 == ncg2)) && nc2g2 >=
  502. 1) {
  503. ncg2 <- ncg2 - 1
  504. nc2g2 <- nc2g2 - 1
  505. }
  506. g2 <- ncg2
  507. NK <- sum(cl2 == g1)
  508. WK.x <- x[cl2 == g1, ]
  509. WK <- wss(x = WK.x)
  510. NL <- sum(cl2 == g2)
  511. WL.x <- x[cl2 == g2, ]
  512. WL <- wss(x = WL.x)
  513. NM <- sum(cl1 == g1)
  514. WM.x <- x[cl1 == g1, ]
  515. WM <- wss(x = WM.x)
  516. duda <- (WK + WL)/WM
  517. BKL <- WM - WK - WL
  518. pseudot2 <- BKL/((WK + WL)/(NK + NL - 2))
  519. beale <- (BKL/(WK + WL))/(((NM - 1)/(NM - 2)) * (2^(2/dim2) -
  520. 1))
  521. results <- list(duda = duda, pseudot2 = pseudot2, NM = NM,
  522. NK = NK, NL = NL, beale = beale)
  523. return(results)
  524. }
  525. Indices.WBT <- function(x, cl, P, s, vv) {
  526. n <- dim(x)[1]
  527. pp <- dim(x)[2]
  528. qq <- max(cl)
  529. z <- matrix(0, ncol = qq, nrow = n)
  530. clX <- as.matrix(cl)
  531. for (i in 1:n) for (j in 1:qq) {
  532. z[i, j] == 0
  533. if (clX[i, 1] == j) {
  534. z[i, j] = 1
  535. }
  536. }
  537. xbar <- solve(t(z) %*% z) %*% t(z) %*% x
  538. B <- t(xbar) %*% t(z) %*% z %*% xbar
  539. W <- P - B
  540. marriot <- (qq^2) * det(W)
  541. trcovw <- sum(diag(cov(W)))
  542. tracew <- sum(diag(W))
  543. if (det(W) != 0)
  544. scott <- n * log(det(P)/det(W))
  545. else {
  546. cat("Error: division by zero!")
  547. }
  548. friedman <- sum(diag(solve(W) * B))
  549. rubin <- sum(diag(P))/sum(diag(W))
  550. R2 <- 1 - sum(diag(W))/sum(diag(P))
  551. v1 <- 1
  552. u <- rep(0, pp)
  553. c <- (vv/(qq))^(1/pp)
  554. u <- s/c
  555. k1 <- sum((u >= 1) == TRUE)
  556. p1 <- min(k1, qq - 1)
  557. if (all(p1 > 0, p1 < pp)) {
  558. for (i in 1:p1) v1 <- v1 * s[i]
  559. c <- (v1/(qq))^(1/p1)
  560. u <- s/c
  561. b1 <- sum(1/(n + u[1:p1]))
  562. b2 <- sum(u[p1 + 1:pp]^2/(n + u[p1 + 1:pp]), na.rm = TRUE)
  563. E_R2 <- 1 - ((b1 + b2)/sum(u^2)) * ((n - qq)^2/n) *
  564. (1 + 4/n)
  565. ccc <- log((1 - E_R2)/(1 - R2)) * (sqrt(n * p1/2)/((0.001 +
  566. E_R2)^1.2))
  567. }else {
  568. b1 <- sum(1/(n + u))
  569. E_R2 <- 1 - (b1/sum(u^2)) * ((n - qq)^2/n) * (1 +
  570. 4/n)
  571. ccc <- log((1 - E_R2)/(1 - R2)) * (sqrt(n * pp/2)/((0.001 +
  572. E_R2)^1.2))
  573. }
  574. results <- list(ccc = ccc, scott = scott, marriot = marriot,
  575. trcovw = trcovw, tracew = tracew, friedman = friedman,
  576. rubin = rubin)
  577. return(results)
  578. }
  579. Indices.Traces <- function(data, d, clall, index = "all") {
  580. x <- data
  581. cl0 <- clall[, 1]
  582. cl1 <- clall[, 2]
  583. cl2 <- clall[, 3]
  584. clall <- clall
  585. nb.cl0 <- table(cl0)
  586. nb.cl1 <- table(cl1)
  587. nb.cl2 <- table(cl2)
  588. nb1.cl0 <- sum(nb.cl0 == 1)
  589. nb1.cl1 <- sum(nb.cl1 == 1)
  590. nb1.cl2 <- sum(nb.cl2 == 1)
  591. gss <- function(x, cl, d) {
  592. n <- length(cl)
  593. k <- max(cl)
  594. centers <- matrix(nrow = k, ncol = ncol(x))
  595. for (i in 1:k) {
  596. if (ncol(x) == 1) {
  597. centers[i, ] <- mean(x[cl == i, ])
  598. }
  599. if (is.null(dim(x[cl == i, ]))) {
  600. bb <- matrix(x[cl == i, ], byrow = FALSE,
  601. nrow = 1, ncol = ncol(x))
  602. centers[i, ] <- apply(bb, 2, mean)
  603. }else {
  604. centers[i, ] <- apply(x[cl == i, ], 2, mean)
  605. }
  606. }
  607. allmean <- apply(x, 2, mean)
  608. dmean <- sweep(x, 2, allmean, "-")
  609. allmeandist <- sum(dmean^2)
  610. withins <- rep(0, k)
  611. x.2 <- (x - centers[cl, ])^2
  612. for (i in 1:k) {
  613. withins[i] <- sum(x.2[cl == i, ])
  614. }
  615. wgss <- sum(withins)
  616. bgss <- allmeandist - wgss
  617. results <- list(wgss = wgss, bgss = bgss, centers = centers)
  618. return(results)
  619. }
  620. vargss <- function(x, clsize, varwithins) {
  621. nvar <- dim(x)[2]
  622. n <- sum(clsize)
  623. k <- length(clsize)
  624. varallmean <- rep(0, nvar)
  625. varallmeandist <- rep(0, nvar)
  626. varwgss <- rep(0, nvar)
  627. for (l in 1:nvar) varallmean[l] <- mean(x[, l])
  628. vardmean <- sweep(x, 2, varallmean, "-")
  629. for (l in 1:nvar) {
  630. varallmeandist[l] <- sum((vardmean[, l])^2)
  631. varwgss[l] <- sum(varwithins[, l])
  632. }
  633. varbgss <- varallmeandist - varwgss
  634. vartss <- varbgss + varwgss
  635. zvargss <- list(vartss = vartss, varbgss = varbgss)
  636. return(zvargss)
  637. }
  638. varwithinss <- function(x, centers, cluster) {
  639. nrow <- dim(centers)[1]
  640. nvar <- dim(x)[2]
  641. varwithins <- matrix(0, nrow, nvar)
  642. x <- (x - centers[cluster, ])^2
  643. for (l in 1:nvar) {
  644. for (k in 1:nrow) {
  645. varwithins[k, l] <- sum(x[cluster == k, l])
  646. }
  647. }
  648. return(varwithins)
  649. }
  650. indice.kl <- function(x, clall, d = NULL, centrotypes = "centroids") {
  651. if (nb1.cl1 > 0) {
  652. KL <- NA
  653. }
  654. if (sum(c("centroids", "medoids") == centrotypes) ==
  655. 0)
  656. stop("Wrong centrotypes argument")
  657. if ("medoids" == centrotypes && is.null(d))
  658. stop("For argument centrotypes = 'medoids' d cannot be null")
  659. if (!is.null(d)) {
  660. if (!is.matrix(d)) {
  661. d <- as.matrix(d)
  662. }
  663. row.names(d) <- row.names(x)
  664. }
  665. m <- ncol(x)
  666. g <- k <- max(clall[, 2])
  667. KL <- abs((g - 1)^(2/m) * gss(x, clall[, 1], d)$wgss -
  668. g^(2/m) * gss(x, clall[, 2], d)$wgss)/abs((g)^(2/m) *
  669. gss(x, clall[, 2], d)$wgss - (g + 1)^(2/m) *
  670. gss(x, clall[, 3], d)$wgss)
  671. return(KL)
  672. }
  673. indice.ch <- function(x, cl, d = NULL, centrotypes = "centroids") {
  674. if (nb1.cl1 > 0) {
  675. CH <- NA
  676. }
  677. if (sum(c("centroids", "medoids") == centrotypes) ==
  678. 0)
  679. stop("Wrong centrotypes argument")
  680. if ("medoids" == centrotypes && is.null(d))
  681. stop("For argument centrotypes = 'medoids' d cannot be null")
  682. if (!is.null(d)) {
  683. if (!is.matrix(d)) {
  684. d <- as.matrix(d)
  685. }
  686. row.names(d) <- row.names(x)
  687. }
  688. n <- length(cl)
  689. k <- max(cl)
  690. CH <- (gss(x, cl, d)$bgss/(k - 1))/(gss(x, cl, d)$wgss/(n -
  691. k))
  692. return(CH)
  693. }
  694. indice.hart <- function(x, clall, d = NULL, centrotypes = "centroids") {
  695. if (sum(c("centroids", "medoids") == centrotypes) ==
  696. 0)
  697. stop("Wrong centrotypes argument")
  698. if ("medoids" == centrotypes && is.null(d))
  699. stop("For argument centrotypes = 'medoids' d cannot be null")
  700. if (!is.null(d)) {
  701. if (!is.matrix(d)) {
  702. d <- as.matrix(d)
  703. }
  704. row.names(d) <- row.names(x)
  705. }
  706. n <- nrow(x)
  707. g <- max(clall[, 1])
  708. HART <- (gss(x, clall[, 2], d)$wgss/gss(x, clall[,
  709. 3], d)$wgss - 1) * (n - g - 1)
  710. return(HART)
  711. }
  712. indice.ball <- function(x, cl, d = NULL, centrotypes = "centroids") {
  713. wgssB <- gss(x, cl, d)$wgss
  714. qq <- max(cl)
  715. ball <- wgssB/qq
  716. return(ball)
  717. }
  718. indice.ratkowsky <- function(x, cl, d, centrotypes = "centroids") {
  719. qq <- max(cl)
  720. clsize <- table(cl)
  721. centers <- gss(x, cl, d)$centers
  722. varwithins <- varwithinss(x, centers, cl)
  723. zvargss <- vargss(x, clsize, varwithins)
  724. ratio <- mean(sqrt(zvargss$varbgss/zvargss$vartss))
  725. ratkowsky <- ratio/sqrt(qq)
  726. return(ratkowsky)
  727. }
  728. indice <- pmatch(index, c("kl", "ch", "hart", "ratkowsky",
  729. "ball", "all"))
  730. if (is.na(indice))
  731. stop("invalid clustering index")
  732. if (indice == -1)
  733. stop("ambiguous index")
  734. vecallindex <- numeric(5)
  735. if (any(indice == 1) || (indice == 6))
  736. vecallindex[1] <- indice.kl(x, clall, d)
  737. if (any(indice == 2) || (indice == 6))
  738. vecallindex[2] <- indice.ch(x, cl = clall[, 2],
  739. d)
  740. if (any(indice == 3) || (indice == 6))
  741. vecallindex[3] <- indice.hart(x, clall, d)
  742. if (any(indice == 4) || (indice == 6))
  743. vecallindex[4] <- indice.ratkowsky(x, cl = cl1,
  744. d)
  745. if (any(indice == 5) || (indice == 6))
  746. vecallindex[5] <- indice.ball(x, cl = cl1, d)
  747. names(vecallindex) <- c("kl", "ch", "hart", "ratkowsky",
  748. "ball")
  749. if (indice < 6)
  750. vecallindex <- vecallindex[indice]
  751. return(vecallindex)
  752. }
  753. Indice.cindex <- function(d, cl) {
  754. d <- data.matrix(d)
  755. DU <- 0
  756. r <- 0
  757. v_max <- array(1, max(cl))
  758. v_min <- array(1, max(cl))
  759. for (i in 1:max(cl)) {
  760. n <- sum(cl == i)
  761. if (n > 1) {
  762. t <- d[cl == i, cl == i]
  763. DU = DU + sum(t)/2
  764. v_max[i] = max(t)
  765. if (sum(t == 0) == n)
  766. v_min[i] <- min(t[t != 0])
  767. else v_min[i] <- 0
  768. r <- r + n * (n - 1)/2
  769. }
  770. }
  771. Dmin = min(v_min)
  772. Dmax = max(v_max)
  773. if (Dmin == Dmax)
  774. result <- NA
  775. else result <- (DU - r * Dmin)/(Dmax * r - Dmin * r)
  776. result
  777. }
  778. Indice.DB <- function(x, cl, d = NULL, centrotypes = "centroids",
  779. p = 2, q = 2) {
  780. if (sum(c("centroids") == centrotypes) == 0)
  781. stop("Wrong centrotypes argument")
  782. if (!is.null(d)) {
  783. if (!is.matrix(d)) {
  784. d <- as.matrix(d)
  785. }
  786. row.names(d) <- row.names(x)
  787. }
  788. if (is.null(dim(x))) {
  789. dim(x) <- c(length(x), 1)
  790. }
  791. x <- as.matrix(x)
  792. n <- length(cl)
  793. k <- max(cl)
  794. dAm <- d
  795. centers <- matrix(nrow = k, ncol = ncol(x))
  796. if (centrotypes == "centroids") {
  797. for (i in 1:k) {
  798. for (j in 1:ncol(x)) {
  799. centers[i, j] <- mean(x[cl == i, j])
  800. }
  801. }
  802. }else {
  803. stop("wrong centrotypes argument")
  804. }
  805. S <- rep(0, k)
  806. for (i in 1:k) {
  807. ind <- (cl == i)
  808. if (sum(ind) > 1) {
  809. centerI <- centers[i, ]
  810. centerI <- rep(centerI, sum(ind))
  811. centerI <- matrix(centerI, nrow = sum(ind),
  812. ncol = ncol(x), byrow = TRUE)
  813. S[i] <- mean(sqrt(apply((x[ind, ] - centerI)^2,
  814. 1, sum))^q)^(1/q)
  815. }else S[i] <- 0
  816. }
  817. M <- as.matrix(dist(centers, p = p))
  818. R <- array(Inf, c(k, k))
  819. r = rep(0, k)
  820. for (i in 1:k) {
  821. for (j in 1:k) {
  822. R[i, j] = (S[i] + S[j])/M[i, j]
  823. }
  824. r[i] = max(R[i, ][is.finite(R[i, ])])
  825. }
  826. DB = mean(r[is.finite(r)])
  827. resul <- list(DB = DB, r = r, R = R, d = M, S = S, centers = centers)
  828. resul
  829. }
  830. Indice.S <- function(d, cl) {
  831. d <- as.matrix(d)
  832. Si <- 0
  833. for (k in 1:max(cl)) {
  834. if ((sum(cl == k)) <= 1)
  835. Sil <- 1
  836. else {
  837. Sil <- 0
  838. for (i in 1:length(cl)) {
  839. if (cl[i] == k) {
  840. ai <- sum(d[i, cl == k])/(sum(cl == k) -
  841. 1)
  842. dips <- NULL
  843. for (j in 1:max(cl)) if (cl[i] != j)
  844. if (sum(cl == j) != 1)
  845. dips <- cbind(dips, c((sum(d[i, cl ==
  846. j]))/(sum(cl == j))))
  847. else dips <- cbind(dips, c((sum(d[i, cl ==
  848. j]))))
  849. bi <- min(dips)
  850. Sil <- Sil + (bi - ai)/max(c(ai, bi))
  851. }
  852. }
  853. }
  854. Si <- Si + Sil
  855. }
  856. Si/length(cl)
  857. }
  858. Indice.Gap <- function(x, clall, reference.distribution = "unif",
  859. B = 10, method = "ward.D2", d = NULL, centrotypes = "centroids") {
  860. GAP <- function(X, cl, referenceDistribution, B, method,
  861. d, centrotypes) {
  862. set.seed(1)
  863. simgap <- function(Xvec) {
  864. ma <- max(Xvec)
  865. mi <- min(Xvec)
  866. set.seed(1)
  867. Xout <- runif(length(Xvec), min = mi, max = ma)
  868. return(Xout)
  869. }
  870. pcsim <- function(X, d, centrotypes) {
  871. if (centrotypes == "centroids") {
  872. Xmm <- apply(X, 2, mean)
  873. }
  874. for (k in (1:dim(X)[2])) {
  875. X[, k] <- X[, k] - Xmm[k]
  876. }
  877. ss <- svd(X)
  878. Xs <- X %*% ss$v
  879. Xnew <- apply(Xs, 2, simgap)
  880. Xt <- Xnew %*% t(ss$v)
  881. for (k in (1:dim(X)[2])) {
  882. Xt[, k] <- Xt[, k] + Xmm[k]
  883. }
  884. return(Xt)
  885. }
  886. if (is.null(dim(x))) {
  887. dim(x) <- c(length(x), 1)
  888. }
  889. ClassNr <- max(cl)
  890. Wk0 <- 0
  891. WkB <- matrix(0, 1, B)
  892. for (bb in (1:B)) {
  893. if (reference.distribution == "unif")
  894. Xnew <- apply(X, 2, simgap)
  895. else if (reference.distribution == "pc")
  896. Xnew <- pcsim(X, d, centrotypes)
  897. else stop("Wrong reference distribution type")
  898. if (bb == 1) {
  899. pp <- cl
  900. if (ClassNr == length(cl))
  901. pp2 <- 1:ClassNr
  902. else if (method == "k-means") {
  903. set.seed(1)
  904. pp2 <- kmeans(Xnew, ClassNr, 100)$cluster
  905. }else if (method == "single" || method == "complete" ||
  906. method == "average" || method == "ward.D2" ||
  907. method == "mcquitty" || method == "median" ||
  908. method == "centroid" || method == "ward.D")
  909. pp2 <- cutree(hclust(dist(Xnew), method = method),
  910. ClassNr)
  911. else stop("Wrong clustering method")
  912. if (ClassNr > 1) {
  913. for (zz in (1:ClassNr)) {
  914. Xuse <- X[pp == zz, ]
  915. Wk0 <- Wk0 + sum(diag(var(Xuse))) * (length(pp[pp ==
  916. zz]) - 1)/(dim(X)[1] - ClassNr)
  917. Xuse2 <- Xnew[pp2 == zz, ]
  918. WkB[1, bb] <- WkB[1, bb] + sum(diag(var(Xuse2))) *
  919. (length(pp2[pp2 == zz]) - 1)/(dim(X)[1] -
  920. ClassNr)
  921. }
  922. }
  923. if (ClassNr == 1) {
  924. Wk0 <- sum(diag(var(X)))
  925. WkB[1, bb] <- sum(diag(var(Xnew)))
  926. }
  927. }
  928. if (bb > 1) {
  929. if (ClassNr == length(cl))
  930. pp2 <- 1:ClassNr
  931. else if (method == "k-means") {
  932. set.seed(1)
  933. pp2 <- kmeans(Xnew, ClassNr, 100)$cluster
  934. }else if (method == "single" || method == "complete" ||
  935. method == "average" || method == "ward.D2" ||
  936. method == "mcquitty" || method == "median" ||
  937. method == "centroid" || method == "ward.D")
  938. pp2 <- cutree(hclust(dist(Xnew), method = method),
  939. ClassNr)
  940. else stop("Wrong clustering method")
  941. if (ClassNr > 1) {
  942. for (zz in (1:ClassNr)) {
  943. Xuse2 <- Xnew[pp2 == zz, ]
  944. WkB[1, bb] <- WkB[1, bb] + sum(diag(var(Xuse2))) *
  945. length(pp2[pp2 == zz])/(dim(X)[1] -
  946. ClassNr)
  947. }
  948. }
  949. if (ClassNr == 1) {
  950. WkB[1, bb] <- sum(diag(var(Xnew)))
  951. }
  952. }
  953. }
  954. Sgap <- mean(log(WkB[1, ])) - log(Wk0)
  955. Sdgap <- sqrt(1 + 1/B) * sqrt(var(log(WkB[1, ]))) *
  956. sqrt((B - 1)/B)
  957. resul <- list(Sgap = Sgap, Sdgap = Sdgap)
  958. resul
  959. }
  960. if (sum(c("centroids", "medoids") == centrotypes) ==
  961. 0)
  962. stop("Wrong centrotypes argument")
  963. if ("medoids" == centrotypes && is.null(d))
  964. stop("For argument centrotypes = 'medoids' d can not be null")
  965. if (!is.null(d)) {
  966. if (!is.matrix(d)) {
  967. d <- as.matrix(d)
  968. }
  969. row.names(d) <- row.names(x)
  970. }
  971. X <- as.matrix(x)
  972. gap1 <- GAP(X, clall[, 1], reference.distribution, B,
  973. method, d, centrotypes)
  974. gap <- gap1$Sgap
  975. gap2 <- GAP(X, clall[, 2], reference.distribution, B,
  976. method, d, centrotypes)
  977. diffu <- gap - (gap2$Sgap - gap2$Sdgap)
  978. resul <- list(gap = gap, diffu = diffu)
  979. resul
  980. }
  981. Index.sdindex <- function(x, clmax, cl) {
  982. x <- as.matrix(x)
  983. Alpha <- Dis(clmax, x)
  984. Scatt <- Average.scattering(cl, x)$scatt
  985. Dis0 <- Dis(cl, x)
  986. SD.indice <- Alpha * Scatt + Dis0
  987. return(SD.indice)
  988. }
  989. Index.SDbw <- function(x, cl) {
  990. x <- as.matrix(x)
  991. Scatt <- Average.scattering(cl, x)$scatt
  992. Dens.bw <- density.bw(cl, x)
  993. SDbw <- Scatt + Dens.bw
  994. return(SDbw)
  995. }
  996. Index.Dindex <- function(cl, x) {
  997. x <- as.matrix(x)
  998. distance <- density.clusters(cl, x)$distance
  999. n <- length(distance)
  1000. S <- 0
  1001. for (i in 1:n) S <- S + distance[i]
  1002. inertieIntra <- S/n
  1003. return(inertieIntra)
  1004. }
  1005. Index.dunn <- function(md, clusters, Data = NULL, method = "euclidean") {
  1006. distance <- as.matrix(md)
  1007. nc <- max(clusters)
  1008. interClust <- matrix(NA, nc, nc)
  1009. intraClust <- rep(NA, nc)
  1010. for (i in 1:nc) {
  1011. c1 <- which(clusters == i)
  1012. for (j in i:nc) {
  1013. if (j == i)
  1014. intraClust[i] <- max(distance[c1, c1])
  1015. if (j > i) {
  1016. c2 <- which(clusters == j)
  1017. interClust[i, j] <- min(distance[c1, c2])
  1018. }
  1019. }
  1020. }
  1021. dunn <- min(interClust, na.rm = TRUE)/max(intraClust)
  1022. return(dunn)
  1023. }
  1024. for (nc in min_nc:max_nc) {
  1025. if (any(method == 1) || (method == 2) || (method ==
  1026. 3) || (method == 4) || (method == 5) || (method ==
  1027. 6) || (method == 7) || (method == 9)) {
  1028. cl1 <- cutree(hc, k = nc)
  1029. cl2 <- cutree(hc, k = nc + 1)
  1030. clall <- cbind(cl1, cl2)
  1031. clmax <- cutree(hc, k = max_nc)
  1032. if (nc >= 2) {
  1033. cl0 <- cutree(hc, k = nc - 1)
  1034. clall1 <- cbind(cl0, cl1, cl2)
  1035. }
  1036. if (nc == 1) {
  1037. cl0 <- rep(NA, nn)
  1038. clall1 <- cbind(cl0, cl1, cl2)
  1039. }
  1040. }
  1041. if (method == 8) {
  1042. set.seed(1)
  1043. cl2 <- kmeans(jeu, nc + 1)$cluster
  1044. set.seed(1)
  1045. clmax <- kmeans(jeu, max_nc)$cluster
  1046. if (nc > 2) {
  1047. set.seed(1)
  1048. cl1 <- kmeans(jeu, nc)$cluster
  1049. clall <- cbind(cl1, cl2)
  1050. set.seed(1)
  1051. cl0 <- kmeans(jeu, nc - 1)$cluster
  1052. clall1 <- cbind(cl0, cl1, cl2)
  1053. }
  1054. if (nc == 2) {
  1055. set.seed(1)
  1056. cl1 <- kmeans(jeu, nc)$cluster
  1057. clall <- cbind(cl1, cl2)
  1058. cl0 <- rep(1, nn)
  1059. clall1 <- cbind(cl0, cl1, cl2)
  1060. }
  1061. if (nc == 1) {
  1062. stop("Number of clusters must be higher than 2")
  1063. }
  1064. }
  1065. j <- table(cl1)
  1066. s <- sum(j == 1)
  1067. j2 <- table(cl2)
  1068. s2 <- sum(j2 == 1)
  1069. if (any(indice == 3) || (indice == 31) || (indice ==
  1070. 32)) {
  1071. res[nc - min_nc + 1, 3] <- Indices.Traces(jeu, md,
  1072. clall1, index = "hart")
  1073. }
  1074. if (any(indice == 4) || (indice == 31) || (indice ==
  1075. 32)) {
  1076. res[nc - min_nc + 1, 4] <- Indices.WBT(x = jeu,
  1077. cl = cl1, P = TT, s = ss, vv = vv)$ccc
  1078. }
  1079. if (any(indice == 5) || (indice == 31) || (indice ==
  1080. 32)) {
  1081. res[nc - min_nc + 1, 5] <- Indices.WBT(x = jeu,
  1082. cl = cl1, P = TT, s = ss, vv = vv)$scott
  1083. }
  1084. if (any(indice == 6) || (indice == 31) || (indice ==
  1085. 32)) {
  1086. res[nc - min_nc + 1, 6] <- Indices.WBT(x = jeu,
  1087. cl = cl1, P = TT, s = ss, vv = vv)$marriot
  1088. }
  1089. if (any(indice == 7) || (indice == 31) || (indice ==
  1090. 32)) {
  1091. res[nc - min_nc + 1, 7] <- Indices.WBT(x = jeu,
  1092. cl = cl1, P = TT, s = ss, vv = vv)$trcovw
  1093. }
  1094. if (any(indice == 8) || (indice == 31) || (indice ==
  1095. 32)) {
  1096. res[nc - min_nc + 1, 8] <- Indices.WBT(x = jeu,
  1097. cl = cl1, P = TT, s = ss, vv = vv)$tracew
  1098. }
  1099. if (any(indice == 9) || (indice == 31) || (indice ==
  1100. 32)) {
  1101. res[nc - min_nc + 1, 9] <- Indices.WBT(x = jeu,
  1102. cl = cl1, P = TT, s = ss, vv = vv)$friedman
  1103. }
  1104. if (any(indice == 10) || (indice == 31) || (indice ==
  1105. 32)) {
  1106. res[nc - min_nc + 1, 10] <- Indices.WBT(x = jeu,
  1107. cl = cl1, P = TT, s = ss, vv = vv)$rubin
  1108. }
  1109. if (any(indice == 14) || (indice == 31) || (indice ==
  1110. 32)) {
  1111. res[nc - min_nc + 1, 14] <- Indices.WKWL(x = jeu,
  1112. cl1 = cl1, cl2 = cl2)$duda
  1113. }
  1114. if (any(indice == 15) || (indice == 31) || (indice ==
  1115. 32)) {
  1116. res[nc - min_nc + 1, 15] <- Indices.WKWL(x = jeu,
  1117. cl1 = cl1, cl2 = cl2)$pseudot2
  1118. }
  1119. if (any(indice == 16) || (indice == 31) || (indice ==
  1120. 32)) {
  1121. res[nc - min_nc + 1, 16] <- beale <- Indices.WKWL(x = jeu,
  1122. cl1 = cl1, cl2 = cl2)$beale
  1123. }
  1124. if (any(indice == 14) || (indice == 15) || (indice ==
  1125. 16) || (indice == 31) || (indice == 32)) {
  1126. NM <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)$NM
  1127. NK <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)$NK
  1128. NL <- Indices.WKWL(x = jeu, cl1 = cl1, cl2 = cl2)$NL
  1129. zz <- 3.2
  1130. zzz <- zz * sqrt(2 * (1 - 8/((pi^2) * pp))/(NM *
  1131. pp))
  1132. if (any(indice == 14) || (indice == 31) || (indice ==
  1133. 32)) {
  1134. resCritical[nc - min_nc + 1, 1] <- critValue <- 1 -
  1135. (2/(pi * pp)) - zzz
  1136. }
  1137. if ((indice == 15) || (indice == 31) || (indice ==
  1138. 32)) {
  1139. critValue <- 1 - (2/(pi * pp)) - zzz
  1140. resCritical[nc - min_nc + 1, 2] <- ((1 - critValue)/critValue) *
  1141. (NK + NL - 2)
  1142. }
  1143. if (any(indice == 16) || (indice == 31) || (indice ==
  1144. 32)) {
  1145. df2 <- (NM - 2) * pp
  1146. resCritical[nc - min_nc + 1, 3] <- 1 - pf(beale,
  1147. pp, df2)
  1148. }
  1149. }
  1150. if (any(indice == 18) || (indice == 31) || (indice ==
  1151. 32)) {
  1152. res[nc - min_nc + 1, 18] <- Indices.Traces(jeu,
  1153. md, clall1, index = "ball")
  1154. }
  1155. if (any(indice == 19) || (indice == 31) || (indice ==
  1156. 32)) {
  1157. res[nc - min_nc + 1, 19] <- Indice.ptbiserial(x = jeu,
  1158. md = md, cl1 = cl1)
  1159. }
  1160. if (any(indice == 20) || (indice == 32)) {
  1161. if (method == 1) {
  1162. if(exists("Indice.Gap")){
  1163. print("Indice.Gap exists")
  1164. }
  1165. debug(fun = "Indice.Gap")
  1166. resultSGAP <- Indice.Gap(x = jeu, clall = clall,
  1167. reference.distribution = "unif", B = 10, method = "ward.D2",
  1168. d = NULL, centrotypes = "centroids")
  1169. }
  1170. if (method == 2) {
  1171. resultSGAP <- Indice.Gap(x = jeu, clall = clall,
  1172. reference.distribution = "unif", B = 10, method = "single",
  1173. d = NULL, centrotypes = "centroids")
  1174. }
  1175. if (method == 3) {
  1176. resultSGAP <- Indice.Gap(x = jeu, clall = clall,
  1177. reference.distribution = "unif", B = 10, method = "complete",
  1178. d = NULL, centrotypes = "centroids")
  1179. }
  1180. if (method == 4) {
  1181. resultSGAP <- Indice.Gap(x = jeu, clall = clall,
  1182. reference.distribution = "unif", B = 10, method = "average",
  1183. d = NULL, centrotypes = "centroids")
  1184. }
  1185. if (method == 5) {
  1186. resultSGAP <- Indice.Gap(x = jeu, clall = clall,
  1187. reference.distribution = "unif", B = 10, method = "mcquitty",
  1188. d = NULL, centrotypes = "centroids")
  1189. }
  1190. if (method == 6) {
  1191. resultSGAP <- Indice.Gap(x = jeu, clall = clall,
  1192. reference.distribution = "unif", B = 10, method = "median",
  1193. d = NULL, centrotypes = "centroids")
  1194. }
  1195. if (method == 7) {
  1196. resultSGAP <- Indice.Gap(x = jeu, clall = clall,
  1197. reference.distribution = "unif", B = 10, method = "centroid",
  1198. d = NULL, centrotypes = "centroids")
  1199. }
  1200. if (method == 9) {
  1201. resultSGAP <- Indice.Gap(x = jeu, clall = clall,
  1202. reference.distribution = "unif", B = 10, method = "ward.D",
  1203. d = NULL, centrotypes = "centroids")
  1204. }
  1205. if (method == 8) {
  1206. resultSGAP <- Indice.Gap(x = jeu, clall = clall,
  1207. reference.distribution = "unif", B = 10, method = "k-means",
  1208. d = NULL, centrotypes = "centroids")
  1209. }
  1210. res[nc - min_nc + 1, 20] <- resultSGAP$gap
  1211. resCritical[nc - min_nc + 1, 4] <- resultSGAP$diffu
  1212. }
  1213. if (nc >= 2) {
  1214. if (any(indice == 1) || (indice == 31) || (indice ==
  1215. 32)) {
  1216. res[nc - min_nc + 1, 1] <- Indices.Traces(jeu,
  1217. md, clall1, index = "kl")
  1218. }
  1219. if (any(indice == 2) || (indice == 31) || (indice ==
  1220. 32)) {
  1221. res[nc - min_nc + 1, 2] <- Indices.Traces(jeu,
  1222. md, clall1, index = "ch")
  1223. }
  1224. if (any(indice == 11) || (indice == 31) || (indice ==
  1225. 32)) {
  1226. res[nc - min_nc + 1, 11] <- Indice.cindex(d = md,
  1227. cl = cl1)
  1228. }
  1229. if (any(indice == 12) || (indice == 31) || (indice ==
  1230. 32)) {
  1231. res[nc - min_nc + 1, 12] <- Indice.DB(x = jeu,
  1232. cl = cl1, d = NULL, centrotypes = "centroids",
  1233. p = 2, q = 2)$DB
  1234. }
  1235. if (any(indice == 13) || (indice == 31) || (indice ==
  1236. 32)) {
  1237. res[nc - min_nc + 1, 13] <- Indice.S(d = md,
  1238. cl = cl1)
  1239. }
  1240. if (any(indice == 17) || (indice == 31) || (indice ==
  1241. 32)) {
  1242. res[nc - min_nc + 1, 17] <- Indices.Traces(jeu,
  1243. md, clall1, index = "ratkowsky")
  1244. }
  1245. if (any(indice == 21) || (indice == 31) || (indice ==
  1246. 32)) {
  1247. res[nc - min_nc + 1, 21] <- Index.15and28(cl1 = cl1,
  1248. cl2 = cl2, md = md)$frey
  1249. }
  1250. if (any(indice == 22) || (indice == 31) || (indice ==
  1251. 32)) {
  1252. res[nc - min_nc + 1, 22] <- Index.15and28(cl1 = cl1,
  1253. cl2 = cl2, md = md)$mcclain
  1254. }
  1255. if (any(indice == 23) || (indice == 32)) {
  1256. res[nc - min_nc + 1, 23] <- Index.sPlussMoins(cl1 = cl1,
  1257. md = md)$gamma
  1258. }
  1259. if (any(indice == 24) || (indice == 32)) {
  1260. res[nc - min_nc + 1, 24] <- Index.sPlussMoins(cl1 = cl1,
  1261. md = md)$gplus
  1262. }
  1263. if (any(indice == 25) || (indice == 32)) {
  1264. res[nc - min_nc + 1, 25] <- Index.sPlussMoins(cl1 = cl1,
  1265. md = md)$tau
  1266. }
  1267. if (any(indice == 26) || (indice == 31) || (indice ==
  1268. 32)) {
  1269. res[nc - min_nc + 1, 26] <- Index.dunn(md, cl1,
  1270. Data = jeu, method = NULL)
  1271. }
  1272. if (any(indice == 27) || (indice == 31) || (indice ==
  1273. 32)) {
  1274. res[nc - min_nc + 1, 27] <- Index.Hubert(jeu,
  1275. cl1)
  1276. }
  1277. if (any(indice == 28) || (indice == 31) || (indice ==
  1278. 32)) {
  1279. res[nc - min_nc + 1, 28] <- Index.sdindex(jeu,
  1280. clmax, cl1)
  1281. }
  1282. if (any(indice == 29) || (indice == 31) || (indice ==
  1283. 32)) {
  1284. res[nc - min_nc + 1, 29] <- Index.Dindex(cl1,
  1285. jeu)
  1286. }
  1287. if (any(indice == 30) || (indice == 31) || (indice ==
  1288. 32)) {
  1289. res[nc - min_nc + 1, 30] <- Index.SDbw(jeu,
  1290. cl1)
  1291. }
  1292. }else {
  1293. res[nc - min_nc + 1, 1] <- NA
  1294. res[nc - min_nc + 1, 2] <- NA
  1295. res[nc - min_nc + 1, 11] <- NA
  1296. res[nc - min_nc + 1, 12] <- NA
  1297. res[nc - min_nc + 1, 13] <- NA
  1298. res[nc - min_nc + 1, 17] <- NA
  1299. res[nc - min_nc + 1, 21] <- NA
  1300. res[nc - min_nc + 1, 22] <- NA
  1301. res[nc - min_nc + 1, 23] <- NA
  1302. res[nc - min_nc + 1, 24] <- NA
  1303. res[nc - min_nc + 1, 25] <- NA
  1304. res[nc - min_nc + 1, 26] <- NA
  1305. res[nc - min_nc + 1, 27] <- NA
  1306. res[nc - min_nc + 1, 28] <- NA
  1307. res[nc - min_nc + 1, 29] <- NA
  1308. res[nc - min_nc + 1, 30] <- NA
  1309. }
  1310. }
  1311. nc.KL <- indice.KL <- 0
  1312. if (any(indice == 1) || (indice == 31) || (indice == 32)) {
  1313. nc.KL <- (min_nc:max_nc)[which.max(res[, 1])]
  1314. indice.KL <- max(res[, 1], na.rm = TRUE)
  1315. best.nc <- nc.KL
  1316. }
  1317. nc.CH <- indice.CH <- 0
  1318. if (any(indice == 2) || (indice == 31) || (indice == 32)) {
  1319. nc.CH <- (min_nc:max_nc)[which.max(res[, 2])]
  1320. indice.CH <- max(res[, 2], na.rm = TRUE)
  1321. best.nc <- nc.CH
  1322. }
  1323. nc.CCC <- indice.CCC <- 0
  1324. if (any(indice == 4) || (indice == 31) || (indice == 32)) {
  1325. nc.CCC <- (min_nc:max_nc)[which.max(res[, 4])]
  1326. indice.CCC <- max(res[, 4], na.rm = TRUE)
  1327. best.nc <- nc.CCC
  1328. }
  1329. nc.DB <- indice.DB <- 0
  1330. if (any(indice == 12) || (indice == 31) || (indice == 32)) {
  1331. nc.DB <- (min_nc:max_nc)[which.min(res[, 12])]
  1332. indice.DB <- min(res[, 12], na.rm = TRUE)
  1333. best.nc <- nc.DB
  1334. }
  1335. nc.Silhouette <- indice.Silhouette <- 0
  1336. if (any(indice == 13) || (indice == 31) || (indice == 32)) {
  1337. nc.Silhouette <- (min_nc:max_nc)[which.max(res[, 13])]
  1338. indice.Silhouette <- max(res[, 13], na.rm = TRUE)
  1339. best.nc <- nc.Silhouette
  1340. }
  1341. nc.Gap <- indice.Gap <- 0
  1342. if (any(indice == 20) || (indice == 32)) {
  1343. found <- FALSE
  1344. for (ncG in min_nc:max_nc) {
  1345. if ((resCritical[ncG - min_nc + 1, 4] >= 0) && (!found)) {
  1346. ncGap <- ncG
  1347. indiceGap <- res[ncG - min_nc + 1, 20]
  1348. found <- TRUE
  1349. }
  1350. }
  1351. if (found) {
  1352. nc.Gap <- ncGap
  1353. indice.Gap <- indiceGap
  1354. best.nc <- nc.Gap
  1355. }else {
  1356. nc.Gap <- NA
  1357. indice.Gap <- NA
  1358. }
  1359. }
  1360. nc.Duda <- indice.Duda <- 0
  1361. if (any(indice == 14) || (indice == 31) || (indice == 32)) {
  1362. foundDuda <- FALSE
  1363. for (ncD in min_nc:max_nc) {
  1364. if ((res[ncD - min_nc + 1, 14] >= resCritical[ncD -
  1365. min_nc + 1, 1]) && (!foundDuda)) {
  1366. ncDuda <- ncD
  1367. indiceDuda <- res[ncD - min_nc + 1, 14]
  1368. foundDuda <- TRUE
  1369. }
  1370. }
  1371. if (foundDuda) {
  1372. nc.Duda <- ncDuda
  1373. indice.Duda <- indiceDuda
  1374. best.nc <- nc.Duda
  1375. }else {
  1376. nc.Duda <- NA
  1377. indice.Duda <- NA
  1378. }
  1379. }
  1380. nc.Pseudo <- indice.Pseudo <- 0
  1381. if (any(indice == 15) || (indice == 31) || (indice == 32)) {
  1382. foundPseudo <- FALSE
  1383. for (ncP in min_nc:max_nc) {
  1384. if ((res[ncP - min_nc + 1, 15] <= resCritical[ncP -
  1385. min_nc + 1, 2]) && (!foundPseudo)) {
  1386. ncPseudo <- ncP
  1387. indicePseudo <- res[ncP - min_nc + 1, 15]
  1388. foundPseudo <- TRUE
  1389. }
  1390. }
  1391. if (foundPseudo) {
  1392. nc.Pseudo <- ncPseudo
  1393. indice.Pseudo <- indicePseudo
  1394. best.nc <- nc.Pseudo
  1395. }else {
  1396. nc.Pseudo <- NA
  1397. indice.Pseudo <- NA
  1398. }
  1399. }
  1400. nc.Beale <- indice.Beale <- 0
  1401. if (any(indice == 16) || (indice == 31) || (indice == 32)) {
  1402. foundBeale <- FALSE
  1403. for (ncB in min_nc:max_nc) {
  1404. if ((resCritical[ncB - min_nc + 1, 3] >= alphaBeale) &&
  1405. (!foundBeale)) {
  1406. ncBeale <- ncB
  1407. indiceBeale <- res[ncB - min_nc + 1, 16]
  1408. foundBeale <- TRUE
  1409. }
  1410. }
  1411. if (foundBeale) {
  1412. nc.Beale <- ncBeale
  1413. indice.Beale <- indiceBeale
  1414. best.nc <- nc.Beale
  1415. }else {
  1416. nc.Beale <- NA
  1417. indice.Beale <- NA
  1418. }
  1419. }
  1420. nc.ptbiserial <- indice.ptbiserial <- 0
  1421. if (any(indice == 19) || (indice == 31) || (indice == 32)) {
  1422. nc.ptbiserial <- (min_nc:max_nc)[which.max(res[, 19])]
  1423. indice.ptbiserial <- max(res[, 19], na.rm = TRUE)
  1424. best.nc <- nc.ptbiserial
  1425. }
  1426. foundNC <- foundIndice <- numeric(0)
  1427. nc.Frey <- indice.Frey <- 0
  1428. if (any(indice == 21) || (indice == 31) || (indice == 32)) {
  1429. foundFrey <- FALSE
  1430. i <- 1
  1431. for (ncF in min_nc:max_nc) {
  1432. if (res[ncF - min_nc + 1, 21] < 1) {
  1433. ncFrey <- ncF - 1
  1434. indiceFrey <- res[ncF - 1 - min_nc + 1, 21]
  1435. foundFrey <- TRUE
  1436. foundNC[i] <- ncFrey
  1437. foundIndice[i] <- indiceFrey
  1438. i <- i + 1
  1439. }
  1440. }
  1441. if (foundFrey) {
  1442. nc.Frey <- foundNC[1]
  1443. indice.Frey <- foundIndice[1]
  1444. best.nc <- nc.Frey
  1445. }else {
  1446. nc.Frey <- NA
  1447. indice.Frey <- NA
  1448. print(paste("Frey index : No clustering structure in this data set"))
  1449. }
  1450. }
  1451. nc.McClain <- indice.McClain <- 0
  1452. if (any(indice == 22) || (indice == 31) || (indice == 32)) {
  1453. nc.McClain <- (min_nc:max_nc)[which.min(res[, 22])]
  1454. indice.McClain <- min(res[, 22], na.rm = TRUE)
  1455. best.nc <- nc.McClain
  1456. }
  1457. nc.Gamma <- indice.Gamma <- 0
  1458. if (any(indice == 23) || (indice == 31) || (indice == 32)) {
  1459. nc.Gamma <- (min_nc:max_nc)[which.max(res[, 23])]
  1460. indice.Gamma <- max(res[, 23], na.rm = TRUE)
  1461. best.nc <- nc.Gamma
  1462. }
  1463. nc.Gplus <- indice.Gplus <- 0
  1464. if (any(indice == 24) || (indice == 31) || (indice == 32)) {
  1465. nc.Gplus <- (min_nc:max_nc)[which.min(res[, 24])]
  1466. indice.Gplus <- min(res[, 24], na.rm = TRUE)
  1467. best.nc <- nc.Gplus
  1468. }
  1469. nc.Tau <- indice.Tau <- 0
  1470. if (any(indice == 25) || (indice == 31) || (indice == 32)) {
  1471. nc.Tau <- (min_nc:max_nc)[which.max(res[, 25])]
  1472. indice.Tau <- max(res[, 25], na.rm = TRUE)
  1473. best.nc <- nc.Tau
  1474. }
  1475. if ((indice == 3) || (indice == 5) || (indice == 6) || (indice ==
  1476. 7) || (indice == 8) || (indice == 9) || (indice == 10) ||
  1477. (indice == 18) || (indice == 27) || (indice == 11) ||
  1478. (indice == 29) || (indice == 31) || (indice == 32)) {
  1479. DiffLev <- array(0, c(max_nc - min_nc + 1, 12))
  1480. DiffLev[, 1] <- min_nc:max_nc
  1481. for (nc3 in min_nc:max_nc) {
  1482. if (nc3 == min_nc) {
  1483. DiffLev[nc3 - min_nc + 1, 2] <- abs(res[nc3 -
  1484. min_nc + 1, 3] - NA)
  1485. DiffLev[nc3 - min_nc + 1, 3] <- abs(res[nc3 -
  1486. min_nc + 1, 5] - NA)
  1487. DiffLev[nc3 - min_nc + 1, 4] <- abs(res[nc3 -
  1488. min_nc + 1, 6] - NA)
  1489. DiffLev[nc3 - min_nc + 1, 5] <- abs(res[nc3 -
  1490. min_nc + 1, 7] - NA)
  1491. DiffLev[nc3 - min_nc + 1, 6] <- abs(res[nc3 -
  1492. min_nc + 1, 8] - NA)
  1493. DiffLev[nc3 - min_nc + 1, 7] <- abs(res[nc3 -
  1494. min_nc + 1, 9] - NA)
  1495. DiffLev[nc3 - min_nc + 1, 8] <- abs(res[nc3 -
  1496. min_nc + 1, 10] - NA)
  1497. DiffLev[nc3 - min_nc + 1, 9] <- abs(res[nc3 -
  1498. min_nc + 1, 18] - NA)
  1499. DiffLev[nc3 - min_nc + 1, 10] <- abs(res[nc3 -
  1500. min_nc + 1, 27] - NA)
  1501. DiffLev[nc3 - min_nc + 1, 12] <- abs(res[nc3 -
  1502. min_nc + 1, 29] - NA)
  1503. }else {
  1504. if (nc3 == max_nc) {
  1505. DiffLev[nc3 - min_nc + 1, 2] <- abs(res[nc3 -
  1506. min_nc + 1, 3] - res[nc3 - min_nc, 3])
  1507. DiffLev[nc3 - min_nc + 1, 3] <- abs(res[nc3 -
  1508. min_nc + 1, 5] - res[nc3 - min_nc, 5])
  1509. DiffLev[nc3 - min_nc + 1, 4] <- abs(res[nc3 -
  1510. min_nc + 1, 6] - NA)
  1511. DiffLev[nc3 - min_nc + 1, 5] <- abs(res[nc3 -
  1512. min_nc + 1, 7] - res[nc3 - min_nc, 7])
  1513. DiffLev[nc3 - min_nc + 1, 6] <- abs(res[nc3 -
  1514. min_nc + 1, 8] - NA)
  1515. DiffLev[nc3 - min_nc + 1, 7] <- abs(res[nc3 -
  1516. min_nc + 1, 9] - res[nc3 - min_nc, 9])
  1517. DiffLev[nc3 - min_nc + 1, 8] <- abs(res[nc3 -
  1518. min_nc + 1, 10] - NA)
  1519. DiffLev[nc3 - min_nc + 1, 9] <- abs(res[nc3 -
  1520. min_nc + 1, 18] - res[nc3 - min_nc, 18])
  1521. DiffLev[nc3 - min_nc + 1, 10] <- abs(res[nc3 -
  1522. min_nc + 1, 27] - NA)
  1523. DiffLev[nc3 - min_nc + 1, 12] <- abs(res[nc3 -
  1524. min_nc + 1, 29] - NA)
  1525. }else {
  1526. DiffLev[nc3 - min_nc + 1, 2] <- abs(res[nc3 -
  1527. min_nc + 1, 3] - res[nc3 - min_nc, 3])
  1528. DiffLev[nc3 - min_nc + 1, 3] <- abs(res[nc3 -
  1529. min_nc + 1, 5] - res[nc3 - min_nc, 5])
  1530. DiffLev[nc3 - min_nc + 1, 4] <- ((res[nc3 -
  1531. min_nc + 2, 6] - res[nc3 - min_nc + 1, 6]) -
  1532. (res[nc3 - min_nc + 1, 6] - res[nc3 - min_nc,
  1533. 6]))
  1534. DiffLev[nc3 - min_nc + 1, 5] <- abs(res[nc3 -
  1535. min_nc + 1, 7] - res[nc3 - min_nc, 7])
  1536. DiffLev[nc3 - min_nc + 1, 6] <- ((res[nc3 -
  1537. min_nc + 2, 8] - res[nc3 - min_nc + 1, 8]) -
  1538. (res[nc3 - min_nc + 1, 8] - res[nc3 - min_nc,
  1539. 8]))
  1540. DiffLev[nc3 - min_nc + 1, 7] <- abs(res[nc3 -
  1541. min_nc + 1, 9] - res[nc3 - min_nc, 9])
  1542. DiffLev[nc3 - min_nc + 1, 8] <- ((res[nc3 -
  1543. min_nc + 2, 10] - res[nc3 - min_nc + 1,
  1544. 10]) - (res[nc3 - min_nc + 1, 10] - res[nc3 -
  1545. min_nc, 10]))
  1546. DiffLev[nc3 - min_nc + 1, 9] <- abs(res[nc3 -
  1547. min_nc + 1, 18] - res[nc3 - min_nc, 18])
  1548. DiffLev[nc3 - min_nc + 1, 10] <- abs((res[nc3 -
  1549. min_nc + 1, 27] - res[nc3 - min_nc, 27]))
  1550. DiffLev[nc3 - min_nc + 1, 12] <- ((res[nc3 -
  1551. min_nc + 2, 29] - res[nc3 - min_nc + 1,
  1552. 29]) - (res[nc3 - min_nc + 1, 29] - res[nc3 -
  1553. min_nc, 29]))
  1554. }
  1555. }
  1556. }
  1557. }
  1558. nc.Hartigan <- indice.Hartigan <- 0
  1559. if (any(indice == 3) || (indice == 31) || (indice == 32)) {
  1560. nc.Hartigan <- DiffLev[, 1][which.max(DiffLev[, 2])]
  1561. indice.Hartigan <- max(DiffLev[, 2], na.rm = TRUE)
  1562. best.nc <- nc.Hartigan
  1563. }
  1564. nc.Ratkowsky <- indice.Ratkowsky <- 0
  1565. if (any(indice == 17) || (indice == 31) || (indice == 32)) {
  1566. nc.Ratkowsky <- (min_nc:max_nc)[which.max(res[, 17])]
  1567. indice.Ratkowsky <- max(res[, 17], na.rm = TRUE)
  1568. best.nc <- nc.Ratkowsky
  1569. }
  1570. nc.cindex <- indice.cindex <- 0
  1571. if (any(indice == 11) || (indice == 31) || (indice == 32)) {
  1572. nc.cindex <- (min_nc:max_nc)[which.min(res[, 11])]
  1573. indice.cindex <- min(res[, 11], na.rm = TRUE)
  1574. best.nc <- nc.cindex
  1575. }
  1576. nc.Scott <- indice.Scott <- 0
  1577. if (any(indice == 5) || (indice == 31) || (indice == 32)) {
  1578. nc.Scott <- DiffLev[, 1][which.max(DiffLev[, 3])]
  1579. indice.Scott <- max(DiffLev[, 3], na.rm = TRUE)
  1580. best.nc <- nc.Scott
  1581. }
  1582. nc.Marriot <- indice.Marriot <- 0
  1583. if (any(indice == 6) || (indice == 31) || (indice == 32)) {
  1584. nc.Marriot <- DiffLev[, 1][which.max(DiffLev[, 4])]
  1585. round(nc.Marriot, digits = 1)
  1586. indice.Marriot <- max(DiffLev[, 4], na.rm = TRUE)
  1587. best.nc <- nc.Marriot
  1588. }
  1589. nc.TrCovW <- indice.TrCovW <- 0
  1590. if (any(indice == 7) || (indice == 31) || (indice == 32)) {
  1591. nc.TrCovW <- DiffLev[, 1][which.max(DiffLev[, 5])]
  1592. indice.TrCovW <- max(DiffLev[, 5], na.rm = TRUE)
  1593. best.nc <- nc.TrCovW
  1594. }
  1595. nc.TraceW <- indice.TraceW <- 0
  1596. if (any(indice == 8) || (indice == 31) || (indice == 32)) {
  1597. nc.TraceW <- DiffLev[, 1][which.max(DiffLev[, 6])]
  1598. indice.TraceW <- max(DiffLev[, 6], na.rm = TRUE)
  1599. best.nc <- nc.TraceW
  1600. }
  1601. nc.Friedman <- indice.Friedman <- 0
  1602. if (any(indice == 9) || (indice == 31) || (indice == 32)) {
  1603. nc.Friedman <- DiffLev[, 1][which.max(DiffLev[, 7])]
  1604. indice.Friedman <- max(DiffLev[, 7], na.rm = TRUE)
  1605. best.nc <- nc.Friedman
  1606. }
  1607. nc.Rubin <- indice.Rubin <- 0
  1608. if (any(indice == 10) || (indice == 31) || (indice == 32)) {
  1609. nc.Rubin <- DiffLev[, 1][which.min(DiffLev[, 8])]
  1610. indice.Rubin <- min(DiffLev[, 8], na.rm = TRUE)
  1611. best.nc <- nc.Rubin
  1612. }
  1613. nc.Ball <- indice.Ball <- 0
  1614. if (any(indice == 18) || (indice == 31) || (indice == 32)) {
  1615. nc.Ball <- DiffLev[, 1][which.max(DiffLev[, 9])]
  1616. indice.Ball <- max(DiffLev[, 9], na.rm = TRUE)
  1617. best.nc <- nc.Ball
  1618. }
  1619. nc.Dunn <- indice.Dunn <- 0
  1620. if (any(indice == 26) || (indice == 31) || (indice == 32)) {
  1621. nc.Dunn <- (min_nc:max_nc)[which.max(res[, 26])]
  1622. indice.Dunn <- max(res[, 26], na.rm = TRUE)
  1623. best.nc <- nc.Dunn
  1624. }
  1625. nc.Hubert <- indice.Hubert <- 0
  1626. if (any(indice == 27) || (indice == 31) || (indice == 32)) {
  1627. nc.Hubert <- 0
  1628. indice.Hubert <- 0
  1629. par(mfrow = c(1, 2))
  1630. plot(x_axis, res[, 27], tck = 0, type = "b", col = "red",
  1631. xlab = expression(paste("Number of clusters ")),
  1632. ylab = expression(paste("Hubert Statistic values")))
  1633. plot(DiffLev[, 1], DiffLev[, 10], tck = 0, type = "b",
  1634. col = "blue", xlab = expression(paste("Number of clusters ")),
  1635. ylab = expression(paste("Hubert statistic second differences")))
  1636. cat(paste("*** : The Hubert index is a graphical method of determining the number of clusters.\n In the plot of Hubert index, we seek a significant knee that corresponds to a \n significant increase of the value of the measure i.e the significant peak in Hubert\n index second differences plot.",
  1637. "\n", "\n"))
  1638. }
  1639. nc.sdindex <- indice.sdindex <- 0
  1640. if (any(indice == 28) || (indice == 31) || (indice == 32)) {
  1641. nc.sdindex <- (min_nc:max_nc)[which.min(res[, 28])]
  1642. indice.sdindex <- min(res[, 28], na.rm = TRUE)
  1643. best.nc <- nc.sdindex
  1644. }
  1645. nc.Dindex <- indice.Dindex <- 0
  1646. if (any(indice == 29) || (indice == 31) || (indice == 32)) {
  1647. nc.Dindex <- 0
  1648. indice.Dindex <- 0
  1649. par(mfrow = c(1, 2))
  1650. plot(x_axis, res[, 29], tck = 0, type = "b", col = "red",
  1651. xlab = expression(paste("Number of clusters ")),
  1652. ylab = expression(paste("Dindex Values")))
  1653. plot(DiffLev[, 1], DiffLev[, 12], tck = 0, type = "b",
  1654. col = "blue", xlab = expression(paste("Number of clusters ")),
  1655. ylab = expression(paste("Second differences Dindex Values")))
  1656. cat(paste("*** : The D index is a graphical method of determining the number of clusters. \n In the plot of D index, we seek a significant knee (the significant peak in Dindex\n second differences plot) that corresponds to a significant increase of the value of\n the measure.",
  1657. "\n", "\n"))
  1658. }
  1659. nc.SDbw <- indice.SDbw <- 0
  1660. if (any(indice == 30) || (indice == 31) || (indice == 32)) {
  1661. nc.SDbw <- (min_nc:max_nc)[which.min(res[, 30])]
  1662. indice.SDbw <- min(res[, 30], na.rm = TRUE)
  1663. best.nc <- nc.SDbw
  1664. }
  1665. if (indice < 31) {
  1666. res <- res[, c(indice)]
  1667. if (indice == 14) {
  1668. resCritical <- resCritical[, 1]
  1669. }
  1670. if (indice == 15) {
  1671. resCritical <- resCritical[, 2]
  1672. }
  1673. if (indice == 16) {
  1674. resCritical <- resCritical[, 3]
  1675. }
  1676. if (indice == 20) {
  1677. resCritical <- resCritical[, 4]
  1678. }
  1679. }
  1680. if (indice == 31) {
  1681. res <- res[, c(1:19, 21:22, 26:30)]
  1682. resCritical <- resCritical[, c(1:3)]
  1683. }
  1684. if (any(indice == 20) || (indice == 23) || (indice == 24) ||
  1685. (indice == 25) || (indice == 32)) {
  1686. results <- c(nc.KL, indice.KL, nc.CH, indice.CH, nc.Hartigan,
  1687. indice.Hartigan, nc.CCC, indice.CCC, nc.Scott, indice.Scott,
  1688. nc.Marriot, indice.Marriot, nc.TrCovW, indice.TrCovW,
  1689. nc.TraceW, indice.TraceW, nc.Friedman, indice.Friedman,
  1690. nc.Rubin, indice.Rubin, nc.cindex, indice.cindex,
  1691. nc.DB, indice.DB, nc.Silhouette, indice.Silhouette,
  1692. nc.Duda, indice.Duda, nc.Pseudo, indice.Pseudo,
  1693. nc.Beale, indice.Beale, nc.Ratkowsky, indice.Ratkowsky,
  1694. nc.Ball, indice.Ball, nc.ptbiserial, indice.ptbiserial,
  1695. nc.Gap, indice.Gap, nc.Frey, indice.Frey, nc.McClain,
  1696. indice.McClain, nc.Gamma, indice.Gamma, nc.Gplus,
  1697. indice.Gplus, nc.Tau, indice.Tau, nc.Dunn, indice.Dunn,
  1698. nc.Hubert, indice.Hubert, nc.sdindex, indice.sdindex,
  1699. nc.Dindex, indice.Dindex, nc.SDbw, indice.SDbw)
  1700. results1 <- matrix(c(results), nrow = 2, ncol = 30)
  1701. resultats <- matrix(c(results), nrow = 2, ncol = 30,
  1702. dimnames = list(c("Number_clusters", "Value_Index"),
  1703. c("KL", "CH", "Hartigan", "CCC", "Scott", "Marriot",
  1704. "TrCovW", "TraceW", "Friedman", "Rubin", "Cindex",
  1705. "DB", "Silhouette", "Duda", "PseudoT2", "Beale",
  1706. "Ratkowsky", "Ball", "PtBiserial", "Gap",
  1707. "Frey", "McClain", "Gamma", "Gplus", "Tau",
  1708. "Dunn", "Hubert", "SDindex", "Dindex", "SDbw")))
  1709. }else {
  1710. results <- c(nc.KL, indice.KL, nc.CH, indice.CH, nc.Hartigan,
  1711. indice.Hartigan, nc.CCC, indice.CCC, nc.Scott, indice.Scott,
  1712. nc.Marriot, indice.Marriot, nc.TrCovW, indice.TrCovW,
  1713. nc.TraceW, indice.TraceW, nc.Friedman, indice.Friedman,
  1714. nc.Rubin, indice.Rubin, nc.cindex, indice.cindex,
  1715. nc.DB, indice.DB, nc.Silhouette, indice.Silhouette,
  1716. nc.Duda, indice.Duda, nc.Pseudo, indice.Pseudo,
  1717. nc.Beale, indice.Beale, nc.Ratkowsky, indice.Ratkowsky,
  1718. nc.Ball, indice.Ball, nc.ptbiserial, indice.ptbiserial,
  1719. nc.Frey, indice.Frey, nc.McClain, indice.McClain,
  1720. nc.Dunn, indice.Dunn, nc.Hubert, indice.Hubert,
  1721. nc.sdindex, indice.sdindex, nc.Dindex, indice.Dindex,
  1722. nc.SDbw, indice.SDbw)
  1723. results1 <- matrix(c(results), nrow = 2, ncol = 26)
  1724. resultats <- matrix(c(results), nrow = 2, ncol = 26,
  1725. dimnames = list(c("Number_clusters", "Value_Index"),
  1726. c("KL", "CH", "Hartigan", "CCC", "Scott", "Marriot",
  1727. "TrCovW", "TraceW", "Friedman", "Rubin", "Cindex",
  1728. "DB", "Silhouette", "Duda", "PseudoT2", "Beale",
  1729. "Ratkowsky", "Ball", "PtBiserial", "Frey",
  1730. "McClain", "Dunn", "Hubert", "SDindex", "Dindex",
  1731. "SDbw")))
  1732. }
  1733. if (any(indice <= 20) || (indice == 23) || (indice == 24) ||
  1734. (indice == 25)) {
  1735. resultats <- resultats[, c(indice)]
  1736. }
  1737. if (any(indice == 21) || (indice == 22)) {
  1738. indice3 <- indice - 1
  1739. resultats <- resultats[, c(indice3)]
  1740. }
  1741. if (any(indice == 26) || (indice == 27) || (indice == 28) ||
  1742. (indice == 29) || (indice == 30)) {
  1743. indice4 <- indice - 4
  1744. resultats <- resultats[, c(indice4)]
  1745. }
  1746. resultats <- round(resultats, digits = 4)
  1747. res <- round(res, digits = 4)
  1748. resCritical <- round(resCritical, digits = 4)
  1749. if (any(indice == 31) || (indice == 32)) {
  1750. cat("*******************************************************************",
  1751. "\n")
  1752. cat("* Among all indices: ",
  1753. "\n")
  1754. BestCluster <- results1[1, ]
  1755. c = 0
  1756. for (i in min.nc:max.nc) {
  1757. vect <- which(BestCluster == i)
  1758. if (length(vect) > 0)
  1759. cat("*", length(vect), "proposed", i, "as the best number of clusters",
  1760. "\n")
  1761. if (c < length(vect)) {
  1762. j = i
  1763. c <- length(vect)
  1764. }
  1765. }
  1766. cat("\n", " ***** Conclusion ***** ",
  1767. "\n", "\n")
  1768. cat("* According to the majority rule, the best number of clusters is ",
  1769. j, "\n", "\n", "\n")
  1770. cat("*******************************************************************",
  1771. "\n")
  1772. if (any(method == 1) || (method == 2) || (method ==
  1773. 3) || (method == 4) || (method == 5) || (method ==
  1774. 6) || (method == 7) || (method == 9))
  1775. partition <- cutree(hc, k = j)
  1776. else {
  1777. set.seed(1)
  1778. partition <- kmeans(jeu, j)$cluster
  1779. }
  1780. }
  1781. if (any(indice == 1) || (indice == 2) || (indice == 3) ||
  1782. (indice == 4) || (indice == 5) || (indice == 6) || (indice ==
  1783. 7) || (indice == 8) || (indice == 9) || (indice == 10) ||
  1784. (indice == 11) || (indice == 12) || (indice == 13) ||
  1785. (indice == 14) || (indice == 15) || (indice == 16) ||
  1786. (indice == 17) || (indice == 18) || (indice == 19) ||
  1787. (indice == 20) || (indice == 21) || (indice == 22) ||
  1788. (indice == 23) || (indice == 24) || (indice == 25) ||
  1789. (indice == 26) || (indice == 28) || (indice == 30)) {
  1790. if (any(method == 1) || (method == 2) || (method ==
  1791. 3) || (method == 4) || (method == 5) || (method ==
  1792. 6) || (method == 7) || (method == 9))
  1793. partition <- cutree(hc, k = best.nc)
  1794. else {
  1795. set.seed(1)
  1796. partition <- kmeans(jeu, best.nc)$cluster
  1797. }
  1798. }
  1799. if ((indice == 14) || (indice == 15) || (indice == 16) ||
  1800. (indice == 20) || (indice == 31) || (indice == 32)) {
  1801. results.final <- list(All.index = res, All.CriticalValues = resCritical,
  1802. Best.nc = resultats, Best.partition = partition)
  1803. }
  1804. if ((indice == 27) || (indice == 29))
  1805. results.final <- list(All.index = res)
  1806. if (any(indice == 1) || (indice == 2) || (indice == 3) ||
  1807. (indice == 4) || (indice == 5) || (indice == 6) || (indice ==
  1808. 7) || (indice == 8) || (indice == 9) || (indice == 10) ||
  1809. (indice == 11) || (indice == 12) || (indice == 13) ||
  1810. (indice == 17) || (indice == 18) || (indice == 19) ||
  1811. (indice == 21) || (indice == 22) || (indice == 23) ||
  1812. (indice == 24) || (indice == 25) || (indice == 26) ||
  1813. (indice == 28) || (indice == 30))
  1814. results.final <- list(All.index = res, Best.nc = resultats,
  1815. Best.partition = partition)
  1816. return(results.final)
  1817. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement