Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- function makeMatrix($columns, $rows) {
- $matrix = array();
- for ($i = 0; $i < $columns; $i ++) {
- $column = array();
- for ($j = 0; $j < $rows; $j ++) {
- $column[] = 0;
- }
- $matrix[] = $column;
- }
- return $matrix;
- }
- function uniqueCountSorted($input) {
- $uniqueValueCount = 0;
- $lastSeenValue = FALSE;
- for ($i = 0; $i < count($input); $i++) {
- if ($i === 0 || $input[$i] !== $lastSeenValue) {
- $lastSeenValue = $input[$i];
- $uniqueValueCount ++;
- }
- }
- return $uniqueValueCount;
- }
- function ckmeans($data, $nClusters) {
- if ($nClusters > count($data)) {
- throw Exception('Cannot generate more classes than there are data values');
- }
- sort($data);
- $sorted = $data;
- $uniqueCount = uniqueCountSorted($sorted);
- $matrix = makeMatrix($nClusters, count($data));
- $backtrackMatrix = makeMatrix($nClusters, count($data));
- for ($cluster = 0; $cluster < $nClusters; $cluster ++) {
- $firstClusterMean = $sorted[0];
- for ($sortedIdx = max($cluster, 1); $sortedIdx < count($sorted); $sortedIdx ++) {
- if ($cluster === 0) {
- $squaredDifference = pow($sorted[$sortedIdx] - $firstClusterMean, 2);
- $matrix[$cluster][$sortedIdx] = $matrix[$cluster][$sortedIdx - 1] + ($sortedIdx / ($sortedIdx + 1)) * $squaredDifference;
- $newSum = $sortedIdx * $firstClusterMean + $sorted[$sortedIdx];
- $firstClusterMean = $newSum / ($sortedIdx + 1);
- }
- else {
- $sumSquaredDistances = 0;
- $meanXJ = 0;
- for ($j = $sortedIdx; $j >= $cluster; $j --) {
- $sumSquaredDistances += ($sortedIdx - $j) / ($sortedIdx - $j + 1) * pow($sorted[$j] - $meanXJ, 2);
- $meanXJ = ($sorted[$j] + ($sortedIdx - $j) * $meanXJ) / ($sortedIdx - $j + 1);
- if ($j === $sortedIdx) {
- $matrix[$cluster][$sortedIdx] = $sumSquaredDistances;
- $backtrackMatrix[$cluster][$sortedIdx] = $j;
- if ($j > 0) {
- $matrix[$cluster][$sortedIdx] += $matrix[$cluster - 1][$j - 1];
- }
- }
- else {
- if ($j === 0) {
- if ($sumSquaredDistances <= $matrix[$cluster][$sortedIdx]) {
- $matrix[$cluster][$sortedIdx] = $sumSquaredDistances;
- $backtrackMatrix[$cluster][$sortedIdx] = $j;
- }
- }
- else if ($sumSquaredDistances + $matrix[$cluster - 1][$j - 1] < $matrix[$cluster][$sortedIdx]) {
- $matrix[$cluster][$sortedIdx] = $sumSquaredDistances + $matrix[$cluster - 1][$j - 1];
- $backtrackMatrix[$cluster][$sortedIdx] = $j;
- }
- }
- }
- }
- }
- }
- $clusters = array();
- $clusterRight = count($backtrackMatrix[0]) - 1;
- for ($cluster = count($backtrackMatrix) - 1; $cluster >= 0; $cluster --) {
- $clusterLeft = $backtrackMatrix[$cluster][$clusterRight];
- $clusters[$cluster] = array_slice($sorted, $clusterLeft, ($clusterRight + 1) - $clusterLeft);
- if ($cluster > 0) {
- $clusterRight = $clusterLeft - 1;
- }
- }
- return $clusters;
- }
- ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement