Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.InputStreamReader;
- import java.util.ArrayList;
- import java.util.Scanner;
/**
 * Interactive k-means clustering of two-dimensional points.
 *
 * <p>The program reads the number of clusters, the number of points and the
 * point coordinates from standard input, seeds the centroids from the samples,
 * clusters the points and prints every cluster together with its final centroid.
 *
 * <p>All state is kept in static fields and is reset at the start of every
 * {@link #main(String[])} call; the class is not designed for concurrent use.
 */
public class Kmeans_f
{
    /** Safety cap on refinement passes so a floating-point assignment cycle cannot loop forever. */
    private static final int MAX_ITERATIONS = 1000;

    /** Total clusters; entered by the user in {@code main}. */
    private static int numClusters;

    /** Total data points; entered by the user in {@code main}. */
    private static int totalData;

    /**
     * Raw input points, one {@code {x, y}} pair per row. Allocated only after the
     * point count has been read: sizing it in the field initializer would use the
     * not-yet-entered count of zero.
     */
    private static double[][] samples = new double[0][2];

    /** Points that have been added to the clustering so far, in input order. */
    private static final ArrayList<Data> dataSet = new ArrayList<Data>();

    /** Current centroids; the list index is the cluster number. */
    private static final ArrayList<Centroid> centroids = new ArrayList<Centroid>();

    /**
     * Program entry point: reads the input, runs the clustering and prints the result.
     *
     * @param args ignored
     * @throws IllegalArgumentException if the entered cluster or point count is less than 1
     */
    public static void main(String[] args)
    {
        readInput();

        // Drop anything left over from a previous run in the same JVM.
        dataSet.clear();
        centroids.clear();

        initialize();
        kMeanCluster();
        printResults();
    }

    /**
     * Prompts for the cluster count, the point count and every point coordinate,
     * storing them in the static fields.
     *
     * @throws IllegalArgumentException if either count is less than 1
     */
    private static void readInput()
    {
        Scanner in = new Scanner(System.in);
        try {
            System.out.println("Введите количество кластеров:");
            int clusters = in.nextInt();
            if (clusters < 1) {
                throw new IllegalArgumentException("Cluster count must be at least 1, got " + clusters);
            }

            System.out.println("Введите кол-во точек:");
            int points = in.nextInt();
            if (points < 1) {
                throw new IllegalArgumentException("Point count must be at least 1, got " + points);
            }

            numClusters = clusters;
            totalData = points;
            samples = new double[points][2];

            // Read the coordinates of every point.
            for (int i = 0; i < points; i++) {
                System.out.print("Введите значение x элемента Matrix[" + i + "][" + 0 + "]:");
                samples[i][0] = in.nextDouble();
                System.out.print("Введите значение y элементa Matrix[" + i + "][" + 1 + "]:");
                samples[i][1] = in.nextDouble();
            }
        } finally {
            in.close();
        }
    }

    /**
     * Seeds {@code numClusters} centroids from the samples and prints them.
     *
     * <p>The first centroid is the sample chosen by {@link #lowest()}, the second
     * (if requested) the one chosen by {@link #highest()}. Every further centroid
     * is the sample that lies farthest from its nearest already-chosen centroid.
     */
    private static void initialize()
    {
        System.out.println("Centroids initialized at:");

        int low = lowest();
        centroids.add(new Centroid(samples[low][0], samples[low][1]));

        if (numClusters >= 2) {
            int high = highest();
            centroids.add(new Centroid(samples[high][0], samples[high][1]));
        }

        while (centroids.size() < numClusters) {
            int far = farthestFromCentroids();
            centroids.add(new Centroid(samples[far][0], samples[far][1]));
        }

        for (Centroid c : centroids) {
            System.out.println(" (" + c.X() + ", " + c.Y() + ")");
        }
        System.out.print("\n");
    }

    /**
     * Runs the clustering: first adds the samples one at a time, assigning each to
     * its nearest centroid and recalculating the centroids after every addition;
     * then repeatedly recalculates centroids and reassigns all points until no
     * point changes cluster (or {@link #MAX_ITERATIONS} passes have run).
     */
    private static void kMeanCluster()
    {
        // Add in new data, one at a time, recalculating centroids with each new one.
        for (int s = 0; s < totalData; s++) {
            Data point = new Data(samples[s][0], samples[s][1]);
            dataSet.add(point);
            point.cluster(nearestCluster(point));
            recalculateCentroids();
        }

        // Now, keep shifting centroids until equilibrium occurs.
        boolean isStillMoving = true;
        for (int pass = 0; isStillMoving && pass < MAX_ITERATIONS; pass++) {
            recalculateCentroids();

            isStillMoving = false;
            for (Data point : dataSet) {
                int nearest = nearestCluster(point);
                // Compare BEFORE storing the new cluster, otherwise a move is never detected.
                if (point.cluster() != nearest) {
                    point.cluster(nearest);
                    isStillMoving = true;
                }
            }
        }

        // Only reachable with pending moves when the pass cap was hit; keep the
        // reported centroids consistent with the final assignment.
        if (isStillMoving) {
            recalculateCentroids();
        }
    }

    /**
     * Moves every centroid to the mean of the points currently assigned to it.
     * A centroid with no assigned points keeps its position.
     */
    private static void recalculateCentroids()
    {
        for (int i = 0; i < numClusters; i++) {
            // Sums are doubles: integer sums and integer division would truncate the mean.
            double totalX = 0.0;
            double totalY = 0.0;
            int totalInCluster = 0;
            for (Data point : dataSet) {
                if (point.cluster() == i) {
                    totalX += point.X();
                    totalY += point.Y();
                    totalInCluster++;
                }
            }
            if (totalInCluster > 0) {
                centroids.get(i).X(totalX / totalInCluster);
                centroids.get(i).Y(totalY / totalInCluster);
            }
        }
    }

    /**
     * Finds the cluster whose centroid is closest to the given point.
     *
     * @param point the point to classify
     * @return index of the nearest centroid; ties go to the lowest index, and 0 is
     *         returned if no distance compares smaller than infinity
     */
    private static int nearestCluster(Data point)
    {
        int best = 0;
        double bestDistance = Double.POSITIVE_INFINITY;
        for (int i = 0; i < numClusters; i++) {
            double candidate = dist(point, centroids.get(i));
            if (candidate < bestDistance) {
                bestDistance = candidate;
                best = i;
            }
        }
        return best;
    }

    /**
     * Finds the sample whose distance to its nearest existing centroid is largest.
     *
     * @return index into {@code samples}; ties go to the lowest index
     */
    private static int farthestFromCentroids()
    {
        int farthest = 0;
        double farthestDistance = -1.0;
        for (int i = 0; i < samples.length; i++) {
            double nearest = Double.POSITIVE_INFINITY;
            for (Centroid c : centroids) {
                nearest = Math.min(nearest, distance(samples[i][0], samples[i][1], c.X(), c.Y()));
            }
            if (nearest > farthestDistance) {
                farthestDistance = nearest;
                farthest = i;
            }
        }
        return farthest;
    }

    /**
     * Calculates the Euclidean distance between a data point and a centroid.
     *
     * @param d data point
     * @param c centroid
     * @return the distance as a double
     */
    private static double dist(Data d, Centroid c)
    {
        return distance(d.X(), d.Y(), c.X(), c.Y());
    }

    /** Euclidean distance between {@code (x1, y1)} and {@code (x2, y2)}. */
    private static double distance(double x1, double y1, double x2, double y2)
    {
        double dx = x2 - x1;
        double dy = y2 - y1;
        return Math.sqrt(dx * dx + dy * dy);
    }

    /**
     * Scans the samples in order, starting from sample 0 as the candidate, and
     * switches to a sample whenever it is less than or equal to the candidate in
     * both coordinates and strictly less in at least one.
     *
     * @return index of the final candidate
     */
    private static int lowest()
    {
        double lowX = samples[0][0];
        double lowY = samples[0][1];
        int index = 0;
        for (int i = 1; i < samples.length; i++) {
            double x = samples[i][0];
            double y = samples[i][1];
            if ((x <= lowX && y < lowY) || (x < lowX && y <= lowY)) {
                lowX = x;
                lowY = y;
                index = i;
            }
        }
        return index;
    }

    /**
     * Mirror image of {@link #lowest()}: switches to a sample whenever it is greater
     * than or equal to the candidate in both coordinates and strictly greater in at
     * least one.
     *
     * @return index of the final candidate
     */
    private static int highest()
    {
        double highX = samples[0][0];
        double highY = samples[0][1];
        int index = 0;
        for (int i = 1; i < samples.length; i++) {
            double x = samples[i][0];
            double y = samples[i][1];
            if ((x >= highX && y > highY) || (x > highX && y >= highY)) {
                highX = x;
                highY = y;
                index = i;
            }
        }
        return index;
    }

    /** Prints the members of every cluster followed by the final centroid positions. */
    private static void printResults()
    {
        // Print out clustering results.
        for (int i = 0; i < numClusters; i++) {
            System.out.println("Cluster " + i + " includes:");
            for (Data point : dataSet) {
                if (point.cluster() == i) {
                    System.out.println(" (" + point.X() + ", " + point.Y() + ")");
                }
            }
            System.out.println();
        }

        // Print out centroid results.
        System.out.println("Centroids finalized at:");
        for (Centroid c : centroids) {
            System.out.println(" (" + c.X() + ", " + c.Y() + ")");
        }
        System.out.print("\n");
    }

    /** A data point with fixed coordinates and the number of the cluster it is assigned to. */
    private static class Data
    {
        private final double mX;
        private final double mY;
        private int mCluster = 0;

        public Data(double x, double y)
        {
            this.mX = x;
            this.mY = y;
        }

        public double X()
        {
            return this.mX;
        }

        public double Y()
        {
            return this.mY;
        }

        public void cluster(int clusterNumber)
        {
            this.mCluster = clusterNumber;
        }

        public int cluster()
        {
            return this.mCluster;
        }
    }

    /** A movable cluster centre. */
    private static class Centroid
    {
        private double mX;
        private double mY;

        public Centroid(double newX, double newY)
        {
            this.mX = newX;
            this.mY = newY;
        }

        public void X(double newX)
        {
            this.mX = newX;
        }

        public double X()
        {
            return this.mX;
        }

        public void Y(double newY)
        {
            this.mY = newY;
        }

        public double Y()
        {
            return this.mY;
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment