Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * Kocaeli University Computer Engineering Department 2011-Fall
- *
- * Class : Data Mining
- * Subject : Inspection and Implementation of Model Based Clustering Method
- * Expectation-Maximization (EM)
- *
- * Student : Mustafa KIYAR
- * Number : 105112014
- *
- */
- package em;
- import java.io.BufferedWriter;
- import java.io.FileInputStream;
- import java.io.FileWriter;
- import java.io.IOException;
- import java.util.Properties;
- import org.apache.commons.lang3.ArrayUtils;
/**
 * Clusters one-dimensional numeric data by hard reassignment: every cluster is
 * modelled as a normal distribution estimated from its current members, and each
 * value is moved to the cluster whose distribution gives it the highest density.
 * The scan is repeated until a full pass moves nothing.
 *
 * <p>All methods are static; progress is reported through {@link #myLog(String)}.
 *
 * @author Mustafa KIYAR
 */
public class EM {

    /** File the final cluster report is written to by {@link #main(String[])}. */
    private static final String OUTPUT_FILE = "out.log";

    /** Message printed whenever the configured input cannot be clustered. */
    private static final String INVALID_INPUT_MESSAGE = "Input data is irrelevant!";

    /** Smallest number of data values accepted by {@link #main(String[])}. */
    private static final int MIN_VALUES = 3;

    /** Largest number of data values accepted by {@link #main(String[])}. */
    private static final int MAX_VALUES = 1000;

    /**
     * Evaluates the normal probability density function at {@code x}.
     *
     * @param x     point at which the density is evaluated
     * @param mu    mean of the distribution
     * @param sigma standard deviation of the distribution; a value of 0 yields NaN
     * @return the density of {@code x} under N(mu, sigma^2)
     */
    public static double findProbablity(double x, double mu, double sigma) {
        double exponent = -Math.pow(x - mu, 2) / (2 * Math.pow(sigma, 2));
        return Math.exp(exponent) / (Math.sqrt(2 * Math.PI) * sigma);
    }

    /**
     * Computes the arithmetic mean of the given values.
     *
     * @param dArr values to average
     * @return the mean, or NaN when {@code dArr} is empty
     */
    public static double findMean(double dArr[]) {
        double sum = 0;
        for (double value : dArr) {
            sum += value;
        }
        return sum / dArr.length;
    }

    /**
     * Computes the population standard deviation (divides by n, not n - 1).
     *
     * @param dArr values whose spread is measured
     * @param mu   mean of {@code dArr}
     * @return the standard deviation, or NaN when {@code dArr} is empty
     */
    public static double findStandartDeviation(double dArr[], double mu) {
        double squaredError = 0;
        for (double value : dArr) {
            squaredError += Math.pow(value - mu, 2);
        }
        return Math.sqrt(squaredError / dArr.length);
    }

    /**
     * Prints one line of progress output to standard output.
     *
     * @param str text to print
     */
    public static void myLog(String str) {
        System.out.println(str);
    }

    /**
     * Logs the mean and standard deviation of the values, followed by the density
     * of every value under the normal distribution with those parameters.
     *
     * @param dArr values to describe
     */
    public static void analyzeArr(double dArr[]) {
        double mu = findMean(dArr);
        double sigma = findStandartDeviation(dArr, mu);
        myLog("mean : " + mu);
        myLog("standart deviation : " + sigma);
        for (double value : dArr) {
            myLog("probablity of " + value + " is "
                    + findProbablity(value, mu, sigma));
        }
    }

    /**
     * Computes the density of {@code x} under the normal distribution estimated
     * from {@code dArr}, logging the estimated parameters and the result.
     *
     * @param x    value to evaluate
     * @param dArr members of the cluster the distribution is estimated from
     * @return the density of {@code x}; NaN when the cluster is empty or has zero
     *         standard deviation
     */
    public static double findProbablity(double x, double dArr[]) {
        double mu = findMean(dArr);
        double sigma = findStandartDeviation(dArr, mu);
        double probablity = findProbablity(x, mu, sigma);
        myLog("mean : " + mu);
        myLog("standart deviation : " + sigma);
        myLog("probablity of object : " + probablity);
        return probablity;
    }

    /**
     * Finds the cluster under which {@code x} has the highest density.
     *
     * <p>Ties go to the lowest index. A NaN density never wins the comparison, so
     * when no cluster yields a positive density the result is index 0.
     *
     * @param x    value to classify
     * @param dArr clusters, one array of members per cluster
     * @return index of the best matching cluster
     */
    public static int findRelation(double x, double dArr[][]) {
        double bestProbability = 0;
        int iBestCluster = 0;
        for (int i = 0; i < dArr.length; i++) {
            myLog("");
            myLog("Inspection of " + x + " in Cluster :" + i);
            double probablity = findProbablity(x, dArr[i]);
            if (bestProbability < probablity) {
                bestProbability = probablity;
                iBestCluster = i;
            }
        }
        return iBestCluster;
    }

    /**
     * Moves one value from its cluster to the end of another cluster, replacing
     * both rows of {@code dArr} with resized copies, and logs the move.
     *
     * @param iCluster     index of the cluster currently holding the value
     * @param iPos         position of the value inside that cluster
     * @param iBestCluster index of the cluster receiving the value
     * @param dArr         clusters; rows {@code iCluster} and {@code iBestCluster}
     *                     are replaced
     */
    public static void switchCluster(int iCluster, int iPos, int iBestCluster,
            double dArr[][]) {
        double dVal = dArr[iCluster][iPos];
        // Remove first so that a move within the same cluster still sees the
        // shortened row when appending.
        dArr[iCluster] = removeAt(dArr[iCluster], iPos);
        dArr[iBestCluster] = append(dArr[iBestCluster], dVal);
        myLog("Object : " + dVal);
        myLog("Original cluster : " + iCluster);
        myLog("Moved to cluster : " + iBestCluster);
    }

    /**
     * Repeatedly scans every cluster and moves each value to its best matching
     * cluster until one complete pass performs no move. {@code dArr} is modified
     * in place.
     *
     * @param dArr clusters to refine
     */
    public static void runProgram(double dArr[][]) {
        boolean moved;
        do {
            moved = false;
            for (int i = 0; i < dArr.length; i++) {
                myLog("##########################################################");
                myLog("Scaning Cluster : " + i);
                for (int j = 0; j < dArr[i].length; j++) {
                    int iBestCluster = findRelation(dArr[i][j], dArr);
                    if (i != iBestCluster) {
                        switchCluster(i, j, iBestCluster, dArr);
                        // The next value slid into position j; look at it again.
                        j--;
                        moved = true;
                    } else {
                        myLog("Object belongs to orginal cluster.");
                    }
                    myLog("......................................................");
                }
            }
        } while (moved);
    }

    /**
     * Parses the values and splits them, in order, into the initial clusters.
     * Every cluster receives {@code values.length / numberOfClusters} values; the
     * last cluster additionally receives all values left over by that division.
     *
     * @param values           textual numbers, in input order
     * @param numberOfClusters number of clusters to create
     * @return one array of parsed values per cluster
     * @throws IllegalArgumentException if {@code numberOfClusters} is below 1 or
     *                                  exceeds the number of values
     * @throws NumberFormatException    if a value is not a parsable double
     */
    public static double[][] buildClusters(String[] values, int numberOfClusters) {
        if (numberOfClusters < 1 || values.length < numberOfClusters) {
            throw new IllegalArgumentException("cannot split " + values.length
                    + " values into " + numberOfClusters + " clusters");
        }
        int baseSize = values.length / numberOfClusters;
        double[][] clusters = new double[numberOfClusters][];
        int next = 0;
        for (int c = 0; c < numberOfClusters; c++) {
            boolean last = c == numberOfClusters - 1;
            int size = last ? values.length - next : baseSize;
            double[] cluster = new double[size];
            for (int k = 0; k < size; k++) {
                cluster[k] = Double.parseDouble(values[next++]);
            }
            clusters[c] = cluster;
        }
        return clusters;
    }

    /**
     * Reads the properties file named by {@code args[0]} (keys {@code data}, a
     * comma separated list of numbers, and {@code numberofclusters}), clusters the
     * data and writes the resulting clusters with their mean and standard
     * deviation to {@code out.log}.
     *
     * @param args the command line arguments; the first one is the properties file
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            myLog("Usage: EM <properties-file>");
            return;
        }
        String[] values = {""};
        int numberOfCluster = 0;
        Properties property = new Properties();
        try (FileInputStream in = new FileInputStream(args[0])) {
            property.load(in);
            String data = property.getProperty("data");
            String clusterCount = property.getProperty("numberofclusters");
            if (data != null && clusterCount != null) {
                values = data.split(",");
                numberOfCluster = Integer.parseInt(clusterCount.trim());
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        } catch (NumberFormatException ignored) {
            // numberOfCluster stays 0 and is rejected by the validation below.
        }
        // The comparison is done in long so a huge cluster count cannot overflow.
        if (numberOfCluster < 1
                || values.length < MIN_VALUES
                || values.length > MAX_VALUES
                || values.length <= (long) numberOfCluster * 2) {
            myLog(INVALID_INPUT_MESSAGE);
            return;
        }
        double[][] clusters;
        try {
            clusters = buildClusters(values, numberOfCluster);
        } catch (NumberFormatException ex) {
            myLog(INVALID_INPUT_MESSAGE);
            return;
        }
        runProgram(clusters);
        writeReport(clusters);
    }

    /** Writes every cluster with its mean and standard deviation to the report file. */
    private static void writeReport(double[][] clusters) {
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(OUTPUT_FILE))) {
            for (int i = 0; i < clusters.length; i++) {
                double dMean = findMean(clusters[i]);
                double dStdDev = findStandartDeviation(clusters[i], dMean);
                bw.write("Cluster " + i + " : " + formatArray(clusters[i]) + "\n");
                bw.write("Mean of Cluster " + i + " : " + Double.toString(dMean) + "\n");
                bw.write("Stdev of Cluster " + i + " : " + Double.toString(dStdDev) + "\n");
                bw.write("\n");
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /** Returns a copy of {@code source} without the element at {@code index}. */
    private static double[] removeAt(double[] source, int index) {
        double[] result = new double[source.length - 1];
        System.arraycopy(source, 0, result, 0, index);
        System.arraycopy(source, index + 1, result, index, source.length - index - 1);
        return result;
    }

    /** Returns a copy of {@code source} with {@code value} added at the end. */
    private static double[] append(double[] source, double value) {
        double[] result = new double[source.length + 1];
        System.arraycopy(source, 0, result, 0, source.length);
        result[source.length] = value;
        return result;
    }

    /** Formats the values as {@code {a,b,c}}, the layout used in the report file. */
    private static String formatArray(double[] values) {
        StringBuilder text = new StringBuilder("{");
        for (int i = 0; i < values.length; i++) {
            if (i > 0) {
                text.append(',');
            }
            text.append(values[i]);
        }
        return text.append('}').toString();
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement