Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import au.com.bytecode.opencsv.CSVReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
- // This program reads the dataset, and the new instances.
- // Note that an instance is a patient in this example.
- // Then the program will compute and display the distances between the new
- // instance / patient and all the instances / patients from the dataset, based on symptoms.
- // The distance between two instances is defined here as the number of attribute value disparities.
- // For instance the distance between two patients is 2 if one has fever and the other doesn't,
- // and one has headache and the other doesn't, and all their other symptoms are the same.
- public class example1 {
- //here one specifies the file containing the dataset in comma separated values format CSV
- private static final String datasetfile="diagnoses.csv";
- static String [] nextLine;
- static String[][] data= new String[100][7]; //this array will contain the dataset
- static String[] newinstance=new String[7]; //this array will contain a new instance / new patient symptoms
- static int[] distance= new int[100]; //this array will contain the distance from the new instance to each instance in the dataset
- static int min = 0;
- static int index = 1;
- static String x,y;
- static int numberOFinstances;
- public static void KNearestNeighbour(int k){
- /*
- * K Nearest Neighbour Algorithm
- *
- */
- //loop through how many nearest neighbour's we want where k is the limit.
- for(int j=0;j<k;j++){
- //get the min value.
- min = distance[1];
- //loop through all of the distances in the array.
- for(int i=1;i<numberOFinstances+1;i++){
- //if current value of array is less than min.
- if(distance[i]<min){
- //assign min to current array value.
- min=distance[i];
- //record index so we know where we found it.
- index=i;
- }
- }
- //Put a random high number so we don't get the same minimum
- distance[index]=999;
- //Print Distance
- System.out.println("\nDistance is "+min);
- //Print Diagnosis
- System.out.println("Diagnosis is "+data[index][6]);
- //Print patient using the index we found the minimum distance at.
- System.out.println("Close distance to Patient"+index+"\n");
- }
- }
- public static void NearestNeighbour(){
- /*
- * Nearest Neighbour Algorithm
- *
- */
- //first element of array as min.
- min = distance[1];
- for(int k=2;k<numberOFinstances;k++){
- //if current value of array is less than min
- if(distance[k]<min){
- //assign min to current array value
- min=distance[k];
- //record index
- index=k;
- }
- }
- //Print Distance
- System.out.println("\nDistance is "+min);
- //Print Diagnosis
- System.out.println("Diagnosis is "+data[index][6]);
- //print the patient using the index we found the min value.
- System.out.println("Nearest Neighbour is Patient "+index);
- }
- public static void main(String[] args) throws IOException {
- //one declares variables and arrays to be used
- CSVReader reader = new CSVReader(new FileReader(datasetfile)); //this is used to read from CSV file
- // prepare to read the dataset in array data
- // and then to read a new instance / patient symptos from the keyboard
- int j,i=0;
- while ((nextLine = reader.readNext()) != null) { //while the line inputed is not empty do
- for(j=0;j<=6;j++)
- data[i][j]=nextLine[j];
- i++; // this increses i with 1
- }
- //in variable i we counted the number of instances in the dataset plus 1 because
- //the attribute names were also included at the top of the dataset
- numberOFinstances = i-1;
- //this prepares a loop to input a variable number of new instances and compute thier distances
- //to the dataset instances
- Scanner in =new Scanner(System.in); // prepare to read new instance from keyboard
- boolean finished=false; // the next loop should execute until new instances finish
- while (!finished) {
- // Read a new instance (patient symptoms) and put it in an array called newinstance
- System.out.println();
- System.out.println("Input the new patient's 5 symptoms regarding");
- System.out.println("Sore Throat, Fever, Swollen Glands, Congestion, Headache");
- System.out.println("Input Yes or No, ONE PER LINE, CASE SENSITIVE!:");
- System.out.println();
- /*if(in.nextLine().isEmpty())
- System.out.println("Empty: PLease enter Yes or No.");
- System.exit(0);
- */
- for(j=1;j<=5;j++){
- String read = in.nextLine();
- if(read.isEmpty()){
- System.err.println("Please enter either yes or no.");
- j
- --;
- }
- newinstance[j]=read;
- }
- // Compute the distances from the new instance to each instance in the dataset
- // For example distance[8] should contain the distance from the new instance to instance 8
- // Start with value 0 for all these distances first.
- for(i=1; i<= numberOFinstances; i++)
- distance[i]=0;
- System.out.println();
- for(i=1; i<= numberOFinstances; i++) {
- for(j=1;j<=5;j++) {
- x=newinstance[j];
- y=data[i][j];
- if (x.compareTo(y)!=0) //if x and y do not coincide
- distance[i]++; //then add 1 to the distance
- }
- // Now the computed distances are ready, do display them
- System.out.println("The distance between the new patient and patient"+i+" is "+distance[i]);
- }
- System.out.println("Calculating Nearest Neighbour");
- //Nearest Neighbour
- NearestNeighbour();
- System.out.println("\n Calculating K Nearest Neighbour Where K=3");
- //K Nearest Neighbour - 3 Closest Matches.
- KNearestNeighbour(3);
- // Ask user if new instances are to be inputed.
- System.out.println();
- System.out.print("Any more new patients? y/n: ");
- x=in.nextLine();
- //If the input from user is not "y" (no more new instances) then one should finish the loop.
- if (x.compareTo("y")!=0){
- finished=true;
- System.out.println();
- System.out.println("GOODBYE .........");
- }
- }
- }
- }
Add Comment
Please, Sign In to add comment