Advertisement
Guest User

Untitled

a guest
Mar 21st, 2018
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.21 KB | None | 0 0
  1. //
  2. // TrainAndTest.c
  3. // MLCoursework
  4. //
  5. // This is a fairly inefficient implementation that does not use any dynamic memory allocation
  6. // because that would not be safe on the DEWIS marking system
  7. //
  8. // Created by Jim Smith on 06/02/2017.
  9. // Copyright © 2017 Jim SmithJim Smith. All rights reserved.
  10. //
  11.  
  12. #include "TrainAndTest.h"
  13. #include "math.h"
  14. #include <stdio.h>
  15. #include <stdlib.h>
  16.  
  17.  
  18.  
// File-scope model storage: train() copies the training examples here so
// that predictLabel() can use them later.  Static arrays are used instead
// of dynamic allocation because that would not be safe on the DEWIS
// marking system (see file header).
static double myModel[NUM_TRAINING_SAMPLES][NUM_FEATURES];
// One class label per stored training example, parallel to myModel.
// Even if each item in the training set is from a different class we know
// the maximum number of examples in advance.
static char myModelLabels[NUM_TRAINING_SAMPLES];

// Number of examples actually stored by the last successful train() call;
// 0 until train() has been called.
static int trainingSetSize = 0;
  27.  
  28. //Swap function for Bubble Sort
  29. void swap(double *a, double *b) {
  30. double placeholder = *a;
  31. *a = *b;
  32. *b = placeholder;
  33. }
  34.  
  35. //Bubble sort
  36. void bubbles(double array[], int n) {
  37. int i, j;
  38. for (i = 0; i < n - 1; i++)
  39.  
  40. // Last i elements are already in place
  41. for (j = 0; j < n - i - 1; j++)
  42. if (array[j] > array[j + 1])
  43. swap(&array[j], &array[j + 1]);
  44. }//end of bubble sort
  45.  
  46.  
  47. int train(double **trainingSamples, char *trainingLabels, int numSamples, int numFeatures) {
  48.  
  49. int returnval = 1;
  50. int sample, feature;
  51.  
  52. //clean the model because C leaves whatever is in the memory |JIM|
  53. for (sample = 0; sample < NUM_TRAINING_SAMPLES; sample++) {
  54. for (feature = 0; feature<NUM_FEATURES; feature++) {
  55. myModel[sample][feature] = 0.0;
  56. }
  57. }
  58.  
  59. //sanity checking |JIM|
  60. if (numFeatures > NUM_FEATURES || numSamples > NUM_TRAINING_SAMPLES) {
  61. fprintf(stdout, "error: called train with data set larger than spaced allocated to store it");
  62. returnval = 0;
  63. }
  64.  
  65.  
  66. if (returnval == 1) {
  67. //store the labels and the feature values
  68. trainingSetSize = numSamples;
  69. int index, feature;
  70. for (index = 0; index < numSamples; index++) {
  71. myModelLabels[index] = trainingLabels[index];
  72. for (feature = 0; feature < numFeatures; feature++) {
  73. myModel[index][feature] = trainingSamples[index][feature];
  74. }
  75. }
  76. fprintf(stdout, "data stored locally \n");
  77. }//end else
  78.  
  79.  
  80. return returnval;
  81. }
  82.  
  83.  
  84.  
  85. char predictLabel(double *testSample, int numFeatures) {
  86.  
  87. double duplicateArray[NUM_TRAINING_SAMPLES]; //Array to hold neighbourDistances before sorting
  88. double neighbourDistances[NUM_TRAINING_SAMPLES]; //Array to hold distances to neighbours
  89. int catA = 0, catB = 0, catC = 0; // Variables for classifying and holding the different categories of data
  90. int k = 9; //Number of neighbours checked
  91. int prediction[9];
  92.  
  93.  
  94. // square root of the sum of the squared differences between the two arrays of numbers
  95. for (int i = 0; i < NUM_TRAINING_SAMPLES; i++)
  96. neighbourDistances[i] =
  97. sqrt((myModel[i][0] - testSample[0]) * (myModel[i][0] - testSample[0]) +
  98. (myModel[i][1] - testSample[1]) * (myModel[i][1] - testSample[1]) +
  99. (myModel[i][2] - testSample[2]) * (myModel[i][2] - testSample[2]) +
  100. (myModel[i][3] - testSample[3]) * (myModel[i][3] - testSample[3]));
  101.  
  102. //puts original array into new duplicateArray
  103. for (int i = 0; i < NUM_TRAINING_SAMPLES; i++)
  104. duplicateArray[i] = neighbourDistances[i];
  105.  
  106.  
  107. //sorts original array
  108. bubbles(neighbourDistances, NUM_TRAINING_SAMPLES);
  109.  
  110.  
  111. //compares original array with the sorted array
  112. for (int i = 0; i < k; i++) {
  113. for (int j = 0; j < NUM_TRAINING_SAMPLES; j++) {
  114. if (neighbourDistances[i] == duplicateArray[j]) {
  115. prediction[i] = j;
  116. }
  117. }
  118. }
  119.  
  120.  
  121. //prints sorted neighbours array
  122. printf("Sorted array: \n");
  123. for (int i = 0; i < k; i++) {
  124. printf("%lf\n", neighbourDistances[i]);
  125. }
  126.  
  127.  
  128. for (int i = 0; i < k; i++)
  129. {
  130. if (myModelLabels[prediction[i]] == myModelLabels[0])
  131. catA++;
  132. else if (myModelLabels[prediction[i]] > myModelLabels[0] && myModelLabels[prediction[i]] <= myModelLabels[49])
  133. catB++;
  134. else if (myModelLabels[prediction[i]] >= myModelLabels[99])
  135. catC++;
  136. }
  137.  
  138. //Check likelihood of category and return most prominent one
  139. if ((catA > catB) && (catA > catC)) return myModelLabels[0];
  140. else if ((catB > catA) && (catB > catC)) return myModelLabels[49];
  141. else return myModelLabels[99];
  142.  
  143. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement