Advertisement
Guest User

Untitled

a guest
Mar 24th, 2019
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.27 KB | None | 0 0
  1. //
  2. // TrainAndTest.c
  3. // MLCoursework
  4. //
// This is a fairly inefficient implementation that does not use any dynamic memory allocation
// because that would not be safe on the DEWIS marking system
  7. //
  8. // Created by Jim Smith on 06/02/2017.
// Copyright © 2017 Jim Smith. All rights reserved.
  10. //
  11.  
  12.  
  13. #include "TrainAndTest.h"
  14. #include <math.h>
  15. #include <stdlib.h>
  16.  
  17. double calculateDistance(double *sample1, double *sample2);
  18.  
  19.  
  20. int main(int argc, char **argv)
  21. {
  22. extern int Xmain(int argc, char **argv);
  23. Xmain(argc, argv);
  24. }
  25. //declare this array as static but make it available to any function in this file
  26. //in case we want to store the training examples and use them later
  27. static double myModel[NUM_TRAINING_SAMPLES][NUM_FEATURES];
  28. //even if each item in the training set is from a diffferent class we know how many there are
  29. static char myModelLabels[NUM_TRAINING_SAMPLES];
  30.  
  31. static int labelUsed[256];
  32.  
  33. static int trainingSetSize = 0;
  34.  
  35. int train(double **trainingSamples, char *trainingLabels, int numSamples, int numFeatures)
  36. {
  37. int returnval = 1;
  38. int sample, feature;
  39. char thisLabel;
  40. int ithisLabel;
  41.  
  42.  
  43. //clean the model because C leaves whatever is in the memory
  44. for (sample = 0; sample < NUM_TRAINING_SAMPLES; sample++)
  45. {
  46. for (feature = 0; feature < NUM_FEATURES; feature++)
  47. {
  48. myModel[sample][feature] = 0.0;
  49. }
  50. }
  51. for (sample = 0; sample < 256; sample++)
  52. {
  53. labelUsed[sample] = 0;
  54. }
  55.  
  56. //sanity checking
  57. if (numFeatures > NUM_FEATURES || numSamples > NUM_TRAINING_SAMPLES) {
  58. fprintf(stdout, "error: called train with data set larger than spaced allocated to store it");
  59. returnval = 0;
  60. }
  61.  
  62. //this is a silly trivial train()_ function
  63. fprintf(stdout, "no ML algorithm implemented yet\n");
  64.  
  65.  
  66. //make a simple copy of the data we are being passed but don't do anything with it
  67. //I'm just giving you this for the sake of people less familiar with pointers etc.
  68.  
  69.  
  70. if (returnval == 1) {
  71. //store the labels and the feature values
  72. trainingSetSize = numSamples;
  73. int index, feature;
  74. for (index = 0; index < numSamples; index++)
  75. {
  76. myModelLabels[index] = trainingLabels[index];
  77. for (feature = 0; feature < 4; feature++)
  78. {
  79. myModel[index][feature] = trainingSamples[index][feature];
  80. }
  81.  
  82. thisLabel = trainingLabels[index];
  83. ithisLabel = (int)thisLabel;
  84. labelUsed[ithisLabel] ++;
  85.  
  86.  
  87. }
  88. fprintf(stdout, "data stored locally \n");
  89. }//end else
  90.  
  91.  
  92. //now you could do whatever you like with the data
  93. //for example, you could populate some rules etc.
  94. //you were given pseudocode in semester 1 to do this
  95. // you could also normalise the data to remove scaling effects if you want to use something like a MLP or kNN
  96. //just remember that anything that you want to acess in your predictLabel() function
  97. //needs to be declared static at the top of this file - as I have done for the "myModel" and myModelLabels data .
  98.  
  99.  
  100.  
  101. return returnval;
  102. }
  103.  
  104. double calculateDistance(double *sample1, double *sample2)
  105. {
  106. double distance = 0;
  107. double sum = 0;
  108. for (int i = 0; i < NUM_FEATURES; i++)
  109. {
  110. sum = sample1[i] - sample2[i];
  111. distance = distance + sum * sum;
  112.  
  113.  
  114. }
  115. distance = sqrt(distance);
  116. return distance;
  117. }
  118. /* this is a really trivial classifier that just returns any valid label.
  119. So it assumes that the features are all irrelevant,
  120. and even ignores the relative frequency that values occur in training
  121. */
  122. char predictLabel(double *sample, int numFeatures)
  123. {
  124.  
  125. char prediction = (char)NULL;
  126. int choice, validChoiceMade = 0;
  127. double distances[NUM_TRAINING_SAMPLES];
  128. int closest;
  129.  
  130. for (int i = 0; i < NUM_TRAINING_SAMPLES; i++)
  131. {
  132. distances[i] = calculateDistance(sample, myModel[i]);
  133.  
  134. }
  135. closest = 0;
  136. for (int i = 0; i < NUM_TRAINING_SAMPLES; i++)
  137. {
  138. if (distances[i] < distances[closest])
  139. {
  140. closest = i;
  141. }
  142. }
  143.  
  144. return myModelLabels[closest];
  145.  
  146. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement