//
//  TrainAndTest.c
//  MLCoursework
//
//  This is a fairly inefficient implementation that does not use any dynamic memory allocation,
//  because that would not be safe on the DEWIS marking system.
//
//  Created by Jim Smith on 06/02/2017.
//  Copyright © 2017 Jim Smith. All rights reserved.
//


#include "TrainAndTest.h"
#include <stdio.h>   /* for fprintf() */
#include <math.h>
#include <stdlib.h>
double calculateDistance(double *sample1, double *sample2);


int main(int argc, char **argv)
{
    extern int Xmain(int argc, char **argv);
    return Xmain(argc, argv);
}
//declare this array as static but make it available to any function in this file
//in case we want to store the training examples and use them later
static double myModel[NUM_TRAINING_SAMPLES][NUM_FEATURES];
//even if each item in the training set is from a different class we know how many there are
static char myModelLabels[NUM_TRAINING_SAMPLES];

//counts how many training examples carry each label value (treated as a byte)
static int labelUsed[256];

static int trainingSetSize = 0;
int train(double **trainingSamples, char *trainingLabels, int numSamples, int numFeatures)
{
    int returnval = 1;
    int sample, feature;
    char thisLabel;
    int ithisLabel;


    //clean the model because C leaves whatever is in the memory
    for (sample = 0; sample < NUM_TRAINING_SAMPLES; sample++)
    {
        for (feature = 0; feature < NUM_FEATURES; feature++)
        {
            myModel[sample][feature] = 0.0;
        }
    }
    for (sample = 0; sample < 256; sample++)
    {
        labelUsed[sample] = 0;
    }

    //sanity checking
    if (numFeatures > NUM_FEATURES || numSamples > NUM_TRAINING_SAMPLES) {
        fprintf(stdout, "error: called train() with a data set larger than the space allocated to store it\n");
        returnval = 0;
    }

    //this is a silly trivial train() function
    fprintf(stdout, "no ML algorithm implemented yet\n");


    //make a simple copy of the data we are being passed but don't do anything with it
    //I'm just giving you this for the sake of people less familiar with pointers etc.


    if (returnval == 1) {
        //store the labels and the feature values
        trainingSetSize = numSamples;
        int index;
        for (index = 0; index < numSamples; index++)
        {
            myModelLabels[index] = trainingLabels[index];
            for (feature = 0; feature < numFeatures; feature++)
            {
                myModel[index][feature] = trainingSamples[index][feature];
            }

            thisLabel = trainingLabels[index];
            ithisLabel = (int)thisLabel;
            labelUsed[ithisLabel]++;


        }
        fprintf(stdout, "data stored locally \n");
    }//end if


    //now you could do whatever you like with the data
    //for example, you could populate some rules etc.
    //you were given pseudocode in semester 1 to do this
    //you could also normalise the data to remove scaling effects if you want to use something like an MLP or kNN
    //(see the normaliseModel() sketch after this function)
    //just remember that anything that you want to access in your predictLabel() function
    //needs to be declared static at the top of this file - as I have done for the myModel and myModelLabels data.



    return returnval;
}
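/*
 A minimal sketch, not part of the original skeleton: the comment above mentions
 normalising the data to remove scaling effects before using a distance-based
 method like kNN. One simple option is min-max scaling. The name normaliseModel(),
 the featureMin/featureMax arrays, and the idea of keeping the per-feature ranges
 so that test samples can be rescaled the same way are all assumptions made here
 for illustration - they are not part of the marked interface.
*/
static double featureMin[NUM_FEATURES];   /* assumed helper storage, not in the skeleton */
static double featureMax[NUM_FEATURES];

static void normaliseModel(void)
{
    int sample, feature;
    //find the observed range of each feature over the stored training set
    for (feature = 0; feature < NUM_FEATURES; feature++)
    {
        featureMin[feature] = myModel[0][feature];
        featureMax[feature] = myModel[0][feature];
        for (sample = 1; sample < trainingSetSize; sample++)
        {
            if (myModel[sample][feature] < featureMin[feature])
                featureMin[feature] = myModel[sample][feature];
            if (myModel[sample][feature] > featureMax[feature])
                featureMax[feature] = myModel[sample][feature];
        }
    }
    //rescale every stored value into [0,1]; guard against a zero range
    for (sample = 0; sample < trainingSetSize; sample++)
    {
        for (feature = 0; feature < NUM_FEATURES; feature++)
        {
            double range = featureMax[feature] - featureMin[feature];
            if (range > 0.0)
                myModel[sample][feature] = (myModel[sample][feature] - featureMin[feature]) / range;
            else
                myModel[sample][feature] = 0.0;
        }
    }
}
/*
 If you were to call normaliseModel() at the end of train(), you would also need
 to apply the same (value - featureMin) / range rescaling to each incoming sample
 at the top of predictLabel(), otherwise the distances are meaningless.
*/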

//Euclidean distance between two feature vectors of length NUM_FEATURES
double calculateDistance(double *sample1, double *sample2)
{
    double distance = 0;
    double diff;
    for (int i = 0; i < NUM_FEATURES; i++)
    {
        diff = sample1[i] - sample2[i];
        distance = distance + diff * diff;
    }
    distance = sqrt(distance);
    return distance;
}
/* a simple 1-nearest-neighbour classifier:
 it computes the distance from the sample to every stored training example
 and returns the label of the closest one
 */
char predictLabel(double *sample, int numFeatures)
{
    double distances[NUM_TRAINING_SAMPLES];
    int closest;

    //calculateDistance() always compares all NUM_FEATURES features,
    //so numFeatures is not needed here.
    //only measure against the examples actually stored by train(),
    //not the zero-filled remainder of myModel
    for (int i = 0; i < trainingSetSize; i++)
    {
        distances[i] = calculateDistance(sample, myModel[i]);
    }
    closest = 0;
    for (int i = 1; i < trainingSetSize; i++)
    {
        if (distances[i] < distances[closest])
        {
            closest = i;
        }
    }

    return myModelLabels[closest];

}
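/*
 A hedged sketch going one step beyond the skeleton: the notes in train() mention
 kNN, and predictLabel() above is the k=1 special case. The function
 predictLabelKNN() and its parameter k are assumptions for illustration, not part
 of the marked interface. It takes a majority vote over the k nearest stored
 examples, counting votes per byte value in the same style as labelUsed.
*/
static char predictLabelKNN(double *sample, int k)
{
    double distances[NUM_TRAINING_SAMPLES];
    int used[NUM_TRAINING_SAMPLES] = {0};  /* marks examples already picked as neighbours */
    int votes[256] = {0};
    int i, pick, best;

    if (k > trainingSetSize)
        k = trainingSetSize;

    for (i = 0; i < trainingSetSize; i++)
        distances[i] = calculateDistance(sample, myModel[i]);

    //repeatedly take the closest not-yet-used example and record its vote
    for (pick = 0; pick < k; pick++)
    {
        int closest = -1;
        for (i = 0; i < trainingSetSize; i++)
        {
            if (!used[i] && (closest < 0 || distances[i] < distances[closest]))
                closest = i;
        }
        used[closest] = 1;
        votes[(unsigned char)myModelLabels[closest]]++;
    }

    //return the label value with the most votes
    best = 0;
    for (i = 1; i < 256; i++)
    {
        if (votes[i] > votes[best])
            best = i;
    }
    return (char)best;
}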