Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //
- // TrainAndTest.c
- // MLCoursework
- //
- // This is a fairly inefficient implementation that does not use any dynamic memory allocation
- // because that would not be safe on the DEWIS marking system
- //
- // Created by Jim Smith on 06/02/2017.
- // Copyright © 2017 Jim SmithJim Smith. All rights reserved.
- //
#include "TrainAndTest.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
/* Euclidean distance between two feature vectors of length NUM_FEATURES
 * (defined later in this file). */
double calculateDistance(double *sample1, double *sample2);

/* Debug harness kept disabled for reference:
int main(int argc, char **argv)
{
    extern int Xmain(int argc, char **argv);
    Xmain(argc,argv);
}*/

/* Model storage is static (file scope, fixed size) so that train() can fill
 * it and predictLabel() can read it later, without any dynamic memory
 * allocation — dynamic allocation would not be safe on the DEWIS marking
 * system (per the file header). */
static double myModel[NUM_TRAINING_SAMPLES][NUM_FEATURES];
/* One class label per stored training sample, parallel to myModel rows.
 * Even if every item is from a different class we know the maximum count. */
static char myModelLabels[NUM_TRAINING_SAMPLES];
/* Histogram of how many times each byte-valued label occurs in the
 * training set (indexed by the label's character code, 0..255). */
static int labelUsed[256];
/* Number of samples actually stored by the most recent call to train(). */
static int trainingSetSize = 0;
- int train(double **trainingSamples, char *trainingLabels, int numSamples, int numFeatures) {
- int returnval = 1;
- int sample, feature;
- char thisLabel;
- int ithisLabel;
- //clean the model because C leaves whatever is in the memory
- for (sample = 0; sample < NUM_TRAINING_SAMPLES; sample++) {
- for (feature = 0; feature < NUM_FEATURES; feature++) {
- myModel[sample][feature] = 0.0;
- }
- }
- for (sample = 0; sample < 256; sample++) {
- labelUsed[sample] = 0;
- }
- //sanity checking
- if (numFeatures > NUM_FEATURES || numSamples > NUM_TRAINING_SAMPLES) {
- fprintf(stdout, "error: called train with data set larger than spaced allocated to store it");
- returnval = 0;
- }
- //this is a silly trivial train()_ function
- fprintf(stdout,"no ML algorithm implemented yet\n");
- //make a simple copy of the data we are being passed but don't do anything with it
- //I'm just giving you this for the sake of people less familiar with pointers etc.
- if (returnval == 1) {
- //store the labels and the feature values
- trainingSetSize = numSamples;
- int index, feature;
- for (index = 0; index < numSamples; index++) {
- myModelLabels[index] = trainingLabels[index];
- for (feature = 0; feature < 4; feature++) {
- myModel[index][feature] = trainingSamples[index][feature];
- }
- thisLabel = trainingLabels[index];
- ithisLabel = (int) thisLabel;
- labelUsed[ithisLabel]++;
- }
- fprintf(stdout, "data stored locally \n");
- }//end else
- //now you could do whatever you like with the data
- //for example, you could populate some rules etc.
- //you were given pseudocode in semester 1 to do this
- // you could also normalise the data to remove scaling effects if you want to use something like a MLP or kNN
- //just remember that anything that you want to access in your predictLabel() function
- //needs to be declared static at the top of this file - as I have done for the "myModel" and myModelLabels data .
- return returnval;
- }
- double calculateDistance(double *sample1, double *sample2) {
- double distance = 0;
- double difference = 0;
- for (int i = 0; i < NUM_FEATURES; i++) {
- difference = sample1[i] - sample2[i];
- distance = sqrt(distance + (difference * difference));
- }
- return distance;
- }
- char predictLabel(double *sample, int numFeatures) {
- char prediction = (char) NULL;
- double distances[NUM_TRAINING_SAMPLES];
- int choice, validChoice = 0;
- int nearest;
- int i;
- for (i = 0; i < NUM_TRAINING_SAMPLES; i++) {
- distances[i] = calculateDistance(sample, myModel[i]);
- }
- nearest = 0;
- for (i = 0; i < NUM_TRAINING_SAMPLES - 1; i++);
- {
- if (distances[i] < distances[nearest]) {
- nearest = i;
- }
- }
- prediction = myModelLabels[nearest];
- return prediction;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement