Untitled

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <iso646.h>
#include <string.h>
#include <ctype.h>
#include <stdint.h>
#include <time.h>

// exit on failure
#define FAIL return EXIT_FAILURE
#define MAX_WORD_LENGTH_PRIME 251 // some high word length
#define LAST_ELEMENT "<END_OF_STRING>" // do not use in text
#define EMPTY_ELEMENT "<EMPTY_SYMBOL>" // do not use in text
#define MIN_PRIME_NUMBER 2

// remainders for each letter code in hash function computation
size_t remainders[MAX_WORD_LENGTH_PRIME];

// hash table entry
typedef struct
{
    char word[MAX_WORD_LENGTH_PRIME]; // 8-bit UTF-8
    size_t count;
} element; // to use declaration without struct keyword over all source

typedef struct
{
    size_t index;
    size_t count;
} found_element;

typedef struct
{
    bool result;
    bool is_error;
    size_t index;
    size_t new_size;
} insert_status;

typedef struct
{
    bool result;
    size_t new_size;
} increase_status;

found_element found_data = { 0, 0 };

void hashtable_content_print(element * hashtable, size_t size)
{
    if (hashtable != NULL)
    {
        element element_i;

        printf("HASHTABLE PRIME SIZE = %zu\n", size);

        for(size_t i = 0; i< size; ++i)
        {
            element_i = hashtable[i];

            //if (strcmp(element_i.word, EMPTY_ELEMENT) != 0 and strcmp(element_i.word, LAST_ELEMENT) != 0)
                printf("%s count is %zu\n", element_i.word, element_i.count);
        }
    }
    else
        puts("NULL table pointer passed to print.");
}


void recalculate_remainders()
{
    for (size_t i = 0; i < MAX_WORD_LENGTH_PRIME; ++i)
        remainders[i] = rand() % MAX_WORD_LENGTH_PRIME;
}

size_t get_next_prime_number(size_t current_prime_number) // required to get new hashtable size on reconstruction (with rounds == 1 we will got minimal memory loss)
{
    size_t saved = current_prime_number;
    bool is_prime = true;

    if (current_prime_number >= MIN_PRIME_NUMBER)
        for(current_prime_number = current_prime_number + 1; current_prime_number < SIZE_MAX; ++current_prime_number)
            {
                for(size_t j = 2; j < current_prime_number; ++j)
                    if (current_prime_number % j == 0)
                    {
                        is_prime = false;

                        break;
                    }
                    else
                        is_prime = true;

                if (is_prime)
                    break;
            }

    return (current_prime_number == saved) ? 0 : current_prime_number;
}

// argc, argv checking
bool check_params(int params_count, char *params[])
{
    return (params_count == 2) and (params != NULL) and (params[1] != NULL);
}

size_t hash(char value[], size_t offset, size_t hashtable_prime_size) // double hashing probe
{
    size_t result = 0;

    if (value != NULL and strlen(value) > 0)
        for (size_t i = 0; value[i] != '\0'; ++i)
            result += remainders[i] * value[i];
    else
        puts("NULL value pointer or empty value passed to hash function.");
    result = (result % hashtable_prime_size + offset * (1 + result % (hashtable_prime_size - 1))) % hashtable_prime_size;

    printf("word = %s; offset = %zu; hash = %zu\n", value, offset, result);

    return result;
}

bool hashtable_element_search(element * hashtable, char key[], size_t hashtable_prime_size)
{
    if ((hashtable != NULL) and (key != NULL))
    {
        puts("SEARCHING...");

        element found;
        size_t offset = 0, index;

        do
        {
            if (strcmp((found = hashtable[index = hash(key, offset++, hashtable_prime_size)]).word, key) == 0)
            {
                found_data.index = index;
                found_data.count = found.count;

                printf("FOUND ELEMENT WORD = \"%s\" WITH INDEX = %zu AND COUNT = %zu\n", found.word, found_data.index, found_data.count);

                return true;
            }
            else
                printf("PROBE OF ELEMENT WORD = \"%s\" WITH INDEX = %zu AND KEY = \"%s\"\n", found.word, index, key);
        }
        while(offset != hashtable_prime_size);
    }
    else
        puts("NULL table pointer or NULL key pointer passed to search.");

    return false;
}

increase_status hashtable_size_increase(element * hashtable, size_t rounds, size_t hashtable_prime_size) // rounds is a number of prime hashtable size rises
{
    increase_status result = { false, 0 };

    if (hashtable != NULL and rounds > 0)
        {
            printf("INCREASING HASHTABLE USING %zu ROUNDS...\n", rounds);

            size_t new_size, offset = 0, index = 0, saved_size = 0, old_size = 0;

            while(rounds >= 1)
            {
                saved_size = new_size = get_next_prime_number(hashtable_prime_size);
                --rounds;
            }

            puts("===HASHTABLE CONTENT BEFORE RESIZEMENT===");
            hashtable_content_print(hashtable, hashtable_prime_size);
            puts("===END===");
            element new_hashtable[new_size];
            memset(&new_hashtable, 0, new_size * sizeof(element));
            old_size = hashtable_prime_size;
            result.new_size = new_size;
            printf("NEW PRIME SIZE = %zu\n", new_size);
            new_hashtable[--new_size].count = 0;
            new_hashtable[new_size].word[0] = '\0';
            strcpy(new_hashtable[new_size].word, LAST_ELEMENT);

            // cleaning array
            while(--new_size > 0)
            {
                new_hashtable[new_size].count = 0;
                new_hashtable[new_size].word[0] = '\0';
                strcpy(new_hashtable[new_size].word, EMPTY_ELEMENT);
            }

            new_hashtable[new_size].count = 0;
            new_hashtable[new_size].word[0] = '\0';
            strcpy(new_hashtable[new_size].word, EMPTY_ELEMENT);
            element found;

            puts("FINDING EMPTY ELEMENT...");

            // empty element in new hashtable already exists
            while(strcmp((found = hashtable[new_size++]).word, LAST_ELEMENT) != 0)
                {
                    printf("FINDING PLACE FOR WORD = \"%s\" WITH COUNT = %zu\n", found.word, found.count);

                    if (strcmp(found.word, EMPTY_ELEMENT) == 0) // pass
                    {
                        puts("PASSED BECAUSE OF EMPTY");

                        continue;
                    }
                    else
                    {
                        size_t stop_defense = 2 * hashtable_prime_size;

                        while(strcmp(new_hashtable[index = hash(found.word, offset++, hashtable_prime_size)].word, EMPTY_ELEMENT) != 0 and offset < stop_defense);

                        printf("TEMP TABLE WORD = \"%s\" AT INDEX = %zu\n", new_hashtable[index].word, index);

                        new_hashtable[index].count = found.count;
                        new_hashtable[index].word[0] = '\0';
                        strcpy(new_hashtable[index].word, found.word);

                        printf("POSTED WORD = \"%s\" WITH INDEX = %zu AND COUNT = %zu INTO TEMP TABLE\n", found.word, index, found.count);
                    }
                }
            puts("===OLD HASHTABLE CONTENT===");
            hashtable_content_print(hashtable, old_size);
            puts("===END===");
            void * temp = realloc(hashtable, saved_size * sizeof(element));

            if(temp)
            {

                //memset(&temp, 0, sizeof(temp)); WHY ISN'T WORKING?
                puts("===TEMPORARY HASHTABLE CONTENT===");
                hashtable_content_print(hashtable, saved_size);
                puts("===END===");

                if (memcpy(temp, new_hashtable, saved_size * sizeof(element)))
                {
                    hashtable = temp;

                    puts("COPIED INTO NEW MEMORY LOCATION");
                    puts("===NEW HASHTABLE CONTENT===");
                    hashtable_content_print(hashtable, saved_size);
                    puts("===END===");

                    result.result = true;

                    return result; // success
                }
            }
            else
            {
                puts("Memory reallocation fails. Terminated.");

                free(hashtable);
                exit(1);
            }
        }
        else
            puts("NULL pointer to hashtable or incorrect rounds count passed to increase function.");

    return result;
}

insert_status hashtable_element_insert(element * hashtable, element new_element, size_t hashtable_prime_size)
{
    insert_status result = { false, true, 0, hashtable_prime_size };

    if (new_element.word != NULL and strcmp(new_element.word, LAST_ELEMENT) != 0 and strcmp(new_element.word, EMPTY_ELEMENT) != 0) //validation
    {
        puts("INSERTING...");

        result.is_error = false;

        if (hashtable_element_search(hashtable, new_element.word, hashtable_prime_size))
        {
            result.index = found_data.index;

            printf("STEP 1. FOUND AT INDEX = %zu. NOT INSERTED.\n", result.index);

            return result; // element already exists
        }

        if (!hashtable_element_search(hashtable, EMPTY_ELEMENT, hashtable_prime_size)) // key not found
        {
            puts("STEP 2. EMPTY ELEMENT NOT FOUND.");

            increase_status output = hashtable_size_increase(hashtable, 1, hashtable_prime_size); // reconstruct hashtable to add empty elements

            if (output.result)
                result.new_size = output.new_size;
            else
            {
                puts("Resizement of hashtable failed. Terminated.");

                free(hashtable);
                exit(1);
            }

            hashtable_element_search(hashtable, EMPTY_ELEMENT, hashtable_prime_size);
        }

        hashtable[found_data.index].count = new_element.count; // rewrite empty element
        hashtable[found_data.index].word[0] = '\0';
        strcpy(hashtable[found_data.index].word, new_element.word);
        result.result = true;

        printf("NEW ELEMENT WORD = \"%s\" FOUND WITH INDEX = %zu\n", new_element.word, found_data.index);

        return result;
    }
    else
        puts("Incorrect new element word.");

    return result;
}

void hashtable_elements_fill(FILE * textfile, element * hashtable, size_t hashtable_prime_size)
{
    char word[MAX_WORD_LENGTH_PRIME];
    word[0] = '\0';
    bool is_word = false;
    char input;

    while(!feof(textfile) and !ferror(textfile))
    {
        input = getc(textfile);

        puts("FILLING...");

        if (!isspace(input)) // append to a whole UTF-8 word
        {
            is_word = true;
            strncat(word, &input, 1);
        }
        else if (is_word)
        {
            element new_element = { "", 1 };
            new_element.word[0] = '\0';
            strcpy(new_element.word, word);

            puts("===WORD===");
            puts(word);
            puts("===END===");

            insert_status output = hashtable_element_insert(hashtable, new_element, hashtable_prime_size);

            printf("OUTPUT INDEX = %zu\n", output.index);

            if(output.is_error)
            {
                puts("OUTPUT.IS_ERROR = TRUE");

                continue; // invalid word
            }
            else if (!output.result)
            {
                puts("OUTPUT.RESULT = FALSE. INCREASING COUNT...");
                printf("COUNT BEFORE = %zu\n", hashtable[output.index].count);

                ++hashtable[output.index].count; // increasing found word counter

                printf("COUNT AFTER = %zu\n", hashtable[output.index].count);
            }

            word[0] = '\0';
            is_word = false;
        }
    }

    if (ferror(textfile))
        perror("File reading error occured.");
}

int main(int params_count, char *params[])
{
    srand(time(NULL)); // setup randomizer

    if (!check_params(params_count, params))
    {
        puts("Params check failed. Program terminated.");

        FAIL;
    }

    char *filename;

    filename = params[1];
    FILE *textfile = fopen(filename, "r");

    if(textfile == NULL)
    {
        puts("Cannot read input file. Program terminated.");

        FAIL;
    }
    printf("FILENAME = %s", filename);
    recalculate_remainders();
    printf("HASHTABLE SIZE = %zu", MIN_PRIME_NUMBER * sizeof(element));
    element * hashtable = malloc(MIN_PRIME_NUMBER * sizeof(element));

    if (hashtable)
    {
        size_t counter = MIN_PRIME_NUMBER,
                hashtable_prime_size = MIN_PRIME_NUMBER; // starts from minimal prime size because of empty textfile existence probability (we do not got memory loss in this case);
        hashtable[--counter].count = 0;
        hashtable[counter].word[0] = '\0';
        strcpy(hashtable[counter].word, LAST_ELEMENT);

        while(counter > 0)
        {
            hashtable[--counter].count = 0;
            hashtable[counter].word[0] = '\0';
            strcpy(hashtable[counter].word, EMPTY_ELEMENT);
        }
        // there are may be no words in a file at all (end element with NULL string always stored for barrier search)

        puts("===HASHTABLE CONTENT===");
        hashtable_content_print(hashtable, hashtable_prime_size);
        puts("===END===");

        hashtable_elements_fill(textfile, hashtable, hashtable_prime_size);

        puts("===HASHTABLE CONTENT===");
        hashtable_content_print(hashtable, hashtable_prime_size);
        puts("===END===");

        free(hashtable);
    }
    else
        perror("Cannot assign memory location.");

    fclose(textfile);

    return EXIT_SUCCESS;
}