Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <stdbool.h>
- #include <iso646.h>
- #include <string.h>
- #include <ctype.h>
- #include <stdint.h>
- #include <time.h>
- // exit on failure
- #define FAIL return EXIT_FAILURE
- #define MAX_WORD_LENGTH_PRIME 251 // some high word length
- #define LAST_ELEMENT "<END_OF_STRING>" // do not use in text
- #define EMPTY_ELEMENT "<EMPTY_SYMBOL>" // do not use in text
- #define MIN_PRIME_NUMBER 2
- // remainders for each letter code in hash function computation
- size_t remainders[MAX_WORD_LENGTH_PRIME];
- // hash table entry
- typedef struct
- {
- char word[MAX_WORD_LENGTH_PRIME]; // 8-bit UTF-8
- size_t count;
- } element; // to use declaration without struct keyword over all source
- typedef struct
- {
- size_t index;
- size_t count;
- } found_element;
- typedef struct
- {
- bool result;
- bool is_error;
- size_t index;
- size_t new_size;
- } insert_status;
- typedef struct
- {
- bool result;
- size_t new_size;
- } increase_status;
- found_element found_data = { 0, 0 };
- void hashtable_content_print(element * hashtable, size_t size)
- {
- if (hashtable != NULL)
- {
- element element_i;
- printf("HASHTABLE PRIME SIZE = %zu\n", size);
- for(size_t i = 0; i< size; ++i)
- {
- element_i = hashtable[i];
- //if (strcmp(element_i.word, EMPTY_ELEMENT) != 0 and strcmp(element_i.word, LAST_ELEMENT) != 0)
- printf("%s count is %zu\n", element_i.word, element_i.count);
- }
- }
- else
- puts("NULL table pointer passed to print.");
- }
- void recalculate_remainders()
- {
- for (size_t i = 0; i < MAX_WORD_LENGTH_PRIME; ++i)
- remainders[i] = rand() % MAX_WORD_LENGTH_PRIME;
- }
- size_t get_next_prime_number(size_t current_prime_number) // required to get new hashtable size on reconstruction (with rounds == 1 we will got minimal memory loss)
- {
- size_t saved = current_prime_number;
- bool is_prime = true;
- if (current_prime_number >= MIN_PRIME_NUMBER)
- for(current_prime_number = current_prime_number + 1; current_prime_number < SIZE_MAX; ++current_prime_number)
- {
- for(size_t j = 2; j < current_prime_number; ++j)
- if (current_prime_number % j == 0)
- {
- is_prime = false;
- break;
- }
- else
- is_prime = true;
- if (is_prime)
- break;
- }
- return (current_prime_number == saved) ? 0 : current_prime_number;
- }
- // argc, argv checking
- bool check_params(int params_count, char *params[])
- {
- return (params_count == 2) and (params != NULL) and (params[1] != NULL);
- }
- size_t hash(char value[], size_t offset, size_t hashtable_prime_size) // double hashing probe
- {
- size_t result = 0;
- if (value != NULL and strlen(value) > 0)
- for (size_t i = 0; value[i] != '\0'; ++i)
- result += remainders[i] * value[i];
- else
- puts("NULL value pointer or empty value passed to hash function.");
- result = (result % hashtable_prime_size + offset * (1 + result % (hashtable_prime_size - 1))) % hashtable_prime_size;
- printf("word = %s; offset = %zu; hash = %zu\n", value, offset, result);
- return result;
- }
- bool hashtable_element_search(element * hashtable, char key[], size_t hashtable_prime_size)
- {
- if ((hashtable != NULL) and (key != NULL))
- {
- puts("SEARCHING...");
- element found;
- size_t offset = 0, index;
- do
- {
- if (strcmp((found = hashtable[index = hash(key, offset++, hashtable_prime_size)]).word, key) == 0)
- {
- found_data.index = index;
- found_data.count = found.count;
- printf("FOUND ELEMENT WORD = \"%s\" WITH INDEX = %zu AND COUNT = %zu\n", found.word, found_data.index, found_data.count);
- return true;
- }
- else
- printf("PROBE OF ELEMENT WORD = \"%s\" WITH INDEX = %zu AND KEY = \"%s\"\n", found.word, index, key);
- }
- while(offset != hashtable_prime_size);
- }
- else
- puts("NULL table pointer or NULL key pointer passed to search.");
- return false;
- }
- increase_status hashtable_size_increase(element * hashtable, size_t rounds, size_t hashtable_prime_size) // rounds is a number of prime hashtable size rises
- {
- increase_status result = { false, 0 };
- if (hashtable != NULL and rounds > 0)
- {
- printf("INCREASING HASHTABLE USING %zu ROUNDS...\n", rounds);
- size_t new_size, offset = 0, index = 0, saved_size = 0, old_size = 0;
- while(rounds >= 1)
- {
- saved_size = new_size = get_next_prime_number(hashtable_prime_size);
- --rounds;
- }
- puts("===HASHTABLE CONTENT BEFORE RESIZEMENT===");
- hashtable_content_print(hashtable, hashtable_prime_size);
- puts("===END===");
- element new_hashtable[new_size];
- memset(&new_hashtable, 0, new_size * sizeof(element));
- old_size = hashtable_prime_size;
- result.new_size = new_size;
- printf("NEW PRIME SIZE = %zu\n", new_size);
- new_hashtable[--new_size].count = 0;
- new_hashtable[new_size].word[0] = '\0';
- strcpy(new_hashtable[new_size].word, LAST_ELEMENT);
- // cleaning array
- while(--new_size > 0)
- {
- new_hashtable[new_size].count = 0;
- new_hashtable[new_size].word[0] = '\0';
- strcpy(new_hashtable[new_size].word, EMPTY_ELEMENT);
- }
- new_hashtable[new_size].count = 0;
- new_hashtable[new_size].word[0] = '\0';
- strcpy(new_hashtable[new_size].word, EMPTY_ELEMENT);
- element found;
- puts("FINDING EMPTY ELEMENT...");
- // empty element in new hashtable already exists
- while(strcmp((found = hashtable[new_size++]).word, LAST_ELEMENT) != 0)
- {
- printf("FINDING PLACE FOR WORD = \"%s\" WITH COUNT = %zu\n", found.word, found.count);
- if (strcmp(found.word, EMPTY_ELEMENT) == 0) // pass
- {
- puts("PASSED BECAUSE OF EMPTY");
- continue;
- }
- else
- {
- size_t stop_defense = 2 * hashtable_prime_size;
- while(strcmp(new_hashtable[index = hash(found.word, offset++, hashtable_prime_size)].word, EMPTY_ELEMENT) != 0 and offset < stop_defense);
- printf("TEMP TABLE WORD = \"%s\" AT INDEX = %zu\n", new_hashtable[index].word, index);
- new_hashtable[index].count = found.count;
- new_hashtable[index].word[0] = '\0';
- strcpy(new_hashtable[index].word, found.word);
- printf("POSTED WORD = \"%s\" WITH INDEX = %zu AND COUNT = %zu INTO TEMP TABLE\n", found.word, index, found.count);
- }
- }
- puts("===OLD HASHTABLE CONTENT===");
- hashtable_content_print(hashtable, old_size);
- puts("===END===");
- void * temp = realloc(hashtable, saved_size * sizeof(element));
- if(temp)
- {
- //memset(&temp, 0, sizeof(temp)); WHY ISN'T WORKING?
- puts("===TEMPORARY HASHTABLE CONTENT===");
- hashtable_content_print(hashtable, saved_size);
- puts("===END===");
- if (memcpy(temp, new_hashtable, saved_size * sizeof(element)))
- {
- hashtable = temp;
- puts("COPIED INTO NEW MEMORY LOCATION");
- puts("===NEW HASHTABLE CONTENT===");
- hashtable_content_print(hashtable, saved_size);
- puts("===END===");
- result.result = true;
- return result; // success
- }
- }
- else
- {
- puts("Memory reallocation fails. Terminated.");
- free(hashtable);
- exit(1);
- }
- }
- else
- puts("NULL pointer to hashtable or incorrect rounds count passed to increase function.");
- return result;
- }
- insert_status hashtable_element_insert(element * hashtable, element new_element, size_t hashtable_prime_size)
- {
- insert_status result = { false, true, 0, hashtable_prime_size };
- if (new_element.word != NULL and strcmp(new_element.word, LAST_ELEMENT) != 0 and strcmp(new_element.word, EMPTY_ELEMENT) != 0) //validation
- {
- puts("INSERTING...");
- result.is_error = false;
- if (hashtable_element_search(hashtable, new_element.word, hashtable_prime_size))
- {
- result.index = found_data.index;
- printf("STEP 1. FOUND AT INDEX = %zu. NOT INSERTED.\n", result.index);
- return result; // element already exists
- }
- if (!hashtable_element_search(hashtable, EMPTY_ELEMENT, hashtable_prime_size)) // key not found
- {
- puts("STEP 2. EMPTY ELEMENT NOT FOUND.");
- increase_status output = hashtable_size_increase(hashtable, 1, hashtable_prime_size); // reconstruct hashtable to add empty elements
- if (output.result)
- result.new_size = output.new_size;
- else
- {
- puts("Resizement of hashtable failed. Terminated.");
- free(hashtable);
- exit(1);
- }
- hashtable_element_search(hashtable, EMPTY_ELEMENT, hashtable_prime_size);
- }
- hashtable[found_data.index].count = new_element.count; // rewrite empty element
- hashtable[found_data.index].word[0] = '\0';
- strcpy(hashtable[found_data.index].word, new_element.word);
- result.result = true;
- printf("NEW ELEMENT WORD = \"%s\" FOUND WITH INDEX = %zu\n", new_element.word, found_data.index);
- return result;
- }
- else
- puts("Incorrect new element word.");
- return result;
- }
- void hashtable_elements_fill(FILE * textfile, element * hashtable, size_t hashtable_prime_size)
- {
- char word[MAX_WORD_LENGTH_PRIME];
- word[0] = '\0';
- bool is_word = false;
- char input;
- while(!feof(textfile) and !ferror(textfile))
- {
- input = getc(textfile);
- puts("FILLING...");
- if (!isspace(input)) // append to a whole UTF-8 word
- {
- is_word = true;
- strncat(word, &input, 1);
- }
- else if (is_word)
- {
- element new_element = { "", 1 };
- new_element.word[0] = '\0';
- strcpy(new_element.word, word);
- puts("===WORD===");
- puts(word);
- puts("===END===");
- insert_status output = hashtable_element_insert(hashtable, new_element, hashtable_prime_size);
- printf("OUTPUT INDEX = %zu\n", output.index);
- if(output.is_error)
- {
- puts("OUTPUT.IS_ERROR = TRUE");
- continue; // invalid word
- }
- else if (!output.result)
- {
- puts("OUTPUT.RESULT = FALSE. INCREASING COUNT...");
- printf("COUNT BEFORE = %zu\n", hashtable[output.index].count);
- ++hashtable[output.index].count; // increasing found word counter
- printf("COUNT AFTER = %zu\n", hashtable[output.index].count);
- }
- word[0] = '\0';
- is_word = false;
- }
- }
- if (ferror(textfile))
- perror("File reading error occured.");
- }
- int main(int params_count, char *params[])
- {
- srand(time(NULL)); // setup randomizer
- if (!check_params(params_count, params))
- {
- puts("Params check failed. Program terminated.");
- FAIL;
- }
- char *filename;
- filename = params[1];
- FILE *textfile = fopen(filename, "r");
- if(textfile == NULL)
- {
- puts("Cannot read input file. Program terminated.");
- FAIL;
- }
- printf("FILENAME = %s", filename);
- recalculate_remainders();
- printf("HASHTABLE SIZE = %zu", MIN_PRIME_NUMBER * sizeof(element));
- element * hashtable = malloc(MIN_PRIME_NUMBER * sizeof(element));
- if (hashtable)
- {
- size_t counter = MIN_PRIME_NUMBER,
- hashtable_prime_size = MIN_PRIME_NUMBER; // starts from minimal prime size because of empty textfile existence probability (we do not got memory loss in this case);
- hashtable[--counter].count = 0;
- hashtable[counter].word[0] = '\0';
- strcpy(hashtable[counter].word, LAST_ELEMENT);
- while(counter > 0)
- {
- hashtable[--counter].count = 0;
- hashtable[counter].word[0] = '\0';
- strcpy(hashtable[counter].word, EMPTY_ELEMENT);
- }
- // there are may be no words in a file at all (end element with NULL string always stored for barrier search)
- puts("===HASHTABLE CONTENT===");
- hashtable_content_print(hashtable, hashtable_prime_size);
- puts("===END===");
- hashtable_elements_fill(textfile, hashtable, hashtable_prime_size);
- puts("===HASHTABLE CONTENT===");
- hashtable_content_print(hashtable, hashtable_prime_size);
- puts("===END===");
- free(hashtable);
- }
- else
- perror("Cannot assign memory location.");
- fclose(textfile);
- return EXIT_SUCCESS;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement