Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <pthread.h>
- #include <string.h>
- #include <ctype.h>
- #include <sys/time.h>
- #define UNIQUE_WORDS 50000
- typedef enum
- {
- false,
- true
- } bool;
- clock_t timers[32] = {0};
- double restart_timer(int index)
- {
- double lap = ((double)(clock() - timers[index])) / CLOCKS_PER_SEC;
- timers[index] = clock();
- return lap;
- }
- typedef struct word_t
- {
- pthread_mutex_t lock;
- char *word;
- unsigned count;
- } word_t;
- typedef struct word_counter_t
- {
- pthread_mutex_t lock;
- word_t words[UNIQUE_WORDS];
- unsigned num_words;
- } word_counter_t;
- void new_word(word_counter_t *word_counter, char *word)
- {
- if (word_counter->num_words >= UNIQUE_WORDS)
- {
- printf("MORE THAN %u UNIQUE WORDS! ABORT!\n", UNIQUE_WORDS);
- exit(10);
- }
- char *new_word = malloc(sizeof(char) * strlen(word));
- strcpy(new_word, word);
- word_counter->words[word_counter->num_words].word = new_word;
- word_counter->words[word_counter->num_words].count = 1;
- pthread_mutex_init(
- &word_counter->words[word_counter->num_words].lock,
- NULL);
- word_counter->num_words++;
- }
- void add_word(word_counter_t *word_counter, char *word)
- {
- int ret = pthread_mutex_lock(&word_counter->lock);
- if (ret != 0)
- {
- printf("LOCK FAILED, %d", ret);
- exit(3);
- }
- bool found_word = false;
- for (unsigned i = 0; i < word_counter->num_words && found_word == false; i++)
- {
- if (strcmp(word, word_counter->words[i].word) == 0)
- {
- found_word = true;
- pthread_mutex_lock(&word_counter->words[i].lock);
- word_counter->words[i].count++;
- pthread_mutex_unlock(&word_counter->words[i].lock);
- }
- }
- if (found_word == false)
- {
- new_word(word_counter, word);
- }
- pthread_mutex_unlock(&word_counter->lock);
- }
- void init(word_counter_t *word_counter)
- {
- pthread_mutex_init(&word_counter->lock, NULL);
- word_counter->num_words = 0;
- }
- void destroy(word_counter_t *word_counter)
- {
- pthread_mutex_destroy(&word_counter->lock);
- for (unsigned i = 0; i < word_counter->num_words; i++)
- {
- free(word_counter->words[i].word);
- pthread_mutex_destroy(&word_counter->words[i].lock);
- }
- }
- void find_words(word_counter_t *word_counter, char *string)
- {
- char *word = strtok(string, " ");
- while (word != NULL)
- {
- add_word(word_counter, word);
- word = strtok(NULL, " ");
- }
- }
- typedef struct args_t
- {
- char file_path[512];
- unsigned num_threads;
- } args_t;
- args_t parse_args(int argc, char **argv)
- {
- if (argc < 2)
- {
- printf("Usage: word_count FILE_PATH NUM_THREADS\n");
- exit(2);
- }
- args_t args = {0};
- strcpy(args.file_path, argv[1]);
- args.num_threads = atoi(argv[2]);
- return args;
- }
- void file_to_str(char *src_file_path, char **dest_str, size_t *dest_size)
- {
- FILE *file_ptr = fopen(src_file_path, "r");
- fseek(file_ptr, 0, SEEK_END);
- size_t file_size = ftell(file_ptr);
- fseek(file_ptr, 0, SEEK_SET);
- char *file = malloc(sizeof(char) * file_size);
- fread(file, sizeof(char), file_size, file_ptr);
- fclose(file_ptr);
- *dest_str = file;
- *dest_size = file_size;
- }
- void swap(word_t *a, word_t *b)
- {
- if (a != b)
- {
- word_t t = *a;
- *a = *b;
- *b = t;
- }
- }
- int partition(word_t *words, int start, int end)
- {
- int pivot = start;
- int i = start + 1;
- while (i <= end)
- {
- if (words[i].count < words[pivot].count)
- {
- swap(&words[i], &words[pivot + 1]);
- swap(&words[pivot], &words[pivot + 1]);
- pivot++;
- }
- else
- {
- i++;
- }
- }
- return pivot;
- }
- void quick_sort(word_t *words, int start, int end)
- {
- if (start < end)
- {
- int p = partition(words, start, end);
- quick_sort(words, start, p - 1);
- quick_sort(words, p + 1, end);
- }
- }
- void sort(word_counter_t *word_counter)
- {
- quick_sort(word_counter->words, 0, word_counter->num_words - 1);
- }
- void print_stats(word_counter_t *word_counter)
- {
- printf("Word Count:\n");
- unsigned cols = 4;
- for (size_t i = 0; i < word_counter->num_words; i++)
- {
- printf("%-16s: %-8u",
- word_counter->words[i].word, word_counter->words[i].count);
- if (i % cols == cols - 1)
- printf("\n");
- }
- printf("\n%d Unique Words\n", word_counter->num_words);
- }
- void split_str(char *str, size_t num_parts, char **dest_parts)
- {
- size_t len = strlen(str);
- size_t chunk_size = len / num_parts;
- for (size_t i = 0; i < len; i++)
- {
- if (isalnum(str[i]) == 0 && str[i] != '\'')
- {
- str[i] = ' ';
- }
- else
- {
- str[i] = tolower(str[i]);
- }
- }
- for (size_t i = 0; i < num_parts; i++)
- {
- dest_parts[i] = &str[chunk_size * i];
- dest_parts[i][chunk_size - 1] = '\0';
- }
- }
- typedef struct worker_t
- {
- size_t id;
- pthread_t thread;
- char *slize;
- word_counter_t *word_counter;
- double work_time;
- } worker_t;
- void *worker_func(worker_t *self)
- {
- restart_timer(self->id);
- find_words(self->word_counter, self->slize);
- self->work_time = restart_timer(self->id);
- }
- int main(int argc, char **argv)
- {
- args_t args = parse_args(argc, argv);
- // prepare file content
- char *file_str;
- size_t file_size;
- char **parts = malloc(sizeof(char *) * args.num_threads);
- file_to_str(args.file_path, &file_str, &file_size);
- split_str(file_str, args.num_threads, parts);
- // initialize word counter
- word_counter_t word_counter;
- init(&word_counter);
- // create threads
- worker_t *workers = malloc(sizeof(worker_t) * args.num_threads);
- for (size_t i = 0; i < args.num_threads; i++)
- {
- workers[i].id = i + 1;
- workers[i].slize = parts[i];
- workers[i].word_counter = &word_counter;
- pthread_create(&workers[i].thread, NULL, (void *(*)(void *))worker_func, &workers[i]);
- }
- // join workers
- for (size_t i = 0; i < args.num_threads; i++)
- {
- pthread_join(workers[i].thread, NULL);
- }
- // sort word counter
- restart_timer(0);
- sort(&word_counter);
- double sort_time = restart_timer(0);
- // display results
- double work_time_sum = 0;
- print_stats(&word_counter);
- for (size_t i = 0; i < args.num_threads; i++)
- {
- printf("Thread #%d finished in %.2f seconds\n",
- workers[i].id,
- workers[i].work_time);
- work_time_sum += workers[i].work_time;
- }
- printf("For a total of %.2f seconds.\nSorting took %.2f seconds.\n",
- work_time_sum, sort_time);
- // cleanup
- destroy(&word_counter);
- free(file_str);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement