Advertisement
Guest User

word_count.c

a guest
May 27th, 2018
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 6.99 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <pthread.h>
  4. #include <string.h>
  5. #include <ctype.h>
  6. #include <sys/time.h>
  7.  
  8. #define UNIQUE_WORDS 50000
  9.  
  10. typedef enum
  11. {
  12.     false,
  13.     true
  14. } bool;
  15.  
  16. clock_t timers[32] = {0};
  17. double restart_timer(int index)
  18. {
  19.     double lap = ((double)(clock() - timers[index])) / CLOCKS_PER_SEC;
  20.     timers[index] = clock();
  21.     return lap;
  22. }
  23.  
  24. typedef struct word_t
  25. {
  26.     pthread_mutex_t lock;
  27.     char *word;
  28.     unsigned count;
  29. } word_t;
  30.  
  31. typedef struct word_counter_t
  32. {
  33.     pthread_mutex_t lock;
  34.     word_t words[UNIQUE_WORDS];
  35.     unsigned num_words;
  36. } word_counter_t;
  37.  
  38. void new_word(word_counter_t *word_counter, char *word)
  39. {
  40.     if (word_counter->num_words >= UNIQUE_WORDS)
  41.     {
  42.         printf("MORE THAN %u UNIQUE WORDS! ABORT!\n", UNIQUE_WORDS);
  43.         exit(10);
  44.     }
  45.  
  46.     char *new_word = malloc(sizeof(char) * strlen(word));
  47.     strcpy(new_word, word);
  48.  
  49.     word_counter->words[word_counter->num_words].word = new_word;
  50.     word_counter->words[word_counter->num_words].count = 1;
  51.     pthread_mutex_init(
  52.         &word_counter->words[word_counter->num_words].lock,
  53.         NULL);
  54.     word_counter->num_words++;
  55. }
  56.  
  57. void add_word(word_counter_t *word_counter, char *word)
  58. {
  59.     int ret = pthread_mutex_lock(&word_counter->lock);
  60.     if (ret != 0)
  61.     {
  62.         printf("LOCK FAILED, %d", ret);
  63.         exit(3);
  64.     }
  65.  
  66.     bool found_word = false;
  67.     for (unsigned i = 0; i < word_counter->num_words && found_word == false; i++)
  68.     {
  69.         if (strcmp(word, word_counter->words[i].word) == 0)
  70.         {
  71.             found_word = true;
  72.             pthread_mutex_lock(&word_counter->words[i].lock);
  73.             word_counter->words[i].count++;
  74.             pthread_mutex_unlock(&word_counter->words[i].lock);
  75.         }
  76.     }
  77.  
  78.     if (found_word == false)
  79.     {
  80.         new_word(word_counter, word);
  81.     }
  82.  
  83.     pthread_mutex_unlock(&word_counter->lock);
  84. }
  85.  
  86. void init(word_counter_t *word_counter)
  87. {
  88.     pthread_mutex_init(&word_counter->lock, NULL);
  89.     word_counter->num_words = 0;
  90. }
  91.  
  92. void destroy(word_counter_t *word_counter)
  93. {
  94.     pthread_mutex_destroy(&word_counter->lock);
  95.  
  96.     for (unsigned i = 0; i < word_counter->num_words; i++)
  97.     {
  98.         free(word_counter->words[i].word);
  99.         pthread_mutex_destroy(&word_counter->words[i].lock);
  100.     }
  101. }
  102.  
  103. void find_words(word_counter_t *word_counter, char *string)
  104. {
  105.     char *word = strtok(string, " ");
  106.  
  107.     while (word != NULL)
  108.     {
  109.         add_word(word_counter, word);
  110.         word = strtok(NULL, " ");
  111.     }
  112. }
  113.  
  114. typedef struct args_t
  115. {
  116.     char file_path[512];
  117.     unsigned num_threads;
  118. } args_t;
  119.  
  120. args_t parse_args(int argc, char **argv)
  121. {
  122.     if (argc < 2)
  123.     {
  124.         printf("Usage: word_count FILE_PATH NUM_THREADS\n");
  125.         exit(2);
  126.     }
  127.  
  128.     args_t args = {0};
  129.  
  130.     strcpy(args.file_path, argv[1]);
  131.     args.num_threads = atoi(argv[2]);
  132.  
  133.     return args;
  134. }
  135.  
  136. void file_to_str(char *src_file_path, char **dest_str, size_t *dest_size)
  137. {
  138.     FILE *file_ptr = fopen(src_file_path, "r");
  139.     fseek(file_ptr, 0, SEEK_END);
  140.     size_t file_size = ftell(file_ptr);
  141.     fseek(file_ptr, 0, SEEK_SET);
  142.  
  143.     char *file = malloc(sizeof(char) * file_size);
  144.     fread(file, sizeof(char), file_size, file_ptr);
  145.     fclose(file_ptr);
  146.  
  147.     *dest_str = file;
  148.     *dest_size = file_size;
  149. }
  150.  
  151. void swap(word_t *a, word_t *b)
  152. {
  153.     if (a != b)
  154.     {
  155.         word_t t = *a;
  156.         *a = *b;
  157.         *b = t;
  158.     }
  159. }
  160.  
  161. int partition(word_t *words, int start, int end)
  162. {
  163.     int pivot = start;
  164.     int i = start + 1;
  165.  
  166.     while (i <= end)
  167.     {
  168.         if (words[i].count < words[pivot].count)
  169.         {
  170.             swap(&words[i], &words[pivot + 1]);
  171.             swap(&words[pivot], &words[pivot + 1]);
  172.             pivot++;
  173.         }
  174.         else
  175.         {
  176.             i++;
  177.         }
  178.     }
  179.  
  180.     return pivot;
  181. }
  182.  
  183. void quick_sort(word_t *words, int start, int end)
  184. {
  185.     if (start < end)
  186.     {
  187.         int p = partition(words, start, end);
  188.         quick_sort(words, start, p - 1);
  189.         quick_sort(words, p + 1, end);
  190.     }
  191. }
  192.  
  193. void sort(word_counter_t *word_counter)
  194. {
  195.     quick_sort(word_counter->words, 0, word_counter->num_words - 1);
  196. }
  197.  
  198. void print_stats(word_counter_t *word_counter)
  199. {
  200.     printf("Word Count:\n");
  201.     unsigned cols = 4;
  202.  
  203.     for (size_t i = 0; i < word_counter->num_words; i++)
  204.     {
  205.  
  206.         printf("%-16s: %-8u",
  207.                word_counter->words[i].word, word_counter->words[i].count);
  208.  
  209.         if (i % cols == cols - 1)
  210.             printf("\n");
  211.     }
  212.  
  213.     printf("\n%d Unique Words\n", word_counter->num_words);
  214. }
  215.  
  216. void split_str(char *str, size_t num_parts, char **dest_parts)
  217. {
  218.     size_t len = strlen(str);
  219.     size_t chunk_size = len / num_parts;
  220.  
  221.     for (size_t i = 0; i < len; i++)
  222.     {
  223.         if (isalnum(str[i]) == 0 && str[i] != '\'')
  224.         {
  225.             str[i] = ' ';
  226.         }
  227.         else
  228.         {
  229.             str[i] = tolower(str[i]);
  230.         }
  231.     }
  232.  
  233.     for (size_t i = 0; i < num_parts; i++)
  234.     {
  235.         dest_parts[i] = &str[chunk_size * i];
  236.         dest_parts[i][chunk_size - 1] = '\0';
  237.     }
  238. }
  239.  
  240. typedef struct worker_t
  241. {
  242.     size_t id;
  243.     pthread_t thread;
  244.     char *slize;
  245.     word_counter_t *word_counter;
  246.     double work_time;
  247. } worker_t;
  248.  
  249. void *worker_func(worker_t *self)
  250. {
  251.     restart_timer(self->id);
  252.     find_words(self->word_counter, self->slize);
  253.     self->work_time = restart_timer(self->id);
  254. }
  255.  
  256. int main(int argc, char **argv)
  257. {
  258.     args_t args = parse_args(argc, argv);
  259.  
  260.     // prepare file content
  261.     char *file_str;
  262.     size_t file_size;
  263.     char **parts = malloc(sizeof(char *) * args.num_threads);
  264.     file_to_str(args.file_path, &file_str, &file_size);
  265.     split_str(file_str, args.num_threads, parts);
  266.  
  267.     // initialize word counter
  268.     word_counter_t word_counter;
  269.     init(&word_counter);
  270.  
  271.     // create threads
  272.     worker_t *workers = malloc(sizeof(worker_t) * args.num_threads);
  273.     for (size_t i = 0; i < args.num_threads; i++)
  274.     {
  275.         workers[i].id = i + 1;
  276.         workers[i].slize = parts[i];
  277.         workers[i].word_counter = &word_counter;
  278.         pthread_create(&workers[i].thread, NULL, (void *(*)(void *))worker_func, &workers[i]);
  279.     }
  280.  
  281.     // join workers
  282.     for (size_t i = 0; i < args.num_threads; i++)
  283.     {
  284.         pthread_join(workers[i].thread, NULL);
  285.     }
  286.  
  287.     // sort word counter
  288.     restart_timer(0);
  289.     sort(&word_counter);
  290.     double sort_time = restart_timer(0);
  291.  
  292.     // display results
  293.     double work_time_sum = 0;
  294.     print_stats(&word_counter);
  295.     for (size_t i = 0; i < args.num_threads; i++)
  296.     {
  297.         printf("Thread #%d finished in %.2f seconds\n",
  298.                workers[i].id,
  299.                workers[i].work_time);
  300.         work_time_sum += workers[i].work_time;
  301.     }
  302.     printf("For a total of %.2f seconds.\nSorting took %.2f seconds.\n",
  303.            work_time_sum, sort_time);
  304.  
  305.     // cleanup
  306.     destroy(&word_counter);
  307.     free(file_str);
  308. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement