Advertisement
Guest User

Text Analyzer Source

a guest
Oct 20th, 2013
153
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 3.06 KB | None | 0 0
  1. /*
  2.  * words.c
  3.  *
  4.  *  Created on: October 6, 2013
  5.  *      Author: Chris Cirefice
  6.  */
  7.  
  8. #include <stdio.h>
  9. #include <string.h>
  10. #include <ctype.h>
  11. #include <stdlib.h>
  12. #include "words.h"
  13.  
  14. int getNumUniqueWords(char text[], int size);
  15.  
  16. int main(int argc, char* argv[]) {
  17.  
  18.     setvbuf(stdout, NULL, 4, _IONBF); // For Eclipse... stupid bug. --> does NOT affect the program, just the output to console!
  19.  
  20.     int nbr_words;
  21.  
  22.     char text[] = "Some - \"text, a stdin\". We'll have! also repeat? We'll also have a repeat!";
  23.     int length = sizeof(text);
  24.     nbr_words = getNumUniqueWords(text, length);
  25.  
  26.     return 0;
  27. }
  28.  
  29. void free_memory(char **list, int size) {
  30.  
  31.     for (int i = 0; i < size; i ++) {
  32.         // You can see that printing the values is fine, as long as free is not called.
  33.         // When free is called, the program will crash if (size > strlen(list[i]))
  34.         //printf("Wanna free value %d w/len of %d: %s\n", i, strlen(list[i]), list[i]);
  35.         free(list[i]);
  36.     }
  37.     free(list);
  38. }
  39.  
  40. int getNumUniqueWords(char text[], int length) {
  41.     int numTotalWords = 0;
  42.     char *word;
  43.  
  44.     printf("Length: %d characters\n", length);
  45.  
  46.     char totalWords[length];
  47.     strcpy(totalWords, text);
  48.  
  49.     word = strtok(totalWords, " ,.-!?()\"0123456789");
  50.  
  51.     while (word != NULL) {
  52.         numTotalWords ++;
  53.         printf("%s\n", word);
  54.         word = strtok(NULL, " ,.-!?()\"0123456789");
  55.     }
  56.  
  57.     printf("Looks like we counted %d total words\n\n", numTotalWords);
  58.  
  59.     char *uniqueWords[numTotalWords];
  60.     char *tempWord;
  61.     int wordAlreadyExists = 0;
  62.     int numUniqueWords = 0;
  63.  
  64.     char totalWordsCopy[length];
  65.     strcpy(totalWordsCopy, text);
  66.  
  67.     for (int i = 0; i < numTotalWords; i++) {
  68.         uniqueWords[i] = NULL;
  69.     }
  70.  
  71.     // Tokenize until all the text is consumed.
  72.     word = strtok(totalWordsCopy, " ,.-!?()\"0123456789");
  73.     while (word != NULL) {
  74.  
  75.         // Look through the word list for the current token.
  76.         for (int j = 0; j < numTotalWords; j ++) {
  77.             // Just for clarity, no real meaning.
  78.             tempWord = uniqueWords[j];
  79.  
  80.             // The word list is either empty or the current token is not in the list.
  81.             if (tempWord == NULL) {
  82.                 break;
  83.             }
  84.  
  85.             //printf("Comparing (%s) with (%s)\n", tempWord, word);
  86.  
  87.             // If the current token is the same as the current element in the word list, mark and break
  88.             if (strcmp(tempWord, word) == 0) {
  89.                 printf("\nDuplicate: (%s)\n\n", word);
  90.                 wordAlreadyExists = 1;
  91.                 break;
  92.             }
  93.         }
  94.  
  95.         // Word does not exist, add it to the array.
  96.         if (!wordAlreadyExists) {
  97.             uniqueWords[numUniqueWords] = malloc(strlen(word));
  98.             uniqueWords[numUniqueWords] = word;
  99.             numUniqueWords ++;
  100.             printf("Unique: %s\n", word);
  101.         }
  102.  
  103.         // Reset flags and continue.
  104.         wordAlreadyExists = 0;
  105.         word = strtok(NULL, " ,.-!?()\"0123456789");
  106.     }
  107.  
  108.     // Print out the array just for funsies - make sure it's working properly.
  109.     for (int x = 0; x <numUniqueWords; x++) {
  110.         printf("Unique list %d: %s\n", x, uniqueWords[x]);
  111.     }
  112.  
  113.     printf("\nNumber of unique words: %d\n\n", numUniqueWords);
  114.  
  115.     // Right below is where things start to suck.
  116.     free_memory(uniqueWords, numUniqueWords);
  117.  
  118.     return numUniqueWords;
  119. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement