This week only. Pastebin PRO Accounts Christmas Special! Don't miss out! Want more features on Pastebin? Sign Up, it's FREE!
Guest

Text Analyzer Source

By: a guest on Oct 20th, 2013  |  syntax: C  |  size: 3.06 KB  |  views: 34  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. /*
  2.  * words.c
  3.  *
  4.  *  Created on: October 6, 2013
  5.  *      Author: Chris Cirefice
  6.  */
  7.  
  8. #include <stdio.h>
  9. #include <string.h>
  10. #include <ctype.h>
  11. #include <stdlib.h>
  12. #include "words.h"
  13.  
  14. int getNumUniqueWords(char text[], int size);
  15.  
  16. int main(int argc, char* argv[]) {
  17.  
  18.         setvbuf(stdout, NULL, 4, _IONBF); // For Eclipse... stupid bug. --> does NOT affect the program, just the output to console!
  19.  
  20.         int nbr_words;
  21.  
  22.         char text[] = "Some - \"text, a stdin\". We'll have! also repeat? We'll also have a repeat!";
  23.         int length = sizeof(text);
  24.         nbr_words = getNumUniqueWords(text, length);
  25.  
  26.         return 0;
  27. }
  28.  
  29. void free_memory(char **list, int size) {
  30.  
  31.         for (int i = 0; i < size; i ++) {
  32.                 // You can see that printing the values is fine, as long as free is not called.
  33.                 // When free is called, the program will crash if (size > strlen(list[i]))
  34.                 //printf("Wanna free value %d w/len of %d: %s\n", i, strlen(list[i]), list[i]);
  35.                 free(list[i]);
  36.         }
  37.         free(list);
  38. }
  39.  
  40. int getNumUniqueWords(char text[], int length) {
  41.         int numTotalWords = 0;
  42.         char *word;
  43.  
  44.         printf("Length: %d characters\n", length);
  45.  
  46.         char totalWords[length];
  47.         strcpy(totalWords, text);
  48.  
  49.         word = strtok(totalWords, " ,.-!?()\"0123456789");
  50.  
  51.         while (word != NULL) {
  52.                 numTotalWords ++;
  53.                 printf("%s\n", word);
  54.                 word = strtok(NULL, " ,.-!?()\"0123456789");
  55.         }
  56.  
  57.         printf("Looks like we counted %d total words\n\n", numTotalWords);
  58.  
  59.         char *uniqueWords[numTotalWords];
  60.         char *tempWord;
  61.         int wordAlreadyExists = 0;
  62.         int numUniqueWords = 0;
  63.  
  64.         char totalWordsCopy[length];
  65.         strcpy(totalWordsCopy, text);
  66.  
  67.         for (int i = 0; i < numTotalWords; i++) {
  68.                 uniqueWords[i] = NULL;
  69.         }
  70.  
  71.         // Tokenize until all the text is consumed.
  72.         word = strtok(totalWordsCopy, " ,.-!?()\"0123456789");
  73.         while (word != NULL) {
  74.  
  75.                 // Look through the word list for the current token.
  76.                 for (int j = 0; j < numTotalWords; j ++) {
  77.                         // Just for clarity, no real meaning.
  78.                         tempWord = uniqueWords[j];
  79.  
  80.                         // The word list is either empty or the current token is not in the list.
  81.                         if (tempWord == NULL) {
  82.                                 break;
  83.                         }
  84.  
  85.                         //printf("Comparing (%s) with (%s)\n", tempWord, word);
  86.  
  87.                         // If the current token is the same as the current element in the word list, mark and break
  88.                         if (strcmp(tempWord, word) == 0) {
  89.                                 printf("\nDuplicate: (%s)\n\n", word);
  90.                                 wordAlreadyExists = 1;
  91.                                 break;
  92.                         }
  93.                 }
  94.  
  95.                 // Word does not exist, add it to the array.
  96.                 if (!wordAlreadyExists) {
  97.                         uniqueWords[numUniqueWords] = malloc(strlen(word));
  98.                         uniqueWords[numUniqueWords] = word;
  99.                         numUniqueWords ++;
  100.                         printf("Unique: %s\n", word);
  101.                 }
  102.  
  103.                 // Reset flags and continue.
  104.                 wordAlreadyExists = 0;
  105.                 word = strtok(NULL, " ,.-!?()\"0123456789");
  106.         }
  107.  
  108.         // Print out the array just for funsies - make sure it's working properly.
  109.         for (int x = 0; x <numUniqueWords; x++) {
  110.                 printf("Unique list %d: %s\n", x, uniqueWords[x]);
  111.         }
  112.  
  113.         printf("\nNumber of unique words: %d\n\n", numUniqueWords);
  114.  
  115.         // Right below is where things start to suck.
  116.         free_memory(uniqueWords, numUniqueWords);
  117.  
  118.         return numUniqueWords;
  119. }
clone this paste RAW Paste Data