Advertisement
Guest User

WordCloud Junk

a guest
Mar 3rd, 2014
199
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 5.70 KB | None | 0 0
  1. #include <cctype>
  2. #include <cstdlib>
  3. #include <iostream>
  4. #include <fstream>
  5. #include <string>
  6. #include <algorithm>
  7.  
  8. // make a lower-case version of the input string, which
  9. //  is already a copy so its safe to modify.
  10. static std::string make_lower(std::string s)
  11. {
  12.     std::transform(s.begin(), s.end(), s.begin(), ::tolower);
  13.     return s;
  14. }
  15.  
  16. struct wordNode
  17. {
  18. public:
  19.     std::string myWord;
  20.     unsigned int freq_count;
  21.     wordNode *next;
  22.    
  23.     wordNode(const std::string& aWord)
  24.         : myWord(aWord), freq_count(1), next(NULL)
  25.     {}
  26. };
  27.  
  28. class wordCloud
  29. {
  30. public:
  31.     wordNode *head;
  32.     unsigned int size;
  33.    
  34.     // ctor/dtor
  35.     wordCloud(const std::string& s = "");
  36.     ~wordCloud();
  37.  
  38.     // copy semantics
  39.     wordCloud(const wordCloud&);
  40.     wordCloud& operator =(const wordCloud&);
  41.    
  42.     void loadFile(const std::string& fileName);
  43.     void insert(const std::string& aWord, unsigned int freq=1);
  44.     void print(std::ostream& os, unsigned int freq=0);
  45.  
  46.     // build a new wordCloud from entries in *this* cloud
  47.     //  that match the entries in the parameter.
  48.     wordCloud compareWith(const wordCloud& blacklist, unsigned int freq=1);
  49. };
  50.  
  51. wordCloud::wordCloud(const std::string& s)
  52.     : head(NULL), size(0)
  53. {
  54.     if (!s.empty())
  55.         loadFile(s);
  56. }
  57.  
  58. wordCloud::~wordCloud(void)
  59. {
  60.     while (head)
  61.     {
  62.         wordNode *temp = head;
  63.         head = head->next;
  64.         delete temp;
  65.     }
  66. }
  67.  
  68. // copy constructor
  69. wordCloud::wordCloud(const wordCloud& obj)
  70.     : head(NULL), size(0)
  71. {
  72.     wordNode **dst = &head;
  73.     wordNode *src = obj.head;
  74.     while (src)
  75.     {
  76.         *dst = new wordNode(*src);
  77.         dst = &(*dst)->next;
  78.     }
  79.     *dst = NULL;
  80. }
  81.  
  82. void wordCloud::insert(const std::string& aWord, unsigned int freq)
  83. {
  84.     // manufacture lower-case version of word;
  85.     std::string lcaseWord = make_lower(aWord);
  86.    
  87.     // search for the word by walking a pointer-to-pointer
  88.     //  through the pointers in the linked list.
  89.     wordNode** pp = &head;
  90.     while (*pp && (lcaseWord < (*pp)->myWord))
  91.         pp = &(*pp)->next;
  92.    
  93.     // if we stopped on something and the words match
  94.     //  we just increment the frequency count. otherwise
  95.     //  pp is holding the address of the pointer where we
  96.     //  need to insert the new node, so do so.
  97.     if (*pp && (*pp)->myWord == lcaseWord)
  98.     {
  99.         (*pp)->freq_count++;
  100.     }
  101.     else
  102.     {    // insert the node
  103.         wordNode *node = new wordNode(lcaseWord);
  104.         node->freq_count = freq;
  105.         node->next = *pp;
  106.         *pp = node;
  107.         ++size;
  108.     }
  109. }
  110.  
  111. void wordCloud::print(std::ostream& os, unsigned int freq)
  112. {
  113.     unsigned int totalWords = 0;
  114.     unsigned int uniqueWords = 0;
  115.     for (wordNode *p = head; p; p = p->next)
  116.     {
  117.         if (p->freq_count >= freq)
  118.         {
  119.             os << p->myWord;
  120.             if (freq > 0)
  121.                 os << " (" << p->freq_count << ")\n";
  122.             totalWords += p->freq_count;
  123.             ++uniqueWords;
  124.         }
  125.     }
  126.     os << "Unique words (freq >= " << freq << ") : " << uniqueWords << '\n';
  127.     os << "Total words  (freq >= " << freq << ") : " << totalWords << '\n';
  128. }
  129.  
  130. // load a file of whitespace separated words. the incoming
  131. //  content is expected to be free of all punctuation, and
  132. //  all non-alphanumeric data.
  133. void wordCloud::loadFile(const std::string& fileName)
  134. {
  135.     std::ifstream file(fileName);
  136.     std::string word;
  137.     while (file >> word)
  138.         insert(word);
  139. }
  140.  
  141. // this relies on the default sort order of the word cloud
  142. //  to efficiently compute the intersection of the two
  143. //  objects (this and the passed parameter). The results
  144. //  are returned in a separate wordCloud object built
  145. //  from that intersection
  146. wordCloud wordCloud::compareWith(const wordCloud& obj, unsigned int freq)
  147. {
  148.     wordNode *lhs = head, *rhs = obj.head;
  149.     wordCloud result;
  150.  
  151.     while (lhs && rhs)
  152.     {
  153.         // advance lhs until it meets or exceeds rhs. the frequency
  154.         //  is checked to ignore anything prefiltered by that floor
  155.         while (lhs && (lhs->freq_count < freq || lhs->myWord < rhs->myWord))
  156.             lhs = lhs->next;
  157.            
  158.         // advance rhs until it meets or exeeds lhs
  159.         while (lhs && rhs && rhs->myWord < lhs->myWord)
  160.             rhs = rhs->next;
  161.        
  162.         // if we still have pointers and their words match
  163.         //  insert the match and advance the lhs. include
  164.         //  the source file frequency count for bookeeping
  165.         if (lhs && rhs && lhs->myWord == rhs->myWord)
  166.         {
  167.             result.insert(lhs->myWord, lhs->freq_count);
  168.             lhs = lhs->next;
  169.         }
  170.     }
  171.     return result;
  172. }
  173.  
  174. int main()
  175. {
  176.     unsigned int freq; //variable for determined the print frequency
  177.     std::cout   << "This program will read words from a text file, print each word and the\n"
  178.                 << "amount of times the word appears within the file (frequency).\n\n"
  179.                 << "It will also compare the words to other words inside a blacklist and\n"
  180.                 << "skip those words when it prints.\n\n";
  181.    
  182.     std::cout << "Enter the frequency of words you would like to be printed: ";
  183.     std::cin >> freq;
  184.     std::cout << "\n\n";
  185.  
  186.     // load data files
  187.     wordCloud blacklist("blacklist.txt");
  188.     wordCloud wordlist("stage.txt");
  189.     wordCloud matches = wordlist.compareWith(blacklist, freq);
  190.  
  191.     std::cout << "\nBlacklist:\n";
  192.     blacklist.print(std::cout); //print blacklist
  193.    
  194.     std::cout << "\nWordlist:\n";
  195.     wordlist.print(std::cout, freq);
  196.    
  197.     // build frequency results
  198.     std::cout << "\nMatches:\n";
  199.     matches.print(std::cout, freq);
  200.    
  201.     return EXIT_SUCCESS;
  202. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement