Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <cctype>
- #include <cstdlib>
- #include <iostream>
- #include <fstream>
- #include <string>
- #include <algorithm>
// Return a lower-case version of the input string.
//
// The argument is taken by value, so it is already a private copy that is
// safe to modify in place and hand back to the caller.
//
// Each character is routed through unsigned char before reaching
// std::tolower: passing a negative plain-char value (any byte >= 0x80 on
// platforms where char is signed) to tolower is undefined behaviour.
static std::string make_lower(std::string s)
{
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
    return s;
}
// One entry of the singly linked word list: a word, the number of times
// it has been counted, and a link to the following entry.
struct wordNode
{
    std::string myWord;            // the stored word
    unsigned int freq_count = 1;   // a freshly created node counts one occurrence
    wordNode *next = nullptr;      // following node, or null at the end of the list

    // Create an unlinked node holding a copy of aWord.
    wordNode(const std::string& aWord)
        : myWord(aWord)
    {
    }
};
- class wordCloud
- {
- public:
- wordNode *head;
- unsigned int size;
- // ctor/dtor
- wordCloud(const std::string& s = "");
- ~wordCloud();
- // copy semantics
- wordCloud(const wordCloud&);
- wordCloud& operator =(const wordCloud&);
- void loadFile(const std::string& fileName);
- void insert(const std::string& aWord, unsigned int freq=1);
- void print(std::ostream& os, unsigned int freq=0);
- // build a new wordCloud from entries in *this* cloud
- // that match the entries in the parameter.
- wordCloud compareWith(const wordCloud& blacklist, unsigned int freq=1);
- };
- wordCloud::wordCloud(const std::string& s)
- : head(NULL), size(0)
- {
- if (!s.empty())
- loadFile(s);
- }
- wordCloud::~wordCloud(void)
- {
- while (head)
- {
- wordNode *temp = head;
- head = head->next;
- delete temp;
- }
- }
- // copy constructor
- wordCloud::wordCloud(const wordCloud& obj)
- : head(NULL), size(0)
- {
- wordNode **dst = &head;
- wordNode *src = obj.head;
- while (src)
- {
- *dst = new wordNode(*src);
- dst = &(*dst)->next;
- }
- *dst = NULL;
- }
- void wordCloud::insert(const std::string& aWord, unsigned int freq)
- {
- // manufacture lower-case version of word;
- std::string lcaseWord = make_lower(aWord);
- // search for the word by walking a pointer-to-pointer
- // through the pointers in the linked list.
- wordNode** pp = &head;
- while (*pp && (lcaseWord < (*pp)->myWord))
- pp = &(*pp)->next;
- // if we stopped on something and the words match
- // we just increment the frequency count. otherwise
- // pp is holding the address of the pointer where we
- // need to insert the new node, so do so.
- if (*pp && (*pp)->myWord == lcaseWord)
- {
- (*pp)->freq_count++;
- }
- else
- { // insert the node
- wordNode *node = new wordNode(lcaseWord);
- node->freq_count = freq;
- node->next = *pp;
- *pp = node;
- ++size;
- }
- }
- void wordCloud::print(std::ostream& os, unsigned int freq)
- {
- unsigned int totalWords = 0;
- unsigned int uniqueWords = 0;
- for (wordNode *p = head; p; p = p->next)
- {
- if (p->freq_count >= freq)
- {
- os << p->myWord;
- if (freq > 0)
- os << " (" << p->freq_count << ")\n";
- totalWords += p->freq_count;
- ++uniqueWords;
- }
- }
- os << "Unique words (freq >= " << freq << ") : " << uniqueWords << '\n';
- os << "Total words (freq >= " << freq << ") : " << totalWords << '\n';
- }
- // load a file of whitespace separated words. the incoming
- // content is expected to be free of all punctuation, and
- // all non-alphanumeric data.
- void wordCloud::loadFile(const std::string& fileName)
- {
- std::ifstream file(fileName);
- std::string word;
- while (file >> word)
- insert(word);
- }
- // this relies on the default sort order of the word cloud
- // to efficiently compute the intersection of the two
- // objects (this and the passed parameter). The results
- // are returned in a separate wordCloud object built
- // from that intersection
- wordCloud wordCloud::compareWith(const wordCloud& obj, unsigned int freq)
- {
- wordNode *lhs = head, *rhs = obj.head;
- wordCloud result;
- while (lhs && rhs)
- {
- // advance lhs until it meets or exceeds rhs. the frequency
- // is checked to ignore anything prefiltered by that floor
- while (lhs && (lhs->freq_count < freq || lhs->myWord < rhs->myWord))
- lhs = lhs->next;
- // advance rhs until it meets or exeeds lhs
- while (lhs && rhs && rhs->myWord < lhs->myWord)
- rhs = rhs->next;
- // if we still have pointers and their words match
- // insert the match and advance the lhs. include
- // the source file frequency count for bookeeping
- if (lhs && rhs && lhs->myWord == rhs->myWord)
- {
- result.insert(lhs->myWord, lhs->freq_count);
- lhs = lhs->next;
- }
- }
- return result;
- }
- int main()
- {
- unsigned int freq; //variable for determined the print frequency
- std::cout << "This program will read words from a text file, print each word and the\n"
- << "amount of times the word appears within the file (frequency).\n\n"
- << "It will also compare the words to other words inside a blacklist and\n"
- << "skip those words when it prints.\n\n";
- std::cout << "Enter the frequency of words you would like to be printed: ";
- std::cin >> freq;
- std::cout << "\n\n";
- // load data files
- wordCloud blacklist("blacklist.txt");
- wordCloud wordlist("stage.txt");
- wordCloud matches = wordlist.compareWith(blacklist, freq);
- std::cout << "\nBlacklist:\n";
- blacklist.print(std::cout); //print blacklist
- std::cout << "\nWordlist:\n";
- wordlist.print(std::cout, freq);
- // build frequency results
- std::cout << "\nMatches:\n";
- matches.print(std::cout, freq);
- return EXIT_SUCCESS;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement