Advertisement
Guest User

Untitled

a guest
Dec 15th, 2019
118
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.79 KB | None | 0 0
  1. #include "WordFrequency.h"
  2. #include "sanitize.h"
  3. #include <algorithm>
  4. #include <fstream>
  5. #include <string>
  6. #include <unordered_map>
  7.  
  8. WordFrequency::WordFrequency() { numWords = 0; } // default constructor
  9. void WordFrequency::readIn(const string &filename) {
  10.   std::ifstream file(filename);
  11.   std::string currentWord; // Word holder for placing into hash table
  12.   int total = 0;
  13.  
  14.   while (!file.eof()) {
  15.     getline(file, currentWord, ' ');
  16.     // total++;
  17.     sanitize(currentWord);
  18.     currentWord.erase(std::remove(currentWord.begin(), currentWord.end(), '\n'),
  19.                       currentWord.end());
  20.     for (int i = 0, length = currentWord.size(); i < length; i++) {
  21.       // check whether character is end characters or punctuation
  22.       if (std::ispunct(currentWord[i])) {
  23.         currentWord.erase(i--, 2);
  24.         length = currentWord.size();
  25.       }
  26.     }
  27.     if (frequencyTable.find(currentWord) == frequencyTable.end()) {
  28.       frequencyTable[currentWord] = 1;
  29.       numWords++;
  30.     } else {
  31.       frequencyTable[currentWord] += 1;
  32.     }
  33.     // frequencyTable[currentWord]++;
  34.     // std::cout << currentWord << std::endl;
  35.   }
  36.   // std::cout << frequencyTable.bucket_count() << std::endl;
  37.   // std::cout << total << std::endl;
  38.   std::unordered_map<string, int>::iterator itr;
  39.   for (itr = frequencyTable.begin(); itr != frequencyTable.end(); itr++) {
  40.     if (itr->first == "lifethe") {
  41.       std::cout << "true " << itr->second << std::endl;
  42.     }
  43.   }
  44.  
  45. } // add words from file to hash table
  46. size_t WordFrequency::numberOfWords() {
  47.   return numWords;
  48. } // return the number of unique words
  49. size_t WordFrequency::wordCount(const string &word) {
  50.   std::unordered_map<string, int>::iterator itr = frequencyTable.begin();
  51.   while (itr != frequencyTable.end()) {
  52.     if (word == itr->first) {
  53.       return itr->second;
  54.     }
  55.     itr++;
  56.   }
  57.   return 0;
  58.  
  59. } // return the number of occurrences of the given word
  60. string WordFrequency::mostFrequentWord() {
  61.   std::string word;
  62.   int freqWord = 0;
  63.   std::unordered_map<string, int>::iterator itr;
  64.   for (itr = frequencyTable.begin(); itr != frequencyTable.end(); itr++) {
  65.     if (itr == frequencyTable.begin()) {
  66.       word = itr->first;
  67.       freqWord = itr->second;
  68.     }
  69.     if (freqWord < itr->second) {
  70.       word = itr->first;
  71.       freqWord = itr->second;
  72.     }
  73.   }
  74.   return word;
  75. } // return the most frequent word
  76. size_t WordFrequency::maxBucketSize() {
  77.   int bigBucket =
  78.       frequencyTable.bucket_size(0); // Variable storing largest bucket size
  79.   for (int i = 0; i < frequencyTable.bucket_count(); i++) {
  80.     if (bigBucket < frequencyTable.bucket_size(i)) {
  81.       bigBucket = frequencyTable.bucket_size(i);
  82.     }
  83.   }
  84.   return bigBucket;
  85. } // return the size of the largest bucket
  86.   // in the hash table
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement