SHARE
TWEET

Untitled

a guest Mar 20th, 2017 69 in 10 days
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  // Normalizes text before n-gram extraction: lowercases every character, then
  // removes digits, punctuation, and space characters. Only ' ' is treated as
  // a space; tabs and newlines are left in place.
  //
  // w: text to clean (taken by value and edited in place).
  // Returns the cleaned string.
  std::string clean_string(std::string w) {
      // The <cctype> functions have undefined behaviour for negative arguments
      // (other than EOF), which is what a non-ASCII byte becomes when char is
      // signed. Routing each character through unsigned char avoids that, and
      // the lambdas also sidestep the overload ambiguity with the <locale>
      // templates of the same names.
      std::transform(w.begin(), w.end(), w.begin(), [](unsigned char c) {
          return static_cast<char>(std::tolower(c));
      });

      // Single pass that drops every unwanted character class at once.
      const auto is_unwanted = [](unsigned char c) {
          return std::isdigit(c) || std::ispunct(c) || c == ' ';
      };
      w.erase(std::remove_if(w.begin(), w.end(), is_unwanted), w.end());

      return w;
  }
  10.  
  // Produces every contiguous substring of length n ("n-gram") of w, in order
  // of starting position.
  //
  // w: text to split.
  // n: length of each n-gram.
  // Returns w.length() - n + 1 substrings, or an empty vector when n is 0 or
  // n is greater than w.length() (no full-length n-gram exists).
  std::vector<std::string> generate_ngrams(std::string w, std::size_t n) {
      std::vector<std::string> ngrams;

      // Guard before subtracting: w.length() - n is unsigned, so n > length
      // would wrap to a huge value and drive substr() past the end of w.
      if (n == 0 || n > w.length()) {
          return ngrams;
      }

      const std::size_t count = w.length() - n + 1;
      ngrams.reserve(count);
      for (std::size_t i = 0; i < count; ++i) {
          ngrams.push_back(w.substr(i, n));
      }

      return ngrams;
  }
  21.  
  22. void process_line(map<string, long>& m, string line, size_t n) {
  23.     string cleaned  = clean_string(line);
  24.     vector<string> ngram = generate_ngrams(cleaned, n);
  25.  
  26.     for(const auto &w : ngram) {
  27.      ++m[w];
  28.     }
  29.    
  30. }
  31.  
  // Comparator that orders (n-gram, count) pairs by their string key.
  //
  // Returns true when p1's string sorts strictly before p2's string; the
  // counts are ignored.
  bool pair_string_lessthan(const std::pair<std::string, long> &p1, const std::pair<std::string, long> &p2) {
      // Must compare p1 against p2: comparing a key with itself is always
      // false, which would make every pair look equivalent to a sort.
      return p1.first < p2.first;
  }
  36.  
  // Comparator on the count half of (n-gram, count) pairs.
  //
  // Returns true when p1's count is strictly larger than p2's count; the
  // string keys are ignored, and equal counts yield false.
  bool pair_frequency_greaterthan(const std::pair<std::string, long> &p1, const std::pair<std::string, long> &p2) {
      const long lhs_count = p1.second;
      const long rhs_count = p2.second;
      return rhs_count < lhs_count;
  }
RAW Paste Data
Top