Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include "textstats.hpp"

#include <algorithm>
#include <cctype>
#include <iostream>
#include <map>
#include <string>
#include <unordered_set>
#include <vector>
/// Splits `s` into lower-cased tokens and appends them to `tokens`.
///
/// A token is a maximal run of characters none of which is in `delimiters`.
/// Empty runs (leading, trailing or adjacent delimiters) produce no token.
///
/// @param s          Text to tokenize.
/// @param delimiters Characters that separate tokens; they never appear in a token.
/// @param tokens     Output vector. Tokens are appended in order of appearance;
///                   anything already in the vector is left in place.
void get_tokens(const std::string &s, const std::unordered_set<char> &delimiters, std::vector<std::string> &tokens) {
    std::string current;
    for (char c : s) {
        if (delimiters.count(c)) {
            // A delimiter ends the token in progress, if there is one.
            if (!current.empty()) {
                tokens.push_back(current);
                current.clear();
            }
        } else {
            // Go through unsigned char: passing a negative char value to
            // tolower is undefined behaviour.
            current += static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        }
    }
    // Flush the last token when the input does not end with a delimiter.
    // Any non-empty run counts, including a single character, exactly as
    // for tokens in the middle of the text.
    if (!current.empty()) {
        tokens.push_back(current);
    }
}
/// Counts how often each token occurs and accumulates the counts in `freqdi`.
///
/// @param tokens Tokens to count.
/// @param freqdi Map from token to occurrence count. Counts already present
///               are incremented; tokens seen for the first time start at 1.
void get_type_freq(const std::vector<std::string> &tokens, std::map<std::string, int> &freqdi) {
    for (const auto &token : tokens) {
        // operator[] value-initialises a missing entry to 0, so a single
        // lookup handles both the "new" and the "already present" case.
        ++freqdi[token];
    }
}
/// Appends every distinct token not already in `wtypes`, then sorts `wtypes`.
///
/// @param tokens Tokens to reduce to their distinct values.
/// @param wtypes In/out vector. Elements already present are kept as they are
///               (including any duplicates among them); each token that is
///               not yet present is appended once. The whole vector is sorted
///               in ascending order before returning.
void get_types(const std::vector<std::string> &tokens, std::vector<std::string> &wtypes) {
    // Track membership in a hash set instead of scanning `wtypes` linearly
    // for every token, which made the function quadratic.
    std::unordered_set<std::string> seen(wtypes.begin(), wtypes.end());
    for (const auto &token : tokens) {
        // insert().second is true only when the token was not seen before.
        if (seen.insert(token).second) {
            wtypes.push_back(token);
        }
    }
    std::sort(wtypes.begin(), wtypes.end());
}
/// Appends to `words` every string in `wtypes` whose size() is at least `x`.
///
/// @param wtypes Candidate strings, examined in order.
/// @param x      Minimum size. Zero or a negative value accepts every string.
/// @param words  Output vector; matches are appended in the order they occur
///               in `wtypes`.
void get_x_length_words(const std::vector<std::string> &wtypes, int x, std::vector<std::string> &words) {
    // Comparing size() with a negative int would convert the int to a huge
    // unsigned value and reject everything, so clamp the threshold at zero
    // before moving to the unsigned type.
    const std::string::size_type min_len = x > 0 ? static_cast<std::string::size_type>(x) : 0;
    for (const auto &word : wtypes) {
        if (word.size() >= min_len) {
            words.push_back(word);
        }
    }
}
/// Appends to `words` every key of `freqdi` whose count is at least `x`.
///
/// @param freqdi Map from word to occurrence count.
/// @param x      Minimum count a word needs in order to be selected.
/// @param words  Output vector; selected words are appended in the map's
///               iteration order (ascending by key).
void get_x_freq_words(const std::map<std::string, int> &freqdi, int x, std::vector<std::string> &words) {
    // Iterate by const reference so each (word, count) pair is not copied.
    for (const auto &entry : freqdi) {
        if (entry.second >= x) {
            words.push_back(entry.first);
        }
    }
}
/// Groups the strings in `wtypes` by their size().
///
/// @param wtypes   Strings to group, examined in order.
/// @param lengthdi Map from size to the strings of that size. Each string is
///                 appended to the bucket for its size, so buckets keep the
///                 relative order of `wtypes` and any existing entries stay.
void get_words_by_length_dict(const std::vector<std::string> &wtypes, std::map<int, std::vector<std::string> > &lengthdi) {
    for (const auto &word : wtypes) {
        // The map is keyed by int while size() is unsigned; make the
        // conversion explicit instead of relying on implicit narrowing.
        lengthdi[static_cast<int>(word.size())].push_back(word);
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement