Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <vector>
- #include <set>
- #include <string>
- #include <algorithm>
- #include <sstream>
- #include <fstream>
- using namespace std;
// Dictionary entry: a word together with the set of its character bigrams
// and the frequency count read alongside it.
class WtoB{
public:
    std::string word;              // the dictionary word itself
    std::set<std::string> bigrams; // distinct bigrams of `word`
    int frequency;                 // frequency count associated with `word`

    // Stores copies of all three arguments in the corresponding members.
    WtoB(std::string word, std::set<std::string> bigrams, int frequency)
        : word(word), bigrams(bigrams), frequency(frequency) {}
};
// Returns the set of distinct bigrams (substrings of two adjacent characters)
// of `a`.
//
// Special cases:
//   * an empty string yields an empty set;
//   * a one-character string yields a set holding that single character, so
//     that one-letter words still have something to be compared by.
//
// `a` is only read; it is taken by non-const reference to keep the existing
// signature.
std::set<std::string> making_bigrams(std::string &a){
    std::set<std::string> bigrams;
    const std::string::size_type length = a.length();

    if (length == 1) {
        bigrams.insert(a);
        return bigrams;
    }

    // `i + 1 < length` instead of `i < length - 1`: length is unsigned, so
    // `length - 1` would wrap around for an empty string and the loop would
    // read far past the end of the buffer.
    for (std::string::size_type i = 0; i + 1 < length; ++i) {
        bigrams.insert(a.substr(i, 2));
    }
    return bigrams;
}
// Jaccard similarity of two bigram sets: |a ∩ b| / |a ∪ b|.
//
// Returns a value in [0, 1]. When both sets are empty the ratio is undefined
// (0/0); 0.0 is returned in that case so callers never receive NaN, which
// would make every subsequent comparison false.
//
// Neither set is modified; the non-const references are kept only to preserve
// the existing signature.
double Similarity(std::set<std::string> &a, std::set<std::string> &b){
    typedef std::set<std::string>::size_type size_type;

    // Probe the larger set with the elements of the smaller one.
    const bool a_is_smaller = a.size() <= b.size();
    const std::set<std::string> &smaller = a_is_smaller ? a : b;
    const std::set<std::string> &larger = a_is_smaller ? b : a;

    size_type common = 0;
    for (std::set<std::string>::const_iterator it = smaller.begin(); it != smaller.end(); ++it) {
        if (larger.count(*it) != 0) ++common;
    }

    // |a ∪ b| = |a| + |b| - |a ∩ b|, so the union never has to be built.
    const size_type total = a.size() + b.size() - common;
    if (total == 0) return 0.0;

    return static_cast<double>(common) / static_cast<double>(total);
}
- int main(int argc, const char * argv[]) {
- string a, a1;
- int frequency = 0;
- vector<WtoB> dictionary, result, result1;
- ifstream for_open;
- for_open.open("/Users/Vlad/Desktop/count_big1.txt");
- while(getline(for_open, a)){
- istringstream iss(a);
- iss >> a1 >> frequency;
- dictionary.push_back(WtoB(a1, making_bigrams(a1), frequency));
- }
- while(cin){
- string previous = a;
- cin >> a;
- if(a == previous) break;
- //cout << a << endl;
- set<string> temp = making_bigrams(a);
- double max_similarity = -1;
- for(int i = 0; i < dictionary.size(); i++){
- double similarity = Similarity(temp, dictionary[i].bigrams);
- if(max_similarity == similarity){
- result.emplace_back(dictionary[i]);
- }
- else if(max_similarity < similarity){
- max_similarity = similarity;
- if(result.size() > 0) result.clear();
- result.emplace_back(dictionary[i]);
- }
- }
- if(result.size() != 1){
- int max_frequency = 0;
- for(int i = 0; i < result.size(); i++){
- if(max_frequency < result[i].frequency){
- max_frequency = result[i].frequency;
- if(result1.size() > 0) result1.clear();
- result1.emplace_back(result[i]);
- }
- else if(max_frequency == result1[i].frequency) result1.emplace_back(result[i]);
- }
- if(result1.size() > 1){
- long size = result1.size();
- string min_str = result1[0].word;
- for(long i = 1; i < size; i++){
- if(min_str < result1[i].word) min_str = result1[i].word;
- }
- cout << min_str << endl;
- }
- else if(result1.size() == 1){
- cout << result1[0].word << endl;
- }
- }
- else{
- cout << result[0].word << endl;
- }
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement