Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
/// A dictionary entry: the word's text, its frequency count, and the list of
/// adjacent-character pairs (bigrams) derived from the text.
class word {
public:
    std::string Word;                  ///< The word's text.
    std::vector<std::string> bigrams;  ///< Bigrams of Word, in left-to-right order.
    int freq;                          ///< Frequency count supplied at construction.

    /// Builds an entry and precomputes its bigrams.
    ///
    /// @param str  The word's text.
    /// @param f    The frequency count to store.
    ///
    /// Bigram rules:
    ///  - empty string      -> no bigrams
    ///  - single character  -> one "bigram" holding that character
    ///  - n >= 2 characters -> the n-1 overlapping two-character substrings
    word(std::string str, int f) : Word(str), bigrams(), freq(f) {
        const std::string::size_type len = Word.size();
        if (len == 1) {
            // A one-letter word has no pair; keep the letter itself so that
            // it can still match another one-letter word.
            bigrams.push_back(Word);
            return;
        }
        if (len >= 2) {
            bigrams.reserve(len - 1);
        }
        // `i + 1 < len` instead of `i < len - 1`: with an unsigned length the
        // latter wraps around for an empty string and reads out of bounds.
        for (std::string::size_type i = 0; i + 1 < len; ++i) {
            bigrams.push_back(Word.substr(i, 2));
        }
    }

    /// Orders entries by descending frequency; ties are broken by ascending
    /// lexicographic order of the text.
    bool operator<(const word &obj) const {
        if (this->freq != obj.freq) return this->freq > obj.freq;
        return this->Word < obj.Word;
    }
};
- double getK(word A, word B) {
- double Intersection=0, Union;
- int tmp[30] = {0};
- for (string a : A.bigrams) for (int i=0; i<B.bigrams.size(); i++) {
- string b = B.bigrams[i];
- if (a == b && tmp[i]==0) {
- tmp[i] = 1;
- Intersection += 1.0;
- break;
- }
- }
- Union = A.bigrams.size() + B.bigrams.size() - Intersection;
- //cout << A.Word << "#" << B.Word << " = " << Intersection/Union << "\n";
- return Intersection/Union;
- }
- string checkSpelling(vector<word> &dict, string str) {
- word a(str,0);
- double max=0.0;
- string MaxWord = dict.at(0).Word;
- for (word w : dict) {
- double k = getK(a,w);
- if (k > max) {
- max = k;
- MaxWord = w.Word;
- }
- }
- return MaxWord;
- }
- int main() {
- fstream words("count_big.txt");
- string str; int num;
- vector<word> dict;
- dict.reserve(60000);
- while (words >> str) {
- words >> num;
- dict.push_back(word(str,num));
- }
- words.close();
- sort(dict.begin(), dict.end());
- while (cin >> str) {
- cout << checkSpelling(dict,str) << endl;
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement