Advertisement
Guest User

lexicon

a guest
May 15th, 2015
507
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.11 KB | None | 0 0
  1. #include <iostream>
  2. #include <fstream>
  3. #include <vector>
  4. #include <string>
  5. #include <cstdlib>
  6. #include <algorithm>
  7. #include "Lexicon.h"
  8. using namespace std;
  9.  
  10. Lexicon::Lexicon(const char* file_name, unsigned int max_length){
  11.   word_list = vector<string>(10000);
  12.   full_letter_vectors = vector<vector<int> >(max_length+1, vector<int>() );
  13.  
  14.   for(unsigned int i = 0; i < max_length+1; i++){
  15.     vector<vector<vector<int> > > n_letter_words;    
  16.     for(unsigned int j = 0; j < i; j++){
  17.       vector<vector<int> > nth_letter_array(26, vector<int>() );      
  18.       n_letter_words.push_back(nth_letter_array);      
  19.     }
  20.     index_vectors.push_back(n_letter_words);
  21.   }
  22.  
  23.   ifstream inFile(file_name);
  24.   if ( !inFile ) {
  25.     cerr << "Cannot open " << file_name << "\n";
  26.     exit( 1 );
  27.   }
  28.  
  29.   string next = "";
  30.   int word_list_index = 0;
  31.   while(inFile >> next && next.length()<=max_length){
  32.     word_list[word_list_index]=next;
  33.     //cout<<next<<endl;
  34.  
  35.     for(unsigned int letter = 0; letter < next.length(); letter++){
  36.       index_vectors[next.length()][letter][next[letter]-'A'].push_back(word_list_index);
  37.       full_letter_vectors[next.length()].push_back(word_list_index);
  38.     }
  39.    
  40.     word_list_index++;
  41.   }
  42. }
  43.  
  44. Lexicon::~Lexicon(){
  45. }
  46.  
  47. string Lexicon::get_word(unsigned int i){
  48.   if(i<=word_list.size())
  49.     return word_list[i];
  50.   else return "";
  51. }
  52.  
  /**
   * Returns the elements common to s1 and s2.
   *
   * Delegates to std::set_intersection, so both inputs are expected to be
   * sorted in ascending order; the result is produced in that same order.
   */
  std::vector<int> intersection(std::vector<int> s1, std::vector<int> s2){
    std::vector<int> common;
    std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(),
                          std::back_inserter(common));
    return common;
  }
  60.  
  /**
   * Appends every element of s1 to the end of s2.
   *
   * s2 is taken by reference so the caller's vector actually receives the
   * elements (a by-value s2 would be discarded on return, making the call a
   * no-op). s1 stays a by-value copy, which keeps vector_copy(v, v) safe.
   *
   * @param s1  elements to append.
   * @param s2  destination vector; existing contents are kept.
   */
  void vector_copy(std::vector<int> s1, std::vector<int>& s2){
    s2.insert(s2.end(), s1.begin(), s1.end());
  }
  66.  
  67.  
  68. vector<int>* Lexicon::find_matches(string pattern){
  69.   vector<int>* matches = new vector<int>();
  70.   bool started = false;
  71.   for(unsigned int letter = 0; letter < pattern.length(); letter++){
  72.     //cout<<letter<<endl;
  73.     if(pattern[letter] != '?'){
  74.       if(!started){
  75.         vector<int> raw_matches = index_vectors[pattern.length()][letter][pattern[letter]-'A'];
  76.         for(unsigned int i = 0; i < raw_matches.size(); i++)
  77.           (*matches).push_back(raw_matches[i]);
  78.         // cout<<"msiavc: "<<(*matches).size()<<endl;
  79.         // cout<<"ivs: "<<index_vectors.size()<<endl;
  80.         // cout<<"ivspl: "<<index_vectors[pattern.length()].size()<<endl;
  81.         // cout<<"ivspll: "<<index_vectors[pattern.length()][letter].size()<<endl;
  82.         // cout<<"x: "<<index_vectors[pattern.length()][letter][pattern[letter]-'A'].size()<<endl;
  83.         // cout<<"msi0: "<<(*matches).size()<<endl;
  84.         started = true;
  85.       }else{
  86.         // cout<<"already had started"<<endl;
  87.         *matches = intersection(*matches,index_vectors[pattern.length()][letter][pattern[letter]-'A']);
  88.       }
  89.     }
  90.   }
  91.   if(!started){
  92.     vector<int> raw_matches = full_letter_vectors[pattern.length()];
  93.     for(unsigned int i = 0; i < raw_matches.size(); i++)
  94.       (*matches).push_back(raw_matches[i]);
  95.   }
  96.   //cout<<"msi: "<<(*matches).size()<<endl;
  97.   return matches;
  98. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement