Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <fstream>
- #include <vector>
- #include <string>
- #include <cstdlib>
- #include <algorithm>
- #include "Lexicon.h"
- using namespace std;
- Lexicon::Lexicon(const char* file_name, unsigned int max_length){
- word_list = vector<string>(10000);
- full_letter_vectors = vector<vector<int> >(max_length+1, vector<int>() );
- for(unsigned int i = 0; i < max_length+1; i++){
- vector<vector<vector<int> > > n_letter_words;
- for(unsigned int j = 0; j < i; j++){
- vector<vector<int> > nth_letter_array(26, vector<int>() );
- n_letter_words.push_back(nth_letter_array);
- }
- index_vectors.push_back(n_letter_words);
- }
- ifstream inFile(file_name);
- if ( !inFile ) {
- cerr << "Cannot open " << file_name << "\n";
- exit( 1 );
- }
- string next = "";
- int word_list_index = 0;
- while(inFile >> next && next.length()<=max_length){
- word_list[word_list_index]=next;
- //cout<<next<<endl;
- for(unsigned int letter = 0; letter < next.length(); letter++){
- index_vectors[next.length()][letter][next[letter]-'A'].push_back(word_list_index);
- full_letter_vectors[next.length()].push_back(word_list_index);
- }
- word_list_index++;
- }
- }
- Lexicon::~Lexicon(){
- }
- string Lexicon::get_word(unsigned int i){
- if(i<=word_list.size())
- return word_list[i];
- else return "";
- }
// Returns the elements common to s1 and s2, in order.
//
// Both inputs are expected to be sorted ascending, as std::set_intersection
// requires; the result is then sorted as well.
std::vector<int> intersection(std::vector<int> s1, std::vector<int> s2) {
    typedef std::vector<int>::const_iterator Iter;

    const Iter first_begin = s1.begin();
    const Iter first_end = s1.end();
    const Iter second_begin = s2.begin();
    const Iter second_end = s2.end();

    std::vector<int> common;
    std::set_intersection(first_begin, first_end,
                          second_begin, second_end,
                          std::back_inserter(common));
    return common;
}
// Appends every element of s1, in order, to the end of s2.
//
// s2 is taken by reference so the caller's vector actually receives the
// appended elements (a by-value s2 would be discarded on return, making the
// call a no-op). s1 stays by value, so passing the same vector for both
// arguments is safe: the source is a snapshot taken before s2 is modified.
void vector_copy(std::vector<int> s1, std::vector<int>& s2) {
    s2.insert(s2.end(), s1.begin(), s1.end());
}
- vector<int>* Lexicon::find_matches(string pattern){
- vector<int>* matches = new vector<int>();
- bool started = false;
- for(unsigned int letter = 0; letter < pattern.length(); letter++){
- //cout<<letter<<endl;
- if(pattern[letter] != '?'){
- if(!started){
- vector<int> raw_matches = index_vectors[pattern.length()][letter][pattern[letter]-'A'];
- for(unsigned int i = 0; i < raw_matches.size(); i++)
- (*matches).push_back(raw_matches[i]);
- // cout<<"msiavc: "<<(*matches).size()<<endl;
- // cout<<"ivs: "<<index_vectors.size()<<endl;
- // cout<<"ivspl: "<<index_vectors[pattern.length()].size()<<endl;
- // cout<<"ivspll: "<<index_vectors[pattern.length()][letter].size()<<endl;
- // cout<<"x: "<<index_vectors[pattern.length()][letter][pattern[letter]-'A'].size()<<endl;
- // cout<<"msi0: "<<(*matches).size()<<endl;
- started = true;
- }else{
- // cout<<"already had started"<<endl;
- *matches = intersection(*matches,index_vectors[pattern.length()][letter][pattern[letter]-'A']);
- }
- }
- }
- if(!started){
- vector<int> raw_matches = full_letter_vectors[pattern.length()];
- for(unsigned int i = 0; i < raw_matches.size(); i++)
- (*matches).push_back(raw_matches[i]);
- }
- //cout<<"msi: "<<(*matches).size()<<endl;
- return matches;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement