Guest User

spellchecker.cpp

a guest
Apr 30th, 2013
528
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include <iostream>
  2. #include <fstream>
  3. #include <vector>
  4. #include <map>
  5. #include <string>
  6. #include <algorithm>
  7. #include <cmath>
  8. using namespace std;
  9.  
  10. // ---------------------------------------
  11. struct trie_t: map< char, trie_t >
  12. {
  13.     size_t w;
  14.     trie_t() : w(0) {}
  15. }
  16. glossary;
  17.  
  18. trie_t & add( trie_t & t, const string & s )
  19. {
  20.     t.w++;
  21.     return !s.empty() ? add( t[ s[0] ], s.substr(1) )
  22.                       : t['$'];
  23. }
  24.  
  25. // ---------------------------------------
  26. struct rambler_t
  27. {
  28.     string         todo;
  29.     string         done;
  30.     size_t         cost;
  31.     const trie_t * road;
  32.  
  33.     rambler_t( const string & t, const string & d, size_t c, const trie_t * r )
  34.         :  todo( t ), done( d ), cost( c ), road( r ) {}
  35.     rambler_t() {}
  36. };
  37.  
  38. typedef vector<rambler_t> team_t;
  39.  
  40. // ---------------------------------------
  41. double ramb_chances( const rambler_t & a )  { return log( ( a.road->w + 1 ) * ( a.done.size() + 1 ) ) / ( 1 << a.cost ); }
  42.  
  43. bool ramb_chance_cmp ( const rambler_t & a, const rambler_t & b ) { return ramb_chances(a) > ramb_chances(b); }
  44. bool ramb_done_cmp   ( const rambler_t & a, const rambler_t & b ) { return a.done == b.done ? a.cost < b.cost : a.done < b.done; }
  45. bool ramb_uniq       ( const rambler_t & a, const rambler_t & b ) { return a.done == b.done; }
  46.  
  47. // ---------------------------------------
  48. void step_forward( const rambler_t & R, team_t & team, team_t & leaders )
  49. {
  50.     char           next  =       R.todo[0];
  51.     const string & todo = next ? R.todo.substr(1) : "";
  52.  
  53.     for( map<char, trie_t>::const_iterator
  54.             it = R.road->begin(); it != R.road->end(); ++it )
  55.     {
  56.         const trie_t * road  = &it->second;
  57.         char           dest  =  it->first;
  58.         if ( next  ==  dest )
  59.             team.push_back( rambler_t(   todo, R.done + dest,  R.cost    ,   road ));  // RAWWAY
  60.         else
  61.         {
  62.             team.push_back( rambler_t(   todo, R.done + dest,  R.cost + 1,   road ));  // CHANGE
  63.             team.push_back( rambler_t( R.todo, R.done + dest,  R.cost + 1,   road ));  // INSERT
  64.             if ( !next && dest == '$' )
  65.                 leaders.push_back( R );                                                // FINISH
  66.         }
  67.     }
  68.     if ( next )
  69.         team.push_back(     rambler_t(   todo, R.done,         R.cost + 1, R.road ));  // DELETE
  70. }
  71.  
  72. // ---------------------------------------
  73. team_t spellcheck( const string & word,
  74.                  const size_t max_cost, const size_t team_size )
  75. {
  76.     team_t walkers, leaders;
  77.            walkers.push_back( rambler_t( word, "", 0, &glossary ) ); // INITIAL
  78.  
  79.     while ( !walkers.empty() )
  80.     {
  81.         team_t next_g;
  82.         for( size_t i = 0; i < min( walkers.size(), team_size ); ++i )
  83.             if ( walkers[i].cost < max_cost )
  84.                 step_forward( walkers[i], next_g, leaders );
  85.  
  86.         walkers.swap( next_g );
  87.         sort( walkers.begin(), walkers.end(), ramb_chance_cmp );
  88.     }
  89.  
  90.     sort( leaders.begin(), leaders.end(), ramb_done_cmp );
  91.     leaders.resize( distance( leaders.begin(), unique( leaders.begin(), leaders.end(), ramb_uniq ) ) );
  92.     sort( leaders.begin(), leaders.end(), ramb_chance_cmp );
  93.     return leaders;
  94. }
  95.  
  96. // ---------------------------------------
  97. int main(int argc, char* argv[])
  98. {
  99.     ifstream fi( argv[1] );
  100.     if ( !fi.is_open() )
  101.         return 1;
  102.  
  103.     string word;
  104.     while( getline( fi, word ) )
  105.         add( glossary, word );
  106.  
  107.     while( getline( cin, word ) )
  108.     {
  109.         team_t leaders = spellcheck( word, word.size()/2, 512 );
  110.  
  111.         cout << word << endl;
  112.         for( size_t i = 0; i < leaders.size(); ++i )
  113.             cout << '\t' << leaders[i].done << ' ' << leaders[i].cost << endl;
  114.         cout << endl;
  115.     }
  116.  
  117.     return 0;
  118. }
RAW Paste Data