Advertisement
Guest User

Untitled

a guest
May 20th, 2017
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 1.81 KB | None | 0 0
  1. #!/usr/bin/perl -w
  2.  
  3. use Tie::RDBM;
  4. use Digest::MD5 qw/md5_hex/;
  5.  
  6. sub processSymbol;
  7. sub simpKey;
  8.  
  # Sentinels. Use them as special values. Real words are stored with an "x"
  # prefix (see the inner loop below), so they can never collide with these.
  my $leader = "l"; # Prior to first word in a statement.
  my $ender = "e"; # After last word in a statement.

  # Transition table, persisted through Tie::RDBM. Keys are produced by
  # simpKey(); each value is a hash ref mapping a following symbol to the
  # number of times it has been seen after the keyed symbol.
  my %symbols;

  tie %symbols , 'Tie::RDBM', {db => 'dbi:mysql:databasename',
                  create=> 1,
                  user=> 'username',
                  password => 'password',
                  autocommit => 0 };

  # Make sure the leader bucket exists and is a hash ref. Buckets are
  # dereferenced as hashes by processSymbol(), so a plain scalar here (such as
  # the 0 an earlier run may have stored) is replaced with an empty hash.
  {
      my $leaderKey = simpKey($leader);
      $symbols{$leaderKey} = {}
          unless ref($symbols{$leaderKey}) eq 'HASH';
  }

  # Read the input line by line; every line is split into period-delimited
  # statements and every statement into whitespace-delimited words.
  while (my $line = <>)
  {
      chomp $line;

      foreach my $statement (split /\./, $line)
      {
          # split ' ' skips leading whitespace and treats a run of whitespace
          # as one separator, so no empty-string "words" are produced.
          my @words = split ' ', $statement;

          # A statement without words (e.g. between two adjacent periods)
          # carries no transitions worth recording.
          next unless @words;

          my $presentSymbol = $leader;

          foreach my $word (@words)
          {
              my $nextSymbol = "x$word";

              processSymbol($presentSymbol, $nextSymbol);
              # Slip into the future.
              $presentSymbol = $nextSymbol;
          }

          # Terminate the last symbol.
          processSymbol($presentSymbol, $ender);
      }

      # Commit our changes for this line (autocommit is off).
      (tied %symbols)->commit();
  }
  53.  
  # Turn a symbol into the short key under which it is stored in %symbols.
  #
  # Takes one string and returns the first eight lowercase hex digits of its
  # MD5 digest.
  #
  # NOTE(review): two different symbols can share the same truncated digest;
  # nothing in this script detects such a collision.
  sub simpKey
  {
      my ($symbol) = @_;

      # Raw symbols are not used as keys directly: the Tie::RDBM module was
      # observed to choke on some of them, so they are digested first.
      my $digest = md5_hex($symbol);

      # Eight hex digits are 32 bits, i.e. about 4 billion possible keys.
      return substr($digest, 0, 8);
  }
  67.  
  # Record one observation of $nextSymbol directly following $presentSymbol.
  #
  # Arguments:
  #   $presentSymbol - the current symbol; it is hashed with simpKey() to
  #                    form the key into %symbols.
  #   $nextSymbol    - the symbol that followed; used as-is as the key inside
  #                    the present symbol's bucket.
  #
  # Returns the updated count for the present -> next link.
  sub processSymbol
  {
      my ($presentSymbol, $nextSymbol) = @_;
      my $presentKey = simpKey($presentSymbol);

      # %symbols is tied to Tie::RDBM, which serialises reference values on
      # store and hands back a copy on fetch. Changing the fetched structure
      # therefore does not reach the database by itself: the bucket has to be
      # fetched, modified and stored back as a whole.
      my $bucket = $symbols{$presentKey};

      # An unknown symbol, or a bucket that is not a hash ref (such as a
      # plain 0 left by an earlier run), starts from an empty bucket.
      $bucket = {} unless ref($bucket) eq 'HASH';

      # Increment the count for the next symbol; a missing link starts at 0.
      my $count = ++$bucket->{$nextSymbol};

      # Write the whole bucket back so the new count is persisted.
      $symbols{$presentKey} = $bucket;

      return $count;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement