Advertisement
Guest User

Untitled

a guest
May 20th, 2017
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 1.86 KB | None | 0 0
  1. #!/usr/bin/perl -w
  2.  
  3. use strict;
  4.  
  5. use Tie::RDBM;
  6. use Digest::MD5 qw/md5_hex/;
  7.  
  8. use Data::Dumper;
  9.  
  # Forward declarations: both subs are defined at the bottom of the file but
  # are called from the top-level code below.
  sub processSymbol;
  sub simpKey;

  # Sentinels. Use them as special values.  Real words are always stored with
  # an "x" prefix (see the main loop), which keeps them distinct from these.
  my $leader = "l"; # Prior to first word in a statement.
  my $ender = "e"; # After last word in a statement.

  # Symbol table, tied to a database table through Tie::RDBM.  Keys are the
  # shortened hashes produced by simpKey(); values are buckets (hash refs)
  # counting which symbols followed the keyed symbol.
  my %symbols;
  tie %symbols , 'Tie::RDBM', {db => 'dbi:mysql:databasename',
                  create=> 1,
                  user=> 'username',
                  password => 'password',
                  autocommit => 0 }
      or die "Could not tie the symbol table to the database";

  print "Tied. Found " . scalar( keys( %symbols)) . " keys\n";

  # Make sure the leader has an entry.  The value 0 is a placeholder that
  # processSymbol() treats as "no bucket stored yet".
  $symbols{simpKey($leader)} = 0
      unless exists $symbols{simpKey($leader)};

  # Read the input line by line; each line is cut into statements at periods
  # and each statement into whitespace-separated words.
  while (defined(my $line = <>))
  {
      chomp $line;

      STATEMENT:
      foreach my $statement (split /\./, $line)
      {
          # split ' ' ignores leading whitespace and treats runs of
          # whitespace as one separator, so no empty words are produced
          # (split /\s/ yielded "" for the space that follows every period).
          my @words = split ' ', $statement;

          # An empty statement ("..", trailing ". ") carries no information;
          # skip it rather than recording a leader -> ender transition.
          next STATEMENT unless @words;

          my $presentSymbol = $leader;

          foreach my $word (@words)
          {
              my $nextSymbol = "x$word";

              processSymbol($presentSymbol, $nextSymbol);
              # Slip into the future.
              $presentSymbol = $nextSymbol;
          }

          # Terminate the last symbol.
          processSymbol($presentSymbol, $ender);
      }

      # Commit our changes for this line
      (tied %symbols)->commit();
  }
  61.  
  # Turn a symbol into the short key used to index %symbols.
  #
  # The symbol is hashed with MD5 (the tie module was choking on some of the
  # raw strings being fed to it) and only the first eight hex digits of the
  # digest are kept.  Eight hex digits are 32 bits, i.e. about 4 billion
  # possible keys; note that two different symbols can still map to the same
  # key.
  #
  # Takes the symbol as its only argument and returns the 8-character key.
  sub simpKey
  {
      my ($symbol) = @_;

      return substr(md5_hex($symbol), 0, 8);
  }
  75.  
  # Record one observed transition: $nextSymbol followed $presentSymbol.
  #
  # Arguments:
  #   $presentSymbol - the raw current symbol; it is hashed with simpKey()
  #                    to obtain the key used in %symbols.
  #   $nextSymbol    - the raw symbol that came next; it is used unhashed as
  #                    the key inside the bucket.
  #
  # Side effect: the bucket stored under the present symbol is created if
  # necessary, its counter for $nextSymbol is incremented, and the bucket is
  # written back to %symbols.
  sub processSymbol
  {
      my ($presentSymbol, $nextSymbol) = @_;
      my $key = simpKey($presentSymbol);

      # A single fetch instead of exists + fetch.  Anything that is not a
      # hash ref (a missing entry, the 0 placeholder, or any other plain
      # value) is treated as "no bucket yet" rather than being dereferenced.
      my $stored = $symbols{$key};
      my $bucket = ref($stored) eq 'HASH' ? $stored : {};

      $bucket->{$nextSymbol} += 1;

      # Save back to the hash.
      $symbols{$key} = $bucket;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement