Advertisement
Guest User

Untitled

a guest
Mar 8th, 2015
181
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 1.05 KB | None | 0 0
  #!/usr/bin/perl -w
  #
  # Counts lemma n-grams in a corpus and writes the frequent ones as a
  # Graphviz "digraph" to the output file.
  #
  # Usage:  perl <this script> N
  #   N   n-gram length, a positive integer (taken from $ARGV[0])
  #
  # Input:  the file named in $korpus.  Each line is split on "</w>" and
  #         every fragment containing  lemma="...">  contributes one lemma
  #         (the surrounding double quotes are kept as part of the lemma).
  # Output: the file named in $outputdatei.  One line per n-gram whose
  #         count is at least $minfreq, formatted as
  #             <TAB>"l1" -> "l2" [weight="COUNT"];
  #         ordered by ascending count, ties broken by n-gram text.
  #
  # The helper subs can be loaded without running the script: main() is
  # only invoked when this file is executed directly.

  use strict;
  use warnings;

  my $minfreq     = 3;                          # minimum count for an n-gram to be written
  my $korpus      = 'Pfad\\zum\\Korpus.txt';    # placeholder path to the corpus file
  my $outputdatei = "ngram_fertig.txt";

  main(@ARGV) unless caller;

  # Entry point: validates the n-gram length, reads the corpus, counts the
  # n-grams and writes the graph file.  Dies with a message on a bad
  # argument or on any I/O failure.
  sub main {
      my ($n_gramm_laenge) = @_;

      die "Usage: $0 <n-gram length (positive integer)>\n"
          unless defined $n_gramm_laenge
              && $n_gramm_laenge =~ /\A[1-9][0-9]*\z/;

      open(my $datei, '<', $korpus)
          or die "Cannot open '$korpus' for reading: $!\n";
      my @lemmata = lemmata_lesen($datei);
      close($datei);

      my $ngramme = ngramme_zaehlen($n_gramm_laenge, \@lemmata);

      open(my $output, '>', $outputdatei)
          or die "Cannot open '$outputdatei' for writing: $!\n";
      print {$output} graph_text($minfreq, $ngramme)
          or die "Cannot write to '$outputdatei': $!\n";
      # Buffered write errors only surface at close, so check it.
      close($output)
          or die "Cannot close '$outputdatei': $!\n";

      return;
  }

  # Reads all lines from the filehandle $fh and returns the list of lemmata
  # in corpus order.  Fragments without a lemma attribute (structural
  # markup, text after the last "</w>") are skipped; they must not repeat
  # the previously seen lemma.
  sub lemmata_lesen {
      my ($fh) = @_;

      my @lemmata;
      while (my $zeile = <$fh>) {
          chomp $zeile;
          for my $fragment (split /<\/w>/, $zeile) {
              push @lemmata, $1 if $fragment =~ m/lemma=(".*?")>/;
          }
      }

      return @lemmata;
  }

  # Counts every run of $n consecutive lemmata in the array referenced by
  # $lemmata.  Returns a hash reference mapping the n-gram text (lemmata
  # joined with " -> ") to its number of occurrences.  Returns an empty
  # hash when $n is smaller than 1 or larger than the number of lemmata.
  sub ngramme_zaehlen {
      my ($n, $lemmata) = @_;

      my %ngramme;
      return \%ngramme if $n < 1;

      # The range is empty when there are fewer than $n lemmata.
      for my $start (0 .. $#{$lemmata} - $n + 1) {
          my $ngramm = join(" -> ", @{$lemmata}[$start .. $start + $n - 1]);
          $ngramme{$ngramm}++;
      }

      return \%ngramme;
  }

  # Builds the Graphviz text for all n-grams in the hash referenced by
  # $ngramme whose count is at least $schwelle.  Lines are ordered by
  # ascending count; equal counts are ordered by n-gram text so the output
  # does not depend on hash ordering.
  sub graph_text {
      my ($schwelle, $ngramme) = @_;

      my @haeufige = grep { $ngramme->{$_} >= $schwelle } keys %{$ngramme};

      my $text = "digraph myGraph {\n";
      for my $key (sort { $ngramme->{$a} <=> $ngramme->{$b} or $a cmp $b } @haeufige) {
          $text .= "\t$key [weight=\"$ngramme->{$key}\"];\n";
      }
      $text .= "}";

      return $text;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement