Advertisement
Guest User

Satzlängendurchschnitt

a guest
Mar 7th, 2015
150
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 2.21 KB | None | 0 0
  1. #/usr/bin/perl -w
  2.  
  3. #Dieses Modul ermittelt jede Satzlänge und die durchschnittliche Satzlänge pro Drogensorte
  4.  
  5. $input = "../land_der_traeume_gesamtkorpus_CWB_fertig.vrt";
  6. $output = '../satzanzahl_schnitt_uber5.txt';
  7.  
  8. open(INPUT, "< $input");
  9. open(OUTPUT, "> $output");
  10.  
  11. # my %hash;
  12.  
  13. while (<INPUT>) {
  14.     $alles = join("",<INPUT>);
  15.     @array = split(/<text/,$alles);
  16. }
  17.  
  18. foreach (@array) {
  19.     if ($_ =~ m/drug="(.*?)"/) {
  20.         $drug = $1;
  21.         push(@drugarray,$drug);
  22.         @zaehle = split(/[\n]/,$_);
  23.         @zaehle_satz = split(/\$\./,$_);
  24.         $laenge = @zaehle;
  25.         $satzlaenge = @zaehle_satz;
  26.         push(@zaehlarray,"$drug\t$laenge\n");
  27.         push(@satzlaengearray,"$drug\t$satzlaenge\n");
  28.         }
  29. }
  30.  
  31. sub del_double {
  32.     my %all;
  33.     $all{$_}=0 for @drugarray;
  34.     return (keys %all);
  35. }
  36.  
  37. @drugarray=&del_double(@drugarray);
  38. shift(@drugarray);
  39.  
  40. $elemente = @drugarray;
  41.  
  42. $counter = 0;
  43. $textlaengeabs = 0;
  44. $satzlaengeabs = 0;
  45.  
  46. foreach (@drugarray) {
  47.     $droge = $_;
  48.     foreach (@zaehlarray) {
  49.         if ($_ =~ m/^$droge\t(\d*?)\n/) {
  50.         $textlaengeabs = $textlaengeabs + $1;
  51.         }
  52.     }
  53.     foreach (@satzlaengearray) {
  54.         if ($_ =~ m/^$droge\t(\d*?)\n/) {
  55.         $satzlaengeabs = $satzlaengeabs + $1;
  56.         $counter++;
  57.         }
  58.     }  
  59.     $schnittwoerter = $textlaengeabs / $counter;
  60.     $schnittsaetze = $satzlaengeabs / $counter;
  61.     $satzlaenge = $schnittwoerter / $schnittsaetze;
  62.  
  63.     unless ($counter <= 5) {
  64.         push (@hasharraywoerter,"$droge");
  65.         push (@hasharraywoerter,"$schnittwoerter");
  66.     }
  67.     unless ($counter <= 5) {
  68.         push (@hasharraysaetze,"$droge");
  69.         push (@hasharraysaetze,"$schnittsaetze");
  70.     }
  71.     unless ($counter <= 5) {
  72.         push (@hasharraysatzlaenge,"$droge");
  73.         push (@hasharraysatzlaenge,"$satzlaenge");
  74.     }
  75.     undef $textlaengeabs;  
  76.     undef $satzlaengeabs;  
  77.     undef $counter;
  78.     undef $schnittwoerter;
  79.     undef $schnittsaetze;
  80.     undef $satzlaenge;
  81. }
  82.  
  83.  
  84. %hash_wort = @hasharraywoerter;
  85. %hash_satz = @hasharraysaetze;
  86. %hash_satzlaenge = @hasharraysatzlaenge;
  87.  
  88. print OUTPUT "Droge\tSatzanzahlsdurchschnitt\n";
  89.  
  90. # foreach my $key (sort {$hash_wort{$b} <=> $hash_wort{$a}} keys %hash_wort) {
  91. #   print "$key\t$hash_wort{$key}\n";
  92. # }
  93.  
  94. foreach my $key (sort{$hash_satz{$b} <=> $hash_satz{$a}} keys %hash_satz) {
  95.     print OUTPUT "$key\t$hash_satz{$key}\n";
  96. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement