Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/perl
# This script determines the length (in lines) of every post in the corpus
# and writes the average post length per drug type to a report file.
#
# Usage: script.pl [INPUT_VRT [OUTPUT_TXT]]
# Both paths are optional and default to the locations below.
#
# The corpus is read and written as raw bytes (no encoding layers, no
# "use utf8"), so drug names and the report header are passed through
# byte-for-byte.
use strict;
use warnings;

use constant {
    DEFAULT_INPUT  => '../land_der_traeume_gesamtkorpus_CWB_fertig.vrt',
    DEFAULT_OUTPUT => '../durschnittl_textlaenge_uber5.txt',
    # A drug is reported only if it has MORE than this many posts.
    MIN_POSTS      => 5,
};

# Returns the arguments with duplicates removed, keeping first-seen order.
sub del_double {
    my %seen;
    return grep { !$seen{$_}++ } @_;
}

# Reads the whole file at $path and returns its content as one string
# (empty string for an empty file). Dies if the file cannot be read.
sub slurp_file {
    my ($path) = @_;
    open(my $in, '<', $path)
        or die "Cannot open '$path' for reading: $!\n";
    # Undefined record separator: one read returns the entire file,
    # including its first line.
    local $/;
    my $content = <$in>;
    close($in) or die "Cannot close '$path': $!\n";
    return defined $content ? $content : '';
}

# Splits the corpus text at every "<text" and returns one [drug, length]
# pair per chunk that carries a drug="..." attribute. The length is the
# number of newline-separated lines in the chunk (the remainder of the
# opening tag line and any closing tag line are counted as well).
sub text_lengths {
    my ($corpus) = @_;
    my @records;
    for my $chunk (split /<text/, $corpus) {
        # Chunks without a drug attribute (e.g. anything before the first
        # <text) are not posts and are skipped.
        next unless $chunk =~ m/drug="(.*?)"/;
        my $drug = $1;
        # NOTE(review): earlier versions shifted one arbitrary entry off the
        # de-duplicated drug list; presumably that was meant to drop a junk
        # entry such as an empty name. Skipping empty names makes that
        # deterministic -- confirm this matches the intent.
        next if $drug eq '';
        my @lines = split /\n/, $chunk;
        push @records, [ $drug, scalar @lines ];
    }
    return @records;
}

# Takes a minimum post count followed by [drug, length] pairs and returns a
# hash (as a flat list) mapping each drug that has more than $min_posts
# posts to its average length.
sub average_lengths {
    my ($min_posts, @records) = @_;
    my (%total_for, %count_for);
    # Aggregate by exact drug name; names are used as hash keys, never as
    # regex patterns, so characters like "+" or "." are safe.
    for my $record (@records) {
        my ($drug, $length) = @{$record};
        $total_for{$drug} += $length;
        $count_for{$drug}++;
    }
    my %average_for;
    for my $drug (del_double(map { $_->[0] } @records)) {
        # Every drug here has at least one post, so the division is safe.
        next if $count_for{$drug} <= $min_posts;
        $average_for{$drug} = $total_for{$drug} / $count_for{$drug};
    }
    return %average_for;
}

# Writes the tab-separated report to $path: a header line, then one line per
# drug sorted by descending average length (ties broken by drug name so the
# output is reproducible). Dies on any I/O failure.
sub write_report {
    my ($path, %average_for) = @_;
    open(my $out, '>', $path)
        or die "Cannot open '$path' for writing: $!\n";
    print {$out} "Droge\tTextlängendurchschnitt\n"
        or die "Cannot write to '$path': $!\n";
    my @ranking = sort {
        $average_for{$b} <=> $average_for{$a} or $a cmp $b
    } keys %average_for;
    for my $drug (@ranking) {
        print {$out} "$drug\t$average_for{$drug}\n"
            or die "Cannot write to '$path': $!\n";
    }
    # Buffered write errors only surface at close time.
    close($out) or die "Cannot close '$path': $!\n";
    return;
}

# Entry point: reads the corpus at $input and writes the report to $output.
# Either argument may be omitted to use the default path.
sub main {
    my ($input, $output) = @_;
    $input  = DEFAULT_INPUT  unless defined $input;
    $output = DEFAULT_OUTPUT unless defined $output;
    my @records     = text_lengths(slurp_file($input));
    my %average_for = average_lengths(MIN_POSTS, @records);
    write_report($output, %average_for);
    return;
}

# Run only when executed as a script, not when loaded from another file.
main(@ARGV) unless caller;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement