Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/perl -w
# This module creates subcorpora.
#
# First stage: load the complete .vrt corpus file, cut it into one chunk per
# </text> closing tag and record the drug="..." attribute of every chunk.
{
    use strict;
    use warnings;

    # Package globals, because the code further down in this file reads them:
    #   @array     - the raw chunks, in corpus order
    #   @drugarray - the drug="..." value of every chunk that has one
    #                (one entry per chunk, so duplicates are expected)
    our (@array, @drugarray);

    my $input = "../land_der_traeume_gesamtkorpus_CWB_fertig.vrt";

    open(my $in, '<', $input)
        or die "Cannot open '$input' for reading: $!";

    # Slurp the file in a single read so that no line is consumed before the
    # content is collected; $/ is only undefined inside this do-block.
    my $alles = do { local $/; <$in> };
    close($in) or die "Cannot close '$input': $!";
    $alles = '' unless defined $alles;

    # Split on the complete closing tag so that no stray '>' is left at the
    # start of the following chunk.
    @array = split(m{</text>}, $alles);

    for my $text (@array) {
        # The first drug="..." on a line of the chunk decides its drug name;
        # chunks without such an attribute are not recorded.
        if ($text =~ m/drug="(.*?)"/) {
            push(@drugarray, $1);
        }
    }
}
# del_double(LIST)
#
# Removes duplicates from LIST.
#
# Parameters:
#   LIST - the values to deduplicate (compared as strings, since they are
#          used as hash keys).
#
# Returns:
#   In list context, every distinct value exactly once, in the order of its
#   first occurrence in LIST. In scalar context, the number of distinct
#   values. An empty LIST yields an empty list.
sub del_double {
    my %seen;

    # The post-increment is false only the first time a value is met, so
    # grep keeps exactly the first occurrence of each value and the input
    # order is preserved (hash key order would differ from run to run).
    return grep { !$seen{$_}++ } @_;
}
# Final stage: write one plain-text file per drug,
# ../subkorpora/subkorpus<drug>.txt, containing the converted text of every
# corpus chunk that belongs to that drug.
{
    use strict;
    use warnings;

    # Package globals filled earlier in this file: @array holds the corpus
    # chunks, @drugarray the drug names found in them.
    our (@array, @drugarray);

    # Assign every chunk to the drug named by its first drug="..." attribute.
    # Comparing the extracted name as a plain string (instead of building a
    # regex from it) keeps names containing characters such as '(' or '+'
    # from being misread as regex syntax.
    my %chunks_for;
    for my $chunk (@array) {
        if ($chunk =~ m/drug="(.*?)"/) {
            push(@{ $chunks_for{$1} }, $chunk);
        }
    }

    # Each drug once; every drug gets a file, none is skipped.
    @drugarray = del_double(@drugarray);

    for my $droge (@drugarray) {
        my $output = '../subkorpora/subkorpus' . $droge . ".txt";
        open(my $out, '>', $output)
            or die "Cannot open '$output' for writing: $!";

        for my $chunk (@{ $chunks_for{$droge} || [] }) {
            # Work on a copy so the chunks in @array stay untouched.
            my $text = $chunk;

            # Lines with at least two tabs are reduced to whatever follows
            # the second tab, plus a space instead of the newline
            # (presumably the lemma column - confirm against the corpus).
            $text =~ s/.*?\t.*?\t(.*?)\n/$1 /g;

            # Transliterate lower-case umlauts and sharp s. No I/O layer or
            # "use utf8" is in effect, so this matches raw bytes.
            # NOTE(review): assumes this script file and the corpus are
            # stored in the same encoding - confirm.
            $text =~ s/ä/ae/g;
            $text =~ s/ö/oe/g;
            $text =~ s/ü/ue/g;
            $text =~ s/ß/ss/g;

            # Drop the opening <text ...> tag and the @card@ placeholder.
            $text =~ s/<text.*?">//g;
            $text =~ s/\@card\@//g;

            print {$out} $text;
        }

        # Buffered write errors only surface here.
        close($out) or die "Cannot close '$output': $!";
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement