Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/perl
# Count lemma n-grams in a word-tagged corpus and write every n-gram that
# occurs at least $minfreq times to a Graphviz "digraph" file.
#
# Usage: perl <script> N
#   N   length of the n-grams to count (positive integer)
#
# Input : $korpus      - text whose tokens are terminated by "</w>" and carry
#                        a lemma="..." attribute directly before the ">".
# Output: $outputdatei - one line per n-gram of the form
#                        "lemma1" -> "lemma2" [weight="count"];
use strict;
use warnings;

my $n_gramm_laenge = $ARGV[0];
my $minfreq        = 3;
my $korpus         = 'Pfad\\zum\\Korpus.txt';
my $outputdatei    = "ngram_fertig.txt";

# Without a positive N the sliding window below would never reach its target
# length and would grow without bound, so reject bad input up front.
die "Usage: $0 N   (N = n-gram length, a positive integer)\n"
    unless defined $n_gramm_laenge
        && $n_gramm_laenge =~ /\A[0-9]+\z/
        && $n_gramm_laenge > 0;

open(my $datei, '<', $korpus)
    or die "Cannot open corpus '$korpus' for reading: $!\n";

my %ngramme;        # n-gram string => number of occurrences
my @ngramm_temp;    # sliding window holding the most recent lemmata

# The corpus is processed line by line; the window is NOT reset at line ends,
# so n-grams may span consecutive lines.
while (my $zeile = <$datei>) {
    chomp $zeile;

    for my $token (split m{</w>}, $zeile) {
        # Fragments without a lemma attribute are skipped. Pushing the
        # previous lemma again for them would create n-grams that do not
        # exist in the corpus.
        next unless $token =~ m/lemma=(".*?")>/;

        # The capture keeps its surrounding double quotes, so each lemma
        # appears quoted in the output.
        push @ngramm_temp, $1;
        next if @ngramm_temp < $n_gramm_laenge;

        $ngramme{ join(" -> ", @ngramm_temp) }++;
        shift @ngramm_temp;    # slide the window forward by one lemma
    }
}
close($datei) or die "Cannot close corpus '$korpus': $!\n";

# Keep only sufficiently frequent n-grams, ordered by ascending frequency;
# ties are broken alphabetically so repeated runs give identical output.
my @haeufige = sort { $ngramme{$a} <=> $ngramme{$b} or $a cmp $b }
               grep { $ngramme{$_} >= $minfreq }
               keys %ngramme;

open(my $output, '>', $outputdatei)
    or die "Cannot open '$outputdatei' for writing: $!\n";

print {$output} "digraph myGraph {\n";
for my $ngramm (@haeufige) {
    print {$output} "\t$ngramm [weight=\"$ngramme{$ngramm}\"];\n";
}
print {$output} "}";

# Buffered write errors only surface on close, so check it.
close($output) or die "Cannot close '$outputdatei': $!\n";
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement