Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env perl
#
# Usage: script FILE
#
# Reads FILE as ISO-8859-10 text, one ';'-separated record per line, and
# writes every record to STDOUT as UTF-8. Records whose second field is the
# number 5 are treated as Skolt Sami: in those records a fixed set of
# ISO-8859-10 letters is replaced by the corresponding Skolt Sami letters
# (see %SKOLT_LETTER_FOR). All other records are passed through unchanged.
#
# NOTE(review): presumably the input uses the ISO-8859-10 letters as
# stand-ins for Skolt Sami letters the charset lacks -- confirm with the
# data provider.
use warnings;
use strict;
use Encode;
use charnames ':full';
use Scalar::Util qw(looks_like_number);

# Field separator of the input records.
my $sep = ";";

# Value of the second field that marks a Skolt Sami record.
my $SKOLT_CODE = 5;

# Letter as decoded from the input => Skolt Sami letter to emit.
# No replacement letter is also a key, so one pass over the text gives the
# same result as applying the replacements one after another.
my %SKOLT_LETTER_FOR = (
    "\N{LATIN CAPITAL LETTER E WITH OGONEK}"    => "\N{LATIN CAPITAL LETTER G WITH STROKE}",
    "\N{LATIN SMALL LETTER E WITH OGONEK}"      => "\N{LATIN SMALL LETTER G WITH STROKE}",
    "\N{LATIN CAPITAL LETTER E WITH DOT ABOVE}" => "\N{LATIN CAPITAL LETTER K WITH CARON}",
    "\N{LATIN SMALL LETTER E WITH DOT ABOVE}"   => "\N{LATIN SMALL LETTER K WITH CARON}",
    "\N{LATIN CAPITAL LETTER O WITH MACRON}"    => "\N{LATIN CAPITAL LETTER EZH}",
    "\N{LATIN SMALL LETTER O WITH MACRON}"      => "\N{LATIN SMALL LETTER EZH}",
    "\N{LATIN CAPITAL LETTER O WITH ACUTE}"     => "\N{LATIN CAPITAL LETTER EZH WITH CARON}",
    "\N{LATIN SMALL LETTER O WITH ACUTE}"       => "\N{LATIN SMALL LETTER EZH WITH CARON}",
    "\N{LATIN CAPITAL LETTER E WITH DIAERESIS}" => "\N{LATIN CAPITAL LETTER G WITH CARON}",
    "\N{LATIN SMALL LETTER E WITH DIAERESIS}"   => "\N{LATIN SMALL LETTER G WITH CARON}",
);

# Character class matching any one letter that has a replacement.
my $SKOLT_STANDIN_RE = do {
    my $standins = join '', sort keys %SKOLT_LETTER_FOR;
    qr/[$standins]/;
};

# Returns true when $code (the second field of a record) marks Skolt Sami.
# A missing or non-numeric field counts as "not Skolt Sami", so blank lines,
# short records and header rows pass through without triggering warnings.
sub _is_skolt_code {
    my ($code) = @_;
    return 0 unless defined $code && looks_like_number($code);
    return $code == $SKOLT_CODE ? 1 : 0;
}

# Converts one already-chomped, already-decoded record.
#   $line   - the full record text
#   returns - the record unchanged, or, for Skolt Sami records, the record
#             with every stand-in letter (in any field) replaced.
sub convert_line {
    my ($line) = @_;
    my @items = split /\Q$sep\E/, $line;

    # Test for Skolt Sami.
    return $line unless _is_skolt_code($items[1]);

    # Skolt Sami processing.
    (my $koltta = $line) =~ s/($SKOLT_STANDIN_RE)/$SKOLT_LETTER_FOR{$1}/g;
    return $koltta;
}

# Script entry point: converts the file named by the first argument.
# Dies when the argument is missing or the file cannot be opened.
sub main {
    my ($path) = @_;
    defined $path or die "ei voi avata: tiedoston nimi puuttuu\n";

    binmode(STDOUT, ':encoding(UTF-8)');

    # The input layer decodes ISO-8859-10 on read, so the loop below only
    # ever sees character strings.
    open(my $MML, '<:encoding(ISO-8859-10)', $path)
        or die "ei voi avata: $path\n$!";
    while (my $line = <$MML>) {
        chomp $line;
        print convert_line($line), "\n";
    }
    close $MML;
    return 0;
}

# Run only when executed directly, so the subs above can be loaded and
# exercised without reading a file.
exit main(@ARGV) unless caller;

1;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement