Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/perl -w
#
# Claudia Gahleitner 0556004
#
# Exercise 5
use strict;
use warnings;

# Main program.
#
# Reads the input file named by the first command-line argument. Lines
# starting with ">" are record headers; lines consisting only of the letters
# A, C, G and T are sequence lines of the current record; every other line is
# ignored. Each record's sequence lines are passed to transcript() and the
# result to translate(), and one line "name: amino acids" per record is
# written to the output file named by the second argument.

if (@ARGV != 2)
{
    die("USAGE: ex5.pl input.txt output.txt\n");
}
my ($input, $output) = @ARGV;

# Three-argument open keeps special characters in the file names from being
# interpreted as part of the open mode. The bareword handle names are kept
# because the handles are closed by name at the end of the file.
open(INFO, '<', $input) or die("Kann $input nicht öffnen: $!");
open(OUTFILE, '>', $output) or die("Kann $output nicht öffnen: $!");

my @coding = <INFO>;
chomp @coding;

# File-scoped scalar that the main program itself no longer uses; it stays
# declared because subs further down the file may still refer to it.
my $i;

my $name;         # header text of the record currently being collected
my @newcoding;    # sequence lines collected for that record

# Transcribes and translates the collected lines of one record and writes the
# result line. Does nothing when no sequence lines were collected.
my $write_record = sub
{
    my ($record_name, @lines) = @_;
    return unless @lines;

    # Sequence lines that appear before any header have no name.
    $record_name = "(unnamed)" unless defined $record_name;

    my $mrna    = transcript(@lines);
    my @protein = translate($mrna);
    print OUTFILE "$record_name: @protein\n"
        or die("Kann nicht in $output schreiben: $!");
};

for my $line (@coding)
{
    if ($line =~ /^[AGCT]+$/i)
    {
        # The pattern accepts lower-case bases, but the later stages only
        # know upper-case letters, so an upper-cased copy is stored.
        push(@newcoding, uc($line));
    }
    elsif ($line =~ /^>(.+)/)
    {
        # A new header ends the previous record: write it under its own
        # name before remembering the new one.
        my $next_name = $1;
        $write_record->($name, @newcoding);
        $name      = $next_name;
        @newcoding = ();    # empty list, not [] (which would store a reference)
    }
}

# The last record is not followed by another header, so it is written here.
$write_record->($name, @newcoding);
# transcript(@dna_lines)
#
# Joins the given sequence lines into one string, replaces every upper-case
# "T" with "U" and returns the resulting string reversed.
#
# Parameters: a list of strings (the sequence lines of one record).
# Returns:    a single string, in scalar and in list context alike.
#
# The arguments are copied first, so the caller's strings are left untouched
# (elements of @_ alias the caller's variables).
sub transcript
{
    my @dna_lines = @_;

    my $mrna = join("", @dna_lines);
    $mrna =~ s/T/U/g;

    # reverse() only reverses the characters of a string in scalar context;
    # in list context it would reverse the one-element list and hand the
    # string back unchanged. scalar() makes the result context-independent.
    return scalar reverse($mrna);
}
# translate($mrna)
#
# Translates an mRNA string (upper-case letters A, C, G, U) into a list of
# three-letter amino-acid names.
#
# Reading starts at the first occurrence of "AUG" and proceeds in steps of
# three characters until a stop codon (UAA, UAG or UGA) is read. The stop
# codon itself is not part of the result.
#
# Parameters: the mRNA string (the last argument is used).
# Returns:    the list of amino-acid names, starting with "Met".
# Dies:       - when the string contains no "AUG" (the string is printed to
#               STDOUT first),
#             - when a triplet is not in the codon table,
#             - when the end of the string is reached before a stop codon.
sub translate
{
    my $mrna = pop(@_);
    $mrna = "" unless defined $mrna;

    my %codonTable = # associates base-triplets to the amino-acids they encode
    (
        "CUU" => "Leu", "CUC" => "Leu", "CUA" => "Leu", "CUG" => "Leu", "UUA" => "Leu", "UUG" => "Leu",
        "UCU" => "Ser", "UCC" => "Ser", "UCA" => "Ser", "UCG" => "Ser", "AGU" => "Ser", "AGC" => "Ser",
        "CGU" => "Arg", "CGC" => "Arg", "CGA" => "Arg", "CGG" => "Arg", "AGA" => "Arg", "AGG" => "Arg",
        "GGU" => "Gly", "GGC" => "Gly", "GGA" => "Gly", "GGG" => "Gly",
        "GCU" => "Ala", "GCC" => "Ala", "GCA" => "Ala", "GCG" => "Ala",
        "GUU" => "Val", "GUC" => "Val", "GUA" => "Val", "GUG" => "Val",
        "ACU" => "Thr", "ACC" => "Thr", "ACA" => "Thr", "ACG" => "Thr",
        "CCU" => "Pro", "CCC" => "Pro", "CCA" => "Pro", "CCG" => "Pro",
        "AUU" => "Ile", "AUC" => "Ile", "AUA" => "Ile",
        "UAU" => "Tyr", "UAC" => "Tyr",
        "UUU" => "Phe", "UUC" => "Phe",
        "UGU" => "Cys", "UGC" => "Cys",
        "AAU" => "Asn", "AAC" => "Asn",
        "GAU" => "Asp", "GAC" => "Asp",
        "CAA" => "Gln", "CAG" => "Gln",
        "GAA" => "Glu", "GAG" => "Glu",
        "CAU" => "His", "CAC" => "His",
        "AAA" => "Lys", "AAG" => "Lys",
        "AUG" => "Met",
        "UGG" => "Trp",
        "UAA" => "___", "UAG" => "___", "UGA" => "___",    # stop codons
    );

    # every protein starts with methionine (AUG)
    my $start = index($mrna, "AUG");
    if ($start == -1)
    {
        print "$mrna\n";
        die("ERROR: no start-codon found. Bailing out!");
    }

    my @protein;
    my $end = length($mrna);

    # Walk the string by index instead of shortening it, and include the
    # final complete triplet ($pos + 3 == $end), so that no codon is skipped.
    for (my $pos = $start; $pos + 3 <= $end; $pos += 3)
    {
        my $codon = substr($mrna, $pos, 3);
        if (!exists($codonTable{$codon}))
        {
            die("ERROR: $codon does not encode a valid amino acid. Bailing out!");
        }

        my $amino = $codonTable{$codon};
        if ($amino eq "___")    # we hit a stop-codon
        {
            return @protein;
        }
        push(@protein, $amino);
    }

    # Ran off the end of the sequence (possibly with an incomplete trailing
    # triplet) without reading a stop codon.
    die("ERROR: no stop-codon found.");
}
# Release both file handles. Errors from buffered writes only surface when
# the output handle is closed, so that close is checked.
close(INFO);
close(OUTFILE) or die("ERROR: could not close output file: $!");
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement