Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env perl
# hwk6_jchri.pl -- DNA Translation Application
#
# Usage : hwk6_jchri.pl <File Name>
#
# Written by : John Christin
# Created on : November 04, 2010
#
# Notes: Stop codons are translated as "X". If your sequence contains multiple start codons within a single
# exon, only the largest exon will be printed.
#*******************************************************************************
# Strictures and lexical warnings for the whole script. "use warnings" also
# reports compile-time problems, which a runtime "$^W = 1" cannot.
use strict;
use warnings;

# A file path is required on the command line.
if ( @ARGV < 1 ) {
    die "Usage $0 <file path>\n";
}

# Stop early when the named file does not exist. The test uses the scalar
# element $ARGV[0]; @ARGV[0] would be a one-element array slice.
unless ( -e $ARGV[0] ) {
    die "File does not exist\n";
}
# Translation lookup table and its accessor.
#
# The table is keyed on DNA triplets exactly as they appear in the input.
# The mappings treat the input as the template strand: each key is the
# complement of the mRNA codon it stands for (e.g. "TAC" pairs with mRNA
# "AUG" and yields "M"). The three stop codons ("ATT", "ATC", "ACT") are
# translated as "X".
#
# The BEGIN block builds the hash once, at compile time, so it is populated
# no matter where in the file the sub is called from, and it is not rebuilt
# on every lookup.
BEGIN {
    my %protein_for = (
        AAA => 'F', AAG => 'F', AAT => 'L', AAC => 'L',
        GAA => 'L', GAG => 'L', GAT => 'L', GAC => 'L',
        TAA => 'I', TAG => 'I', TAT => 'I', TAC => 'M',
        CAA => 'V', CAG => 'V', CAT => 'V', CAC => 'V',
        AGA => 'S', AGG => 'S', AGT => 'S', AGC => 'S',
        GGA => 'P', GGG => 'P', GGT => 'P', GGC => 'P',
        TGA => 'T', TGG => 'T', TGT => 'T', TGC => 'T',
        CGA => 'A', CGG => 'A', CGT => 'A', CGC => 'A',
        ATA => 'Y', ATG => 'Y', ATT => 'X', ATC => 'X',
        GTA => 'H', GTG => 'H', GTT => 'Q', GTC => 'Q',
        TTA => 'N', TTG => 'N', TTT => 'K', TTC => 'K',
        CTA => 'D', CTG => 'D', CTT => 'E', CTC => 'E',
        ACA => 'C', ACG => 'C', ACT => 'X', ACC => 'W',
        GCA => 'R', GCG => 'R', GCT => 'R', GCC => 'R',
        TCA => 'S', TCG => 'S', TCT => 'R', TCC => 'R',
        CCA => 'G', CCG => 'G', CCT => 'G', CCC => 'G',
    );

    # codonlookuptable($codon)
    #
    # Parameters:
    #   $codon - a three-letter uppercase DNA triplet (A, C, G, T).
    #
    # Returns:
    #   The one-letter amino acid code for the triplet, "X" for a stop
    #   codon, or the empty string when the triplet is undefined or not in
    #   the table (so callers can append the result unconditionally).
    sub codonlookuptable {
        my ($codon) = @_;

        return '' unless defined $codon;
        return exists $protein_for{$codon} ? $protein_for{$codon} : '';
    }
}
# Read the DNA sequence. Only the first line delivered by <> (the file named
# on the command line) is used.
my $dna = <>;

# An empty input yields undef; stop with a clear message instead of feeding
# undef to the checks below.
die "Input file is empty\n" unless defined $dna;

# Remove the line terminator. A CR from Windows-style line endings is removed
# as well, so it is not reported as an invalid base.
$dna =~ s/[\r\n]+\z//;

# Reject anything other than uppercase A, C, T and G.
if ( $dna =~ m/[^ACTG]/ ) {
    die "Input can only contain ACTG and bases must be UPPERCASE\n";
}

# Translate the sequence triplet by triplet, starting at the first base.
# The match returns only complete triplets, so one or two leftover bases at
# the end are ignored. Each lookup result is appended in order, much like
# elongation on the ribosome.
my $protein = join '', map { codonlookuptable($_) } $dna =~ m/[ACGT]{3}/g;

# Collect every stretch that runs from a start ("M") to the nearest following
# stop ("X"). The non-greedy quantifier keeps each match as short as possible.
my @result = ( $protein =~ m/(M.+?X)/g );

# Print the matches, one per line.
print "$_\n" for @result;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement