#!/usr/bin/env perl
# hwk6_jchri.pl   --  DNA Translation Application
#
# Usage                : hwk6_jchri.pl [File Name]
#
# Written by           : John Christin
# Created on           : November 04, 2010
#
# Notes: Stop codons are translated as "X". If your sequence contains multiple start codons within a single
# exon, only the largest exon will be printed.
#*******************************************************************************

# Enable strictures and lexical warnings for the whole script.  `use warnings`
# replaces the run-time assignment `$^W = 1`, which is executed only after the
# file has been compiled and therefore does not cover compile-time warnings.
use strict;
use warnings;

# Check the arguments on the command line: the path of the sequence file is
# required.
if ( @ARGV < 1 ) {
    die "Usage $0 <file path>\n";
}

# Check that the file specified on the command line exists.  A single element
# of @ARGV is the scalar $ARGV[0]; the slice form @ARGV[0] is a one-element
# list and is flagged by perl as "better written as $ARGV[0]".
unless ( -e $ARGV[0] ) {
    die "File does not exist\n";
}
# Translation lookup table embedded in a function.
#
# codonlookuptable($codon)
#   $codon - three-letter, upper-case DNA triplet.
#   Returns the one-letter amino-acid code for the triplet, "X" for a stop
#   codon, or the empty string when the triplet is undefined or not in the
#   table.
#
# The keys are template-strand triplets, i.e. the complement of the mRNA
# codon: "TAC" (mRNA AUG) is the start codon "M", and "ATT", "ATC" and "ACT"
# (mRNA UAA, UAG and UGA) are the stop codons.  All 64 triplets are present,
# so a valid triplet never falls through to the empty-string return.
sub codonlookuptable {
    # The single argument passed to the function.
    my ($codon) = @_;

    # Nothing to look up; avoid an "uninitialized value" warning below.
    return '' unless defined $codon;

    my %ProteinTable = (
        # Phenylalanine
        "AAA" => "F", "AAG" => "F",
        # Leucine
        "AAT" => "L", "AAC" => "L",
        "GAA" => "L", "GAG" => "L", "GAT" => "L", "GAC" => "L",
        # Isoleucine ("TAT" was missing from the original table)
        "TAA" => "I", "TAG" => "I", "TAT" => "I",
        # Methionine (start)
        "TAC" => "M",
        # Valine
        "CAA" => "V", "CAG" => "V", "CAT" => "V", "CAC" => "V",
        # Serine ("TCG" was missing from the original table)
        "AGA" => "S", "AGG" => "S", "AGT" => "S", "AGC" => "S",
        "TCA" => "S", "TCG" => "S",
        # Proline
        "GGA" => "P", "GGG" => "P", "GGT" => "P", "GGC" => "P",
        # Threonine
        "TGA" => "T", "TGG" => "T", "TGT" => "T", "TGC" => "T",
        # Alanine
        "CGA" => "A", "CGG" => "A", "CGT" => "A", "CGC" => "A",
        # Tyrosine
        "ATA" => "Y", "ATG" => "Y",
        # Stop codons
        "ATT" => "X", "ATC" => "X", "ACT" => "X",
        # Histidine
        "GTA" => "H", "GTG" => "H",
        # Glutamine
        "GTT" => "Q", "GTC" => "Q",
        # Asparagine
        "TTA" => "N", "TTG" => "N",
        # Lysine
        "TTT" => "K", "TTC" => "K",
        # Aspartic acid
        "CTA" => "D", "CTG" => "D",
        # Glutamic acid
        "CTT" => "E", "CTC" => "E",
        # Cysteine
        "ACA" => "C", "ACG" => "C",
        # Tryptophan
        "ACC" => "W",
        # Arginine ("TCC" is mRNA AGG; the original table had it as "S")
        "GCA" => "R", "GCG" => "R", "GCT" => "R", "GCC" => "R",
        "TCT" => "R", "TCC" => "R",
        # Glycine
        "CCA" => "G", "CCG" => "G", "CCT" => "G", "CCC" => "G",
    );

    # Unknown triplets contribute nothing to the protein string.
    return exists $ProteinTable{$codon} ? $ProteinTable{$codon} : '';
}
# Store the DNA sequence in a scalar.  Only the first line of the input
# (the file named on the command line, read through <>) is used.
my $dna = <>;

# An empty input yields undef; stop with a clear message instead of emitting
# "uninitialized value" warnings and printing nothing.
die "Input file is empty\n" unless defined $dna;

# Strip the line ending, including the "\r" of a CRLF-terminated file, which
# would otherwise be reported below as an invalid base.
$dna =~ s/[\r\n]+\z//;

# Check for invalid characters.
if ( $dna =~ m/[^ACTG]/ ) {
    die "Input can only contain ACTG and bases must be UPPERCASE\n";
}

# Feed the sequence through codonlookuptable three bases at a time and append
# each result to the growing protein.  The loop stops once fewer than three
# bases remain, so an incomplete trailing triplet is ignored.
my $protein = '';
for ( my $index = 0; $index + 3 <= length $dna; $index += 3 ) {
    my $inputCodon = substr( $dna, $index, 3 );
    $protein .= codonlookuptable($inputCodon);
}

# Collect every stretch running from a start ("M") to the nearest following
# stop ("X") using non-greedy matching.  ".+?" needs at least one residue
# between the two, so a bare "MX" is not reported.
my @result = ( $protein =~ m/(M.+?X)/g );

# Print the matches one per line.
foreach my $peptide (@result) {
    print "$peptide\n";
}