Advertisement
Guest User

Untitled

a guest
Jun 26th, 2017
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.43 KB | None | 0 0
  1. #!/usr/bin/env perl
  2. # hwk6_jchri.pl -- DNA Translation Application
  3. #
  4. # Usage : hwk6_jchri.pl [File Name]
  5. #
  6. # Written by : John Christin
  7. # Created on : November 04, 2010
  8. #
  9. # Notes: Stop codons are translated as "X". If your sequence contains multiple start codons within a single
  10. # exon, only the largest exon will be printed.
  11. #*******************************************************************************
  12.  
  # Strictures plus lexical, compile-time warnings (replaces the runtime-only
  # global switch "$^W = 1").
  use strict;
  use warnings;

  # Require the input file path as the first command-line argument.
  if ( @ARGV < 1 ) {
      die "Usage $0 <file path>\n";
  }

  # Make sure the file named on the command line exists before it is read.
  # $ARGV[0] is the scalar element; the previous @ARGV[0] was a one-element
  # array slice, which draws a "Scalar value @ARGV[0] better written as
  # $ARGV[0]" warning.
  unless ( -e $ARGV[0] ) {
      die "File does not exist\n";
  }
  23. #Translation lookup table embedded in a function
  # codonlookuptable($codon)
  #
  # Translates a single three-base DNA codon into a one-letter amino-acid code.
  #
  # The keys are written as template-strand codons, i.e. the complement of the
  # usual coding-strand codon: "TAC" => "M" is the start codon and
  # "ATT" / "ATC" / "ACT" => "X" are the three stop codons.
  #
  # Parameters:
  #   $codon - three-character string of upper-case A, C, G, T.
  #
  # Returns:
  #   The one-letter amino-acid code ("X" for a stop codon), or the empty
  #   string when $codon is not a key of the table (wrong length, lower case,
  #   characters other than A/C/G/T).
  #
  # The table covers all 64 codons. Earlier revisions lacked "TAT" and "TCG"
  # and mapped "TCC" to "S" instead of "R"; codons missing from the table were
  # silently dropped from the translated protein.
  sub codonlookuptable {
      my ($codon) = @_;

      my %ProteinTable = (
          # AA* / GA*
          "AAA" => "F", "AAG" => "F", "AAT" => "L", "AAC" => "L",
          "GAA" => "L", "GAG" => "L", "GAT" => "L", "GAC" => "L",
          # TA* (TAC is the start codon) / CA*
          "TAA" => "I", "TAG" => "I", "TAT" => "I", "TAC" => "M",
          "CAA" => "V", "CAG" => "V", "CAT" => "V", "CAC" => "V",
          # AG* / GG*
          "AGA" => "S", "AGG" => "S", "AGT" => "S", "AGC" => "S",
          "GGA" => "P", "GGG" => "P", "GGT" => "P", "GGC" => "P",
          # TG* / CG*
          "TGA" => "T", "TGG" => "T", "TGT" => "T", "TGC" => "T",
          "CGA" => "A", "CGG" => "A", "CGT" => "A", "CGC" => "A",
          # AT* (ATT and ATC are stop codons) / GT*
          "ATA" => "Y", "ATG" => "Y", "ATT" => "X", "ATC" => "X",
          "GTA" => "H", "GTG" => "H", "GTT" => "Q", "GTC" => "Q",
          # TT* / CT*
          "TTA" => "N", "TTG" => "N", "TTT" => "K", "TTC" => "K",
          "CTA" => "D", "CTG" => "D", "CTT" => "E", "CTC" => "E",
          # AC* (ACT is a stop codon) / GC*
          "ACA" => "C", "ACG" => "C", "ACT" => "X", "ACC" => "W",
          "GCA" => "R", "GCG" => "R", "GCT" => "R", "GCC" => "R",
          # TC* / CC*
          "TCA" => "S", "TCG" => "S", "TCT" => "R", "TCC" => "R",
          "CCA" => "G", "CCG" => "G", "CCT" => "G", "CCC" => "G",
      );

      # Explicit empty string for anything not in the table, so callers that
      # append the result never receive an accidental fall-through value.
      return exists $ProteinTable{$codon} ? $ProteinTable{$codon} : '';
  }
  # Read the DNA sequence. Only the first line of the input (the file named on
  # the command line, read through <>) is used.
  my $dna = <>;

  # An empty input yields undef; stop with a clear message instead of running
  # chomp and the checks below on an uninitialised value.
  die "Input file is empty\n" unless defined $dna;

  # Drop the line terminator, including the "\r" left behind by chomp when the
  # file uses CRLF line endings (it would otherwise be reported as an invalid
  # base).
  chomp $dna;
  $dna =~ s/\r\z//;

  # Reject anything other than upper-case A, C, T and G.
  if ( $dna =~ m/[^ACTG]/ ) {
      die "Input can only contain ACTG and bases must be UPPERCASE\n";
  }

  # Translate the sequence three bases at a time, appending each amino acid to
  # the growing protein string. The loop stops once fewer than three bases
  # remain, so a trailing partial codon is ignored.
  my $protein = '';
  for ( my $index = 0; $index + 3 <= length $dna; $index += 3 ) {
      $protein .= codonlookuptable( substr( $dna, $index, 3 ) );
  }

  # Collect every stretch that runs from a start ("M") to the nearest following
  # stop ("X"). The non-greedy ".+?" needs at least one residue in between, and
  # with /g the matches do not overlap.
  my @result = ( $protein =~ m/(M.+?X)/g );

  # Print one stretch per line.
  print "$_\n" for @result;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement