Guest User

Untitled

a guest
May 25th, 2018
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.88 KB | None | 0 0
  1. #returns the protein sequence given a fasta file containing a full DNA sequence and several introns
  2. f = open("RNASplicinginput.txt", "r");
  3. #make dictionary of sequences
  4. d={};
  5. fullID = "";
  6. switch = "off";
  7. for line in f:
  8. if switch == "off":#this identifies the first item in the file as the main sequence
  9. fullID = line.strip()
  10. id = line.strip();
  11. s = "";
  12. switch = "on";
  13. if line[0] == ">": #separates the IDs from the sequences
  14. id = line.strip();
  15. s = "";
  16. else:
  17. s += str(line.strip());
  18. d[id] = s;
  19. f. close();
  20. fullseq = d[fullID];
  21.  
  22. #create dictionary of only introns
  23. inD = {i:d[i] for i in d if i!= fullID};
  24. #remove introns from full sequence
  25. for key in inD:
  26. fullseq = fullseq.replace(inD[key], "");
  27.  
  28. #translates to RNA:
  29. i = 0;
  30. RNA = "";
  31. while i < len(fullseq):
  32. if fullseq[i] == "T":
  33. RNA += "U";
  34. else:
  35. RNA+=str(fullseq[i]);
  36. i +=1;
  37.  
  38. #transcribes RNA to protein
  39. codons = {
  40. "UUU" : "F",
  41. "CUU" : "L",
  42. "AUU" : "I",
  43. "GUU" : "V",
  44. "UUC" : "F",
  45. "CUC" : "L",
  46. "AUC" : "I",
  47. "GUC" : "V",
  48. "UUA" : "L",
  49. "CUA" : "L",
  50. "AUA" : "I",
  51. "GUA" : "V",
  52. "UUG" : "L",
  53. "CUG" : "L",
  54. "AUG" : "M",
  55. "GUG" : "V",
  56. "UCU" : "S",
  57. "CCU" : "P",
  58. "ACU" : "T",
  59. "GCU" : "A",
  60. "UCC" : "S",
  61. "CCC" : "P",
  62. "ACC" : "T",
  63. "GCC" : "A",
  64. "UCA" : "S",
  65. "CCA" : "P",
  66. "ACA" : "T",
  67. "GCA" : "A",
  68. "UCG" : "S",
  69. "CCG" : "P",
  70. "ACG" : "T",
  71. "GCG" : "A",
  72. "UAU" : "Y",
  73. "CAU" : "H",
  74. "AAU" : "N",
  75. "GAU" : "D",
  76. "UAC" : "Y",
  77. "CAC" : "H",
  78. "AAC" : "N",
  79. "GAC" : "D",
  80. "UAA" : "Stop",
  81. "CAA" : "Q",
  82. "AAA" : "K",
  83. "GAA" : "E",
  84. "UAG" : "Stop",
  85. "CAG" : "Q",
  86. "AAG" : "K",
  87. "GAG" : "E",
  88. "UGU" : "C",
  89. "CGU" : "R",
  90. "AGU" : "S",
  91. "GGU" : "G",
  92. "UGC" : "C",
  93. "CGC" : "R",
  94. "AGC" : "S",
  95. "GGC" : "G",
  96. "UGA" : "Stop",
  97. "CGA" : "R",
  98. "AGA" : "R",
  99. "GGA" : "G",
  100. "UGG" : "W",
  101. "CGG" : "R",
  102. "AGG" : "R",
  103. "GGG" : "G"
  104. };
  105. i = 0;
  106. p="";
  107. while i < (len(RNA)-3):
  108. cod = RNA[i:i+3];
  109. p += codons[cod];
  110. i +=3;
  111. print p;
Add Comment
Please, Sign In to add comment