Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Standard genetic code: maps each of the 64 RNA codons (alphabet U, C, A, G)
# to its one-letter amino-acid code.  The three stop codons (UAA, UAG, UGA)
# map to "_".
codon_table = {
    "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
    "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
    "UAU": "Y", "UAC": "Y", "UAA": "_", "UAG": "_",
    "UGA": "_", "UGU": "C", "UGC": "C", "UGG": "W",
    "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
    "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "CAU": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
    "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R",
    "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",
    "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "AAU": "N", "AAC": "N", "AAA": "K", "AAG": "K",
    "AGU": "S", "AGC": "S", "AGA": "R", "AGG": "R",
    "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
    "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "GAU": "D", "GAC": "D", "GAA": "E", "GAG": "E",
    "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G",
}
- >seq1
- ATGCTGATGATAGGTATGGGTA
- GATAGATGAGAGAGATGAGAAT
- >seq2
- ATGCGATGATAGATG
- >seq3
- ATGC
def _translate_dna(dna):
    """Translate a DNA string into a string of one-letter amino-acid codes.

    Stop codons come out as "_" (as defined in ``codon_table``).  Anything
    that cannot be looked up -- a trailing incomplete codon or a codon with
    characters outside A/C/G/T/U -- comes out as "X".
    """
    # codon_table is keyed by RNA codons, so transcribe T -> U first.
    rna = dna.upper().replace("T", "U")
    return "".join(
        codon_table.get(rna[start:start + 3], "X")
        for start in range(0, len(rna), 3)
    )


def _write_record(outputFile, header, chunks):
    """Write one FASTA record: the header (if any), then the translated protein."""
    if header is not None:
        print(header, file=outputFile)
    if chunks:
        print(_translate_dna("".join(chunks)), file=outputFile)


def dna_to_protein(inputFilename, outputFilename="protein.fasta"):
    """Translate every DNA record of a FASTA file into a protein FASTA file.

    Each ">" header line is copied to the output unchanged and is followed
    by a single line holding the translation of that record.  Sequence lines
    belonging to one record are joined before translation so that codons
    spanning a line break keep the correct reading frame.  Blank lines are
    ignored.  Sequence lines that appear before the first header are
    translated and written without a header.

    Args:
        inputFilename: Path of the DNA FASTA file to read.
        outputFilename: Path of the protein FASTA file to write
            (overwritten if it exists).

    Returns:
        None.  The result is written to ``outputFilename``.

    Raises:
        OSError: If either file cannot be opened.
    """
    # The with-statement guarantees both handles are closed, even on error.
    with open(inputFilename, "r") as inputFile, \
            open(outputFilename, "w") as outputFile:
        header = None
        chunks = []
        for line in inputFile:
            line = line.strip()
            if not line:
                continue
            if line.startswith(">"):
                # A new header ends the previous record: flush it first.
                _write_record(outputFile, header, chunks)
                header = line
                chunks = []
            else:
                chunks.append(line)
        # Flush the final record, which has no following header to trigger it.
        _write_record(outputFile, header, chunks)
# Script entry: translate the records in dna.fasta and write the proteins
# to protein.fasta (both paths are relative to the working directory).
dna_to_protein("dna.fasta", "protein.fasta")
- >seq1
- MLMIGMGR_MREMRX
- >seq2
- MR__M
- >seq3
- MX
Add Comment
Please, Sign In to add comment