Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Still a better lovestory than Twilight.
- @author: Siim Sams IAPB15
- """
- import csv
def read_dna_data_from_file(filename):  # => string dna
    """Read a DNA sequence from a text file and return it as one string.

    The lines of the file are joined together with their line breaks
    removed; every other ASCII character (spaces included) is kept as is.
    Non-ASCII bytes, such as a leading byte-order mark, are dropped.

    Arguments:
    filename -- Expects a string that is the dna filename

    Returns:
    A string holding the file's ASCII content without line breaks
    """
    # Decoding as ASCII with errors="ignore" discards every byte >= 128 while
    # reading, so the result no longer depends on the platform's default
    # encoding and undecodable bytes cannot raise UnicodeDecodeError.
    with open(filename, encoding="ascii", errors="ignore") as dna_file:
        # join() avoids rebuilding the whole string once per line.
        return "".join(line.strip("\n") for line in dna_file)
# Complement table for transcription: A -> U, C -> G, G -> C, T -> A.
_DNA_TO_RNA = str.maketrans("ACGT", "UGCA")


def transcribe_dna_to_rna(dna):  # => string rna
    """Transcribe a DNA string into its complementary RNA string.

    Each base is replaced by its complement (A -> U, T -> A, G -> C,
    C -> G). Any other character is passed through unchanged.

    :param dna:
    String that is in dna form
    :return:
    String that is in RNA form
    """
    # A single translate() pass swaps all four bases at once, so no
    # temporary placeholder letter is needed for the G <-> C exchange and
    # unrelated characters in the input can never be rewritten by accident.
    return dna.translate(_DNA_TO_RNA)
# RNA codon -> one-letter amino acid code; stop codons map to the text "Stop".
_CODON_TO_AMINO = {
    "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
    "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
    "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",
    "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
    "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
    "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "UAU": "Y", "UAC": "Y", "UAA": "Stop", "UAG": "Stop",
    "CAU": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
    "AAU": "N", "AAC": "N", "AAA": "K", "AAG": "K",
    "GAU": "D", "GAC": "D", "GAA": "E", "GAG": "E",
    "UGU": "C", "UGC": "C", "UGA": "Stop", "UGG": "W",
    "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R",
    "AGU": "S", "AGC": "S", "AGA": "R", "AGG": "R",
    "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G",
}


def translate_rna_to_protein(rna):  # => string protein
    """Translate an RNA string into a protein string, codon by codon.

    The input is read in consecutive, non-overlapping groups of three
    characters starting at index 0. Each known codon contributes its
    one-letter amino acid code; stop codons contribute the literal text
    "Stop" and translation continues past them. Groups that are not a
    known codon (including an incomplete group at the end) add nothing.

    :param rna:
    String that is in rna form
    :return:
    String in protein form
    """
    codons = (rna[start:start + 3] for start in range(0, len(rna), 3))
    # Dictionary lookup replaces a scan over all 64 codons for every triplet.
    return "".join(_CODON_TO_AMINO.get(codon, "") for codon in codons)
def determine_species(classification_file, dna_file='EX05_DNA.txt'):  # => dict of species and count
    """Count how often each species' proteins occur in a DNA sample.

    The DNA file is read, transcribed to RNA and translated to a protein
    string. The classification CSV is then read; its first row is treated
    as a header and skipped, and identical rows are counted only once. For
    every remaining row, the number of non-overlapping occurrences of the
    row's third field in the protein string is added to the total kept
    under the row's second field.

    Arguments:
    classification_file -- path of the CSV file; the second field of a row
                           is used as the species (latin) name and the third
                           field as the protein sequence to look for
    dna_file -- path of the DNA file to analyse (defaults to the file name
                that used to be hard-coded here)

    Returns:
    A dict mapping every species name found in the CSV to its total
    match count (0 when none of its proteins occur)
    """
    protein = translate_rna_to_protein(
        transcribe_dna_to_rna(read_dna_data_from_file(dna_file)))

    # The with-block closes the CSV file; newline='' is what the csv module
    # asks for so that it can do its own line-ending handling.
    with open(classification_file, newline='') as names:
        rows = list(csv.reader(names))

    # Drop the header, collapse exact duplicate rows, and ignore rows that
    # are too short to carry a name and a protein (e.g. blank lines).
    unique_rows = {tuple(row) for row in rows[1:] if len(row) >= 3}

    # Every species starts at zero so that unmatched ones still appear.
    counts = {row[1]: 0 for row in unique_rows}
    for row in unique_rows:
        counts[row[1]] += protein.count(row[2])
    return counts
- '''
- print(read_dna_data_from_file('EX05_DNA.txt'))
- print(transcribe_dna_to_rna(read_dna_data_from_file('EX05_DNA.txt')))
- print(translate_rna_to_protein(transcribe_dna_to_rna(read_dna_data_from_file('EX05_DNA.txt'))))
- print(determine_species('EX05_Protein.csv'))
- '''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement