# Untitled

"""
Still a better lovestory than Twilight.

@author: Siim Sams IAPB15
"""
import csv

def read_dna_data_from_file(filename):  # => string dna
    """
    Read a DNA file and return its contents as one continuous string.

    Arguments:
    filename -- Expects a string that is the dna filename

    Returns:
    A string holding every line of the file glued together, with the
    newline characters removed from the ends of each line and every
    character whose code point is 128 or above dropped.
    """
    with open(filename) as handle:
        # Only '\n' is stripped, so spaces and tabs inside the file survive.
        joined = ''.join(row.strip('\n') for row in handle)
    # Keep plain ASCII only; anything else is treated as gibberish.
    return ''.join(char for char in joined if ord(char) < 128)


def transcribe_dna_to_rna(dna):  # => string rna
    """
    Transcribe a DNA string into its complementary RNA string.

    Each base is swapped for its RNA complement in a single pass:
    A -> U, T -> A, G -> C, C -> G. Every other character (including
    lowercase letters) is copied through unchanged.

    :param dna:
    String that is in dna form
    :return:
    String that is in RNA form
    """
    # A translation table maps all four bases at once, so no temporary
    # placeholder letter is needed to swap G and C (a placeholder would
    # corrupt any input that already contains that letter).
    return dna.translate(str.maketrans("ATGC", "UACG"))


# RNA codon -> amino-acid letter. Stop codons map to the literal text "Stop".
_CODON_TABLE = {
    "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
    "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
    "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",
    "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
    "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
    "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "UAU": "Y", "UAC": "Y", "UAA": "Stop", "UAG": "Stop",
    "CAU": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
    "AAU": "N", "AAC": "N", "AAA": "K", "AAG": "K",
    "GAU": "D", "GAC": "D", "GAA": "E", "GAG": "E",
    "UGU": "C", "UGC": "C", "UGA": "Stop", "UGG": "W",
    "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R",
    "AGU": "S", "AGC": "S", "AGA": "R", "AGG": "R",
    "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G",
}


def translate_rna_to_protein(rna):  # => string protein
    """
    Translate an RNA string into a protein string, codon by codon.

    The input is read in consecutive groups of three characters starting
    at index 0. Each group found in the codon table contributes its
    amino-acid letter; stop codons contribute the text "Stop" and do not
    end the translation. Groups that are not in the table (unknown
    codons, or an incomplete group at the end) contribute nothing.

    :param rna:
    String that is in rna form
    :return:
    String in protein form
    """
    return "".join(
        _CODON_TABLE.get(rna[start:start + 3], "")
        for start in range(0, len(rna), 3)
    )


def determine_species(classification_file, dna_file='EX05_DNA.txt'):  # => dict of species and count
    """
    Count how often each species' proteins occur in the protein built from a DNA file.

    The DNA file is read, transcribed to RNA and translated to a protein
    string. The classification CSV is then read: its first row is skipped
    as a header, column index 1 is used as the species (latin) name and
    column index 2 as a protein sequence. Identical rows are only counted
    once. Rows with fewer than three columns (e.g. blank lines) are
    ignored.

    :param classification_file:
    String that is the path of the classification CSV file
    :param dna_file:
    String that is the path of the DNA file (defaults to 'EX05_DNA.txt')
    :return:
    Dict mapping every species name in the CSV to the summed number of
    non-overlapping occurrences of its sequences in the protein string
    (0 when none of them occur)
    """
    protein = translate_rna_to_protein(
        transcribe_dna_to_rna(read_dna_data_from_file(dna_file)))

    # newline='' is what the csv module expects for files it parses itself.
    with open(classification_file, newline='') as handle:
        reader = csv.reader(handle)
        next(reader, None)  # Skip the header row (no-op on an empty file)
        # dict.fromkeys removes duplicate rows while keeping file order.
        unique_rows = dict.fromkeys(
            tuple(row) for row in reader if len(row) >= 3)

    counts = {}
    for row in unique_rows:
        species, sequence = row[1], row[2]
        counts.setdefault(species, 0)  # Every species is listed, even with 0 hits
        # An empty sequence would "match" at every position, so skip it.
        if sequence:
            counts[species] += protein.count(sequence)
    return counts
'''
print(read_dna_data_from_file('EX05_DNA.txt'))
print(transcribe_dna_to_rna(read_dna_data_from_file('EX05_DNA.txt')))
print(translate_rna_to_protein(transcribe_dna_to_rna(read_dna_data_from_file('EX05_DNA.txt'))))
print(determine_species('EX05_Protein.csv'))
'''