Advertisement
Guest User

Untitled

a guest
Oct 4th, 2015
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.60 KB | None | 0 0
  1. """
  2. Still a better lovestory than Twilight.
  3.  
  4. @author: Siim Sams IAPB15
  5. """
  6. import csv
  7.  
  8. def read_dna_data_from_file(filename):  # => string dna
  9.     """
  10.    Arguments:
  11.    filename -- Expects a string that is the dna filename
  12.  
  13.    Returns:
  14.    A string
  15.    """
  16.     string = ""
  17.     with open(filename) as file:
  18.         for line in file:
  19.            string += line.strip('\n')  # Adds a line with no break spaces to the string
  20.     return ''.join(i for i in string if ord(i) < 128)  # Returns string with no gibberish
  21.  
  22.  
  23. def transcribe_dna_to_rna(dna):  # => string rna
  24.     """
  25.    :param dna:
  26.    String that is in dna form
  27.    :return:
  28.    String that is in RNA form
  29.    """
  30.     string = dna.replace("A", "U")
  31.     string = string.replace("G", "E")
  32.     string = string.replace("C", "G")
  33.     string = string.replace("T", "A")
  34.     string = string.replace("E", "C")
  35.     return string
  36.  
  37.  
  38. def translate_rna_to_protein(rna):  # => string protein
  39.     """
  40.    :param rna:
  41.    String that is in rna form
  42.    :return:
  43.    String in protein form
  44.    """
  45.     string = ""
  46.     amino = [("UUU", "F"), ("UUC", "F"), ("UUA", "L"), ("UUG", "L"), ("CUU", "L"), ("CUC", "L"), ("CUA", "L"), ("CUG", "L"), ("AUU", "I"), ("AUC", "I"), ("AUA", "I"), ("AUG", "M"), ("GUU", "V"), ("GUC", "V"), ("GUA", "V"), ("GUG", "V"), ("UCU", "S"), ("UCC", "S"), ("UCA", "S"), ("UCG", "S"), ("CCU", "P"), ("CCC", "P"), ("CCA", "P"), ("CCG", "P"), ("ACU", "T"), ("ACC", "T"), ("ACA", "T"), ("ACG", "T"), ("GCU", "A"), ("GCC", "A"), ("GCA", "A"), ("GCG", "A"), ("UAU", "Y"), ("UAC", "Y"), ("UAA", "Stop"), ("UAG", "Stop"), ("CAU", "H"), ("CAC", "H"), ("CAA", "Q"), ("CAG", "Q"), ("AAU", "N"), ("AAC", "N"), ("AAA", "K"), ("AAG", "K"), ("GAU", "D"), ("GAC", "D"), ("GAA", "E"), ("GAG", "E"), ("UGU", "C"), ("UGC", "C"), ("UGA", "Stop"), ("UGG", "W"), ("CGU", "R"), ("CGC", "R"), ("CGA", "R"), ("CGG", "R"), ("AGU", "S"), ("AGC", "S"), ("AGA", "R"), ("AGG", "R"), ("GGU", "G"), ("GGC", "G"), ("GGA", "G"), ("GGG", "G")]
  47.     for i in range(0, rna.count('') - 1,3):  # Loop that advances by 3
  48.         for j in range(len(amino)):  # Loop for checking amino values
  49.             if rna[i:i+3] == amino[j][0]:  # Converts rna to protein
  50.                 string += amino[j][1]
  51.             else:
  52.                 pass
  53.     return string
  54.  
  55.  
  56. def determine_species(classification_file):  # => dict of species and count
  57.     protein = translate_rna_to_protein(transcribe_dna_to_rna(read_dna_data_from_file('EX05_DNA.txt')))
  58.     array = []
  59.     dict = {}
  60.     names = open(classification_file)
  61.     csv_names = csv.reader(names)
  62.     for row in csv_names:  # Adds all rows to array
  63.         array.append(row)
  64.     duplicate_free_proteins = [list(t) for t in set(tuple(element) for element in array[1: len(array)])]  # Gets rid of duplicate proteins
  65.     del array[:]
  66.     for i in range(len(duplicate_free_proteins)):  # Adds latin names to array
  67.         array.append(duplicate_free_proteins[i][1])
  68.     duplicate_free_names = tuple(set(array))
  69.     for i in range(len(duplicate_free_names)):  # Adds duplicate free latin names to dictionary
  70.         dict[duplicate_free_names[i]] = 0
  71.     for i in range(len(duplicate_free_proteins)):
  72.         if duplicate_free_proteins[i][2] in protein:
  73.             key = duplicate_free_proteins[i][1]
  74.             dict[key] += protein.count(duplicate_free_proteins[i][2])
  75.     return dict
  76. '''
  77. print(read_dna_data_from_file('EX05_DNA.txt'))
  78. print(transcribe_dna_to_rna(read_dna_data_from_file('EX05_DNA.txt')))
  79. print(translate_rna_to_protein(transcribe_dna_to_rna(read_dna_data_from_file('EX05_DNA.txt'))))
  80. print(determine_species('EX05_Protein.csv'))
  81. '''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement