﻿

# EDIT_1_HW_BioInfa

# Nov 30th, 2020
# 554
# Never
# Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import random

from termcolor import colored
3.
4.
def makeDNA(countAT, countCG):
    """Return a randomly shuffled DNA string with the requested base counts.

    Args:
        countAT: total number of A and T bases to place in the sequence.
        countCG: total number of C and G bases to place in the sequence.

    Returns:
        A string of length ``countAT + countCG`` (for non-negative counts)
        made of the characters A, T, C and G in random order.  Each pair is
        split as evenly as possible; when a count is odd the extra base goes
        to T (respectively G) so that no base is lost to truncation.
    """
    countAT = int(countAT)
    countCG = int(countCG)

    # Give the first base of each pair the floor half and the second base the
    # remainder, so the two always add up to the requested pair total.
    countA = countAT // 2
    countT = countAT - countA
    countC = countCG // 2
    countG = countCG - countC

    # A negative repeat count yields an empty string, so negative inputs
    # simply contribute no bases.
    listDNA = list("A" * countA + "T" * countT + "C" * countC + "G" * countG)
    random.shuffle(listDNA)
    return ''.join(listDNA)
18.
19.
def reverseDNA(strDNA):
    """Return the reverse complement of a DNA string.

    Each base is replaced by its complement (A<->T, C<->G) and the result is
    read in the opposite direction.

    Args:
        strDNA: sequence made of the upper-case characters A, T, C and G.

    Returns:
        The reverse-complement string.  Characters other than A, T, C and G
        have no complement in the table and are omitted from the result.
    """
    complement = {"A": "T", "T": "A", "G": "C", "C": "G"}
    # Walking the input backwards while complementing gives the same result
    # as complementing first and reversing afterwards, in a single pass.
    return ''.join(complement[base] for base in reversed(strDNA) if base in complement)
32.
33.
_START_CODON = "ATG"
_STOP_CODONS = frozenset(("TAA", "TAG", "TGA"))
# Candidates shorter than this many bases are not reported.
_MIN_ORF_LENGTH = 30

# Codon -> one-letter amino acid; '*' marks a stop codon.
_GENCODE = {
    'ATA': 'I', 'ATC': 'I', 'ATT': 'I', 'ATG': 'M',
    'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
    'AAC': 'N', 'AAT': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGC': 'S', 'AGT': 'S', 'AGA': 'R', 'AGG': 'R',
    'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
    'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
    'CAC': 'H', 'CAT': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
    'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
    'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
    'GAC': 'D', 'GAT': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
    'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
    'TTC': 'F', 'TTT': 'F', 'TTA': 'L', 'TTG': 'L',
    'TAC': 'Y', 'TAT': 'Y', 'TAA': '*', 'TAG': '*',
    'TGC': 'C', 'TGT': 'C', 'TGA': '*', 'TGG': 'W'}


def _scan_orfs(strDNA):
    """Collect (start_index, orf, protein) for every reported ORF in strDNA.

    The sequence is read in steps of three starting at index 0.  At each
    start codon, codons are consumed until a stop codon, a triplet missing
    from the codon table, or the end of the sequence is reached.  The
    terminating triplet (which may be empty or shorter than three bases at
    the end of the sequence) is appended to the ORF text, and '*' is
    appended to the protein only when it is a stop codon.
    """
    found = []
    position = 0
    while position < len(strDNA) - 2:
        current_codon = strDNA[position:position + 3]

        if current_codon == _START_CODON:
            start = position
            codons = []
            residues = []
            while current_codon in _GENCODE and current_codon not in _STOP_CODONS:
                codons.append(current_codon)
                residues.append(_GENCODE[current_codon])
                position += 3
                current_codon = strDNA[position:position + 3]
            codons.append(current_codon)
            if current_codon in _STOP_CODONS:
                residues.append('*')

            orf = ''.join(codons)
            # The modulo check rejects candidates that ran off the end of the
            # sequence with a partial trailing codon.
            if len(orf) >= _MIN_ORF_LENGTH and len(orf) % 3 == 0:
                found.append((start, orf, ''.join(residues)))

        # Step over the current triplet (the terminating one after an ORF).
        position += 3
    return found


def orf_it_up(strDNA, k):
    """Print the longest open reading frame found in one reading frame.

    When at least one ORF of 30 or more bases is found, three lines are
    printed: the ORF's bases, its ``start - end`` coordinates, and its
    translation.  If several ORFs share the maximum length, the first one
    encountered is reported.  Otherwise a red 'Nothing' is printed.

    Args:
        strDNA: the sequence to scan; triplets are read starting at index 0.
        k: offset added to the 0-based index inside ``strDNA`` when printing
            coordinates.  Passing the 1-based frame number for a sequence
            that was sliced as ``strand[k - 1:]`` yields 1-based, inclusive
            coordinates on the unsliced strand.

    Returns:
        None.  All results are written to standard output.
    """
    found = _scan_orfs(strDNA)
    if found:
        # The start index is remembered during the scan instead of being
        # searched for afterwards, so a repeated ORF string cannot produce
        # mismatched start and end coordinates.
        start, orf, protein = max(found, key=lambda item: len(item[1]))
        print(orf)
        print(start + k, '-', start + len(orf) + k - 1)
        print(protein)
    else:
        print(colored('Nothing', 'red'))
92.
if __name__ == '__main__':
    # Read the target sequence length and GC percentage from standard input.
    print("Enter length of DNA dnaSize=")
    dnaSize = int(input())
    print("Enter content percentage of CG percentCG= ")
    percentGC = int(input())
    if dnaSize < 0 or not 0 <= percentGC <= 100:
        raise SystemExit("dnaSize must be non-negative and percentCG must be between 0 and 100")

    # Derive the AT count from the remainder so the two counts always add up
    # to dnaSize instead of each being truncated independently.
    countGC = dnaSize * percentGC // 100
    countAT = dnaSize - countGC

    strDNA = makeDNA(countAT, countGC)
    print('DNA=', strDNA)
    rDNA = reverseDNA(strDNA)
    print('rDNA=', rDNA)

    # Scan the three reading frames of each strand.  Frame n starts at index
    # n - 1, and n is passed on so the printed coordinates refer to the
    # unsliced strand.
    frame_labels = ('On 1st frame', 'On 2nd frame', 'On 3rd frame')
    strands = (('On direct strand', strDNA), ('On reverse strand', rDNA))
    for strand_label, strand in strands:
        print(colored(strand_label, 'blue'))
        for frame, frame_label in enumerate(frame_labels, start=1):
            print(colored(frame_label, 'green'))
            orf_it_up(strand[frame - 1:], frame)
129.
# RAW Paste Data