berinkaq

Untitled

Feb 23rd, 2021
470
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1.  
  2. # #C = #G, #T = #A
  3. def check1(file):
  4.     f = open(file, 'r')
  5.     text = f.read()
  6.     text = text.replace('\r', '')
  7.     text = text.replace('\n', '')
  8.     c = 0
  9.     g = 0
  10.     t = 0
  11.     a = 0
  12.     for ch in text:
  13.         if ch == 'C':
  14.             c += 1
  15.         if ch == 'G':
  16.             g += 1
  17.         if ch == 'T':
  18.             t += 1
  19.         if ch == 'A':
  20.             a += 1
  21.     c_g = (max(c, g) - min(c, g)) / max(c, g) * 100
  22.     a_t = (max(a, t) - min(a, t)) / max(a, t) * 100
  23.     c_g = round(c_g, 3)
  24.     a_t = round(a_t, 3)
  25.     print('#C = ', c, '\n#G = ', g, '\ndifference C&G = ', c_g, '%', sep = '')
  26.     print('#A = ', a, '\n#T = ', t, '\ndifference A&T = ', a_t, '%', sep = '')
  27.     print()
  28.  
  29.  
  30. # слов CG мало в определенных геномах
  31. def check2(file):
  32.     f = open(file, 'r')
  33.     text = f.read()
  34.     text = text.replace('\r', '')
  35.     text = text.replace('\n', '')
  36.     cg = 0
  37.     for i in range(len(text) - 1):
  38.         if text[i] == 'C' and text[i + 1] == 'G':
  39.             cg += 1
  40.     print("#CG = ", cg, sep = '')
  41.     p = 2 * cg / len(text) * 100
  42.     p = round(p, 3)
  43.     print("Percentage: ", p, '%', sep = '')
  44.     print()
  45.  
  46.  
  47. # слов TA мало во всех геномах
  48. def check3(file):
  49.     f = open(file, 'r')
  50.     text = f.read()
  51.     text = text.replace('\r', '')
  52.     text = text.replace('\n', '')
  53.     ta = 0
  54.     for i in range(len(text) - 1):
  55.         if text[i] == 'T' and text[i + 1] == 'A':
  56.             ta += 1
  57.     print("#TA = ", ta, sep = '')
  58.     p = 2 * ta / len(text) * 100
  59.     p = round(p, 3)
  60.     print("Percentage: ", p, sep = '')
  61.     print()
  62.  
  63. # в некоторых геномах #C > #G в одной части и #G > #C в другой части (GC skew)
  64. # проверяем первую половину и вторую половину
  65. def check4(file):
  66.     f = open(file, 'r')
  67.     text = f.read()
  68.     text = text.replace('\r', '')
  69.     text = text.replace('\n', '')
  70.     l = len(text) // 2
  71.     c1 = 0
  72.     g1 = 0
  73.     for i in range(l):
  74.         if text[i] == 'C':
  75.             c1 += 1
  76.         if text[i] == 'G':
  77.             g1 += 1
  78.     print("first half:\n", '#C = ', c1, '\n#G = ', g1, sep = '')
  79.     c2 = 0
  80.     g2 = 0
  81.     for i in range(l, len(text)):
  82.         if text[i] == 'C':
  83.             c2 += 1
  84.         if text[i] == 'G':
  85.             g2 += 1
  86.     print("second half:\n", '#C = ', c2, '\n#G = ', g2, sep = '')  
  87.  
  88.     c_g1 = (max(c1, g1) - min(c1, g1)) / max(c1, g1) * 100
  89.     c_g2 = (max(c2, g2) - min(c2, g2)) / max(c2, g2) * 100
  90.     c_g1 = round(c_g1, 3)
  91.     c_g2 = round(c_g2, 3)
  92.    
  93.     print('difference between C&G in frist half: ', c_g1, '%', sep = '')   
  94.     print('difference between C&G in second half: ', c_g2, '%', sep = '')  
  95.     print()
  96.  
  97.  
  98.  
  99. files = ['NC_001802.1.fna', 'NC_002642.fna', 'NC_045512.2.fna']
  100. viruses = ['HIV', 'YABA', 'COVID']
  101.  
  102. for i in range(len(files)):
  103.     print(viruses[i], ': ')
  104.     check1(files[i])
  105.     check2(files[i])
  106.     check3(files[i])
  107.     check4(files[i])
  108.     print()
RAW Paste Data