# Untitled


# Check 1: expect #C = #G and #T = #A
def check1(file):
    """Print the C/G and A/T counts of a sequence file and their relative gaps.

    The file is read as text. Lines starting with '>' (FASTA description
    lines) are skipped and line breaks are dropped, so only sequence
    characters are counted. Only upper-case 'C', 'G', 'T' and 'A' are
    counted.

    Each relative difference is ``|x - y| / max(x, y) * 100``. It is reported
    as 0.0 when both counts are zero instead of raising ZeroDivisionError.

    Args:
        file: Path of the sequence file to analyse.

    Returns:
        None. The report is written to standard output.
    """
    # The context manager closes the handle even if reading fails.
    with open(file, 'r') as f:
        text = ''.join(
            line.rstrip('\r\n') for line in f if not line.startswith('>')
        )

    c = text.count('C')
    g = text.count('G')
    t = text.count('T')
    a = text.count('A')

    def percent_difference(x, y):
        # Gap between the two counts as a percentage of the larger one.
        larger = max(x, y)
        if larger == 0:
            # Neither symbol occurs: there is no gap to report.
            return 0.0
        return abs(x - y) / larger * 100

    c_g = percent_difference(c, g)
    print('#C = ', c, '\n#G = ', g, '\ndifference C&G = ', c_g, '%', sep='')
    a_t = percent_difference(a, t)
    print('#A = ', a, '\n#T = ', t, '\ndifference A&T = ', a_t, '%', sep='')
    print()


# Check 2: the word CG is rare in certain genomes
def check2(file):
    """Print how many times the dinucleotide "CG" occurs in a sequence file.

    The file is read as text. Lines starting with '>' (FASTA description
    lines) are skipped and line breaks are dropped, so a "CG" that spans two
    sequence lines is counted, while text in the description line is not.
    Matching is case-sensitive (upper-case only).

    Args:
        file: Path of the sequence file to analyse.

    Returns:
        None. The count is written to standard output.
    """
    # The context manager closes the handle even if reading fails.
    with open(file, 'r') as f:
        text = ''.join(
            line.rstrip('\r\n') for line in f if not line.startswith('>')
        )

    # "CG" cannot overlap itself, so the non-overlapping str.count equals a
    # position-by-position scan.
    cg = text.count('CG')
    print("#CG = ", cg, sep='')
    print()


# Check 3: the word TA is rare in all genomes
def check3(file):
    """Print how many times the dinucleotide "TA" occurs in a sequence file.

    The file is read as text. Lines starting with '>' (FASTA description
    lines) are skipped and line breaks are dropped, so a "TA" that spans two
    sequence lines is counted, while text in the description line is not.
    Matching is case-sensitive (upper-case only).

    Args:
        file: Path of the sequence file to analyse.

    Returns:
        None. The count is written to standard output.
    """
    # The context manager closes the handle even if reading fails.
    with open(file, 'r') as f:
        text = ''.join(
            line.rstrip('\r\n') for line in f if not line.startswith('>')
        )

    # "TA" cannot overlap itself, so the non-overlapping str.count equals a
    # position-by-position scan.
    ta = text.count('TA')
    print("#TA = ", ta, sep='')
    print()

# Check 4: in some genomes #C > #G in one part and #G > #C in the other
# part (GC skew); compare the first half with the second half
def check4(file):
    """Print the C and G counts of each half of a sequence file.

    The file is read as text. Lines starting with '>' (FASTA description
    lines) are skipped and line breaks are dropped before the sequence is
    split, so the description line neither shifts the midpoint nor adds to
    the counts. The split point is ``len(sequence) // 2``; for an odd length
    the second half holds the extra character. Only upper-case 'C' and 'G'
    are counted.

    Args:
        file: Path of the sequence file to analyse.

    Returns:
        None. The report is written to standard output.
    """
    # The context manager closes the handle even if reading fails.
    with open(file, 'r') as f:
        text = ''.join(
            line.rstrip('\r\n') for line in f if not line.startswith('>')
        )

    middle = len(text) // 2
    first_half = text[:middle]
    second_half = text[middle:]

    print("first half:\n", '#C = ', first_half.count('C'),
          '\n#G = ', first_half.count('G'), sep='')
    print("second half:\n", '#C = ', second_half.count('C'),
          '\n#G = ', second_half.count('G'), sep='')
    print()


# Sequence files to analyse and, in the same order, the label printed
# before each file's report.
files = ['NC_001802.1.fna', 'NC_002642.fna', 'NC_045512.2.fna']
viruses = ['HIV', 'YABA', 'COVID']

# Run every check on every file, printing the label first and a blank line
# after each file's report.
for virus_name, fasta_path in zip(viruses, files):
    print(virus_name, ': ')
    for run_check in (check1, check2, check3, check4):
        run_check(fasta_path)
    print()