# Untitled


# #C = #G, #T = #A
def check1(file):
    """Print the C/G and A/T counts of a sequence file and their imbalance.

    The file is read as text, FASTA header lines (lines starting with
    ``>``) are skipped, and the remaining lines are joined without line
    breaks. The upper-case letters ``C``, ``G``, ``A`` and ``T`` are then
    counted, and for each pair the relative difference
    ``(max - min) / max * 100`` is printed, rounded to 3 decimal places.

    Args:
        file: Path of the sequence file to read.

    Returns:
        None. The report is written to standard output.
    """
    # Header lines are dropped so letters in the description are not counted
    # as nucleotides; ``with`` guarantees the file handle is closed.
    with open(file, 'r') as f:
        text = ''.join(
            line.rstrip('\r\n') for line in f if not line.startswith('>')
        )

    def percent_gap(x, y):
        # Relative difference in percent; 0.0 when both counts are zero so
        # an empty sequence does not raise ZeroDivisionError.
        larger = max(x, y)
        if larger == 0:
            return 0.0
        return round((larger - min(x, y)) / larger * 100, 3)

    c = text.count('C')
    g = text.count('G')
    t = text.count('T')
    a = text.count('A')
    c_g = percent_gap(c, g)
    a_t = percent_gap(a, t)
    print('#C = ', c, '\n#G = ', g, '\ndifference C&G = ', c_g, '%', sep='')
    print('#A = ', a, '\n#T = ', t, '\ndifference A&T = ', a_t, '%', sep='')
    print()


# CG words (dinucleotides) are rare in certain genomes
def check2(file):
    """Print how often the two-letter word ``CG`` occurs in a sequence file.

    The file is read as text, FASTA header lines (lines starting with
    ``>``) are skipped, and the remaining lines are joined without line
    breaks, so a ``CG`` split across two lines is still counted. The
    printed percentage is ``2 * count / length * 100`` rounded to 3
    decimal places, i.e. the share of letters that belong to a ``CG`` pair.

    Args:
        file: Path of the sequence file to read.

    Returns:
        None. The report is written to standard output.
    """
    # Header lines are dropped so they affect neither the count nor the
    # length; ``with`` guarantees the file handle is closed.
    with open(file, 'r') as f:
        text = ''.join(
            line.rstrip('\r\n') for line in f if not line.startswith('>')
        )
    # 'CG' cannot overlap itself, so str.count equals a sliding-window scan.
    cg = text.count('CG')
    print("#CG = ", cg, sep='')
    # Report 0.0 for an empty sequence instead of dividing by zero.
    p = round(2 * cg / len(text) * 100, 3) if text else 0.0
    print("Percentage: ", p, '%', sep='')
    print()


# TA words (dinucleotides) are rare in all genomes
def check3(file):
    """Print how often the two-letter word ``TA`` occurs in a sequence file.

    The file is read as text, FASTA header lines (lines starting with
    ``>``) are skipped, and the remaining lines are joined without line
    breaks, so a ``TA`` split across two lines is still counted. The
    printed percentage is ``2 * count / length * 100`` rounded to 3
    decimal places, i.e. the share of letters that belong to a ``TA`` pair.

    Args:
        file: Path of the sequence file to read.

    Returns:
        None. The report is written to standard output.
    """
    # Header lines are dropped so they affect neither the count nor the
    # length; ``with`` guarantees the file handle is closed.
    with open(file, 'r') as f:
        text = ''.join(
            line.rstrip('\r\n') for line in f if not line.startswith('>')
        )
    # 'TA' cannot overlap itself, so str.count equals a sliding-window scan.
    ta = text.count('TA')
    print("#TA = ", ta, sep='')
    # Report 0.0 for an empty sequence instead of dividing by zero.
    p = round(2 * ta / len(text) * 100, 3) if text else 0.0
    # The '%' suffix keeps this line consistent with the CG report.
    print("Percentage: ", p, '%', sep='')
    print()

# In some genomes #C > #G in one part and #G > #C in the other part (GC skew).
# We check the first half and the second half.
def check4(file):
    """Print C and G counts for each half of a sequence file.

    The file is read as text, FASTA header lines (lines starting with
    ``>``) are skipped, and the remaining lines are joined without line
    breaks. The sequence is split at ``len // 2``; for each half the
    counts of upper-case ``C`` and ``G`` are printed, followed by the
    relative difference ``(max - min) / max * 100`` of each half, rounded
    to 3 decimal places.

    Args:
        file: Path of the sequence file to read.

    Returns:
        None. The report is written to standard output.
    """
    # Header lines are dropped so they neither shift the midpoint nor add
    # to the counts; ``with`` guarantees the file handle is closed.
    with open(file, 'r') as f:
        text = ''.join(
            line.rstrip('\r\n') for line in f if not line.startswith('>')
        )

    def percent_gap(x, y):
        # Relative difference in percent; 0.0 when both counts are zero so
        # a half without any C or G does not raise ZeroDivisionError.
        larger = max(x, y)
        if larger == 0:
            return 0.0
        return round((larger - min(x, y)) / larger * 100, 3)

    half = len(text) // 2
    first, second = text[:half], text[half:]

    c1 = first.count('C')
    g1 = first.count('G')
    print("first half:\n", '#C = ', c1, '\n#G = ', g1, sep='')
    c2 = second.count('C')
    g2 = second.count('G')
    print("second half:\n", '#C = ', c2, '\n#G = ', g2, sep='')

    c_g1 = percent_gap(c1, g1)
    c_g2 = percent_gap(c2, g2)

    print('difference between C&G in first half: ', c_g1, '%', sep='')
    print('difference between C&G in second half: ', c_g2, '%', sep='')
    print()


# Sequence files to analyse and the label printed for each (same order).
files = ['NC_001802.1.fna', 'NC_002642.fna', 'NC_045512.2.fna']
viruses = ['HIV', 'YABA', 'COVID']

# Run all four checks on every file, preceded by the virus label and
# followed by a blank line.
for virus, path in zip(viruses, files):
    print(virus, ': ')
    for check in (check1, check2, check3, check4):
        check(path)
    print()