# Collatz - DNA - Protein

from datetime import datetime

# Base-4 digit -> nucleotide lookup: position 0 is C, 1 is A, 2 is T, 3 is G.
NUCLEOTIDE_ORDER = list('CATG')

# Genetic code table, one row per amino acid (1-letter code) followed by the
# space-separated DNA codons that encode it. '*' marks the stop codons.
_CODONS_BY_AMINO_ACID = (
    ('F', 'TTT TTC'),                  # Phenylalanine
    ('L', 'TTA TTG CTT CTC CTA CTG'),  # Leucine
    ('I', 'ATT ATC ATA'),              # Isoleucine
    ('M', 'ATG'),                      # Methionine (Start)
    ('V', 'GTT GTC GTA GTG'),          # Valine
    ('S', 'TCT TCC TCA TCG AGT AGC'),  # Serine
    ('P', 'CCT CCC CCA CCG'),          # Proline
    ('T', 'ACT ACC ACA ACG'),          # Threonine
    ('A', 'GCT GCC GCA GCG'),          # Alanine
    ('Y', 'TAT TAC'),                  # Tyrosine
    ('H', 'CAT CAC'),                  # Histidine
    ('Q', 'CAA CAG'),                  # Glutamine
    ('N', 'AAT AAC'),                  # Asparagine
    ('K', 'AAA AAG'),                  # Lysine
    ('D', 'GAT GAC'),                  # Aspartic Acid
    ('E', 'GAA GAG'),                  # Glutamic Acid
    ('C', 'TGT TGC'),                  # Cysteine
    ('W', 'TGG'),                      # Tryptophan
    ('R', 'CGT CGC CGA CGG AGA AGG'),  # Arginine
    ('G', 'GGT GGC GGA GGG'),          # Glycine
    ('*', 'TAA TAG TGA'),              # Stop codons
)

# Codon to amino acid (1-letter code) mapping, flattened from the table above.
CODON_TO_AA = {
    codon: amino_acid
    for amino_acid, codons in _CODONS_BY_AMINO_ACID
    for codon in codons.split()
}

def index_to_codon(index):
    """Return the three-letter codon for a base-4 encoded index.

    The index is read as three base-4 digits (most significant first) and
    each digit is looked up in NUCLEOTIDE_ORDER. Any index outside
    0..63 yields the placeholder 'NNN'.
    """
    if not 0 <= index <= 63:
        return 'NNN'  # Invalid codon

    # Peel off the digits: 16s place, then 4s place, then units.
    high_digit, rest = divmod(index, 16)
    mid_digit, low_digit = divmod(rest, 4)
    return ''.join(
        NUCLEOTIDE_ORDER[digit] for digit in (high_digit, mid_digit, low_digit)
    )

def convert_integer_to_dna_codons(number):
    """Encode a positive integer as a string of concatenated codons.

    The encoding is a 1-based (bijective-style) base-64 scheme: 1 maps to
    the single codon for index 0, 64 to the single codon for index 63, and
    65 is the first two-codon value. Each base-64 digit is rendered through
    index_to_codon, most significant digit first.

    Raises:
        ValueError: if number is zero or negative.
    """
    if number <= 0:
        raise ValueError("Only positive integers (1 or greater) are valid.")

    remaining = number - 1  # Shift so that the first codon corresponds to 1
    pieces = []
    while True:
        remaining, digit = divmod(remaining, 64)
        pieces.append(index_to_codon(digit))
        if remaining == 0:
            break
        # Borrow one from the carried quotient: there is no "zero" digit in
        # this offset system, so every extra codon position starts at index 0.
        remaining -= 1

    return ''.join(reversed(pieces))

def translate_to_amino_acids(dna_sequence):
    """Translate a DNA string, three characters at a time, into amino acids.

    Each consecutive triplet is looked up in CODON_TO_AA; a triplet that is
    not in the table (including a short trailing fragment) becomes 'X'.
    Returns the 1-letter codes joined into one string.
    """
    triplets = (
        dna_sequence[start:start + 3]
        for start in range(0, len(dna_sequence), 3)
    )
    # 'X' stands in for any unknown codon.
    return ''.join(CODON_TO_AA.get(triplet, 'X') for triplet in triplets)

def collatz_sequence_with_dna(n, output_file):
    """Walk the Collatz sequence from n down to 1, logging every value.

    For each value visited (including the starting value and the final 1),
    one line is written to output_file in the form
    "Step <k>: <value> -> <codons> -> <amino acids>". The file is opened in
    write mode, so existing content is replaced.
    """
    with open(output_file, 'w') as f:
        step = 0
        while True:
            dna = convert_integer_to_dna_codons(n)
            aa_seq = translate_to_amino_acids(dna)
            f.write(f"Step {step}: {n} -> {dna} -> {aa_seq}\n")

            if n == 1:
                break  # The line for 1 has just been logged; nothing left to do.

            step += 1
            # Collatz rule: odd -> 3n + 1, even -> n / 2.
            if n % 2:
                n = 3 * n + 1
            else:
                n = n // 2

if __name__ == "__main__":
    # Script entry point:
    #   1. prompt for a positive starting integer,
    #   2. write the Collatz / codon / amino-acid log to a fixed-name file,
    #   3. tally how often each amino-acid string occurs in that log and write
    #      a full count file plus a summary file, both timestamped.

    # Local import: Counter is only needed by the tally below.
    from collections import Counter

    try:
        user_input = int(input("Enter a positive integer to start Collatz-DNA sequence: "))
        if user_input <= 0:
            raise ValueError("Only positive integers are allowed.")
    except ValueError as ve:
        print(f"Invalid input: {ve}")
        # Raise SystemExit directly rather than calling the site-provided
        # exit() helper, which is intended for interactive sessions.
        raise SystemExit(1) from None

    output_file = "collatz_dna_output.txt"

    collatz_sequence_with_dna(user_input, output_file)

    print(f"Collatz-DNA-Amino Acid sequence written to {output_file}")

    # Create a timestamp for the report filenames and the summary header.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Output file paths. These must be f-strings; otherwise the files are
    # literally named "count_results_{timestamp}.txt" / "summary_{timestamp}.txt".
    output_file_path = f"count_results_{timestamp}.txt"
    summary_file_path = f"summary_{timestamp}.txt"

    # Count occurrences of the last "->"-separated field (the amino-acid
    # string) across all log lines.
    final_values_counter = Counter()
    with open(output_file, 'r') as file:
        for line in file:
            parts = line.strip().split('->')
            # A well-formed line has at least: step/value, codons, amino acids.
            if len(parts) >= 3:
                final_values_counter[parts[-1].strip()] += 1

    # Save the full count to the output text file.
    with open(output_file_path, 'w') as out_file:
        for value, count in final_values_counter.items():
            out_file.write(f"{value}: {count}\n")

    # Create the summary file.
    total_values = sum(final_values_counter.values())
    unique_values = len(final_values_counter)
    duplicates = {k: v for k, v in final_values_counter.items() if v > 1}

    with open(summary_file_path, 'w') as summary_file:
        summary_file.write(f"SUMMARY FILE - {timestamp}\n\n")
        summary_file.write(f"Total values identified: {total_values}\n")
        summary_file.write(f"Unique values: {unique_values}\n")
        summary_file.write("Values that occurred more than once:\n")

        if duplicates:
            for value, count in duplicates.items():
                summary_file.write(f"{value}: {count}\n")
        else:
            summary_file.write("None\n")

    print(f"Results saved to:\n{output_file_path}\n{summary_file_path}")