Guest User

Collatz - DNA - Protein

a guest
Jun 21st, 2025
42
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.60 KB | None | 0 0
  1. from datetime import datetime
  2.  
  3. # Base-4 ranking: C=0, A=1, T=2, G=3
  4. NUCLEOTIDE_ORDER = ['C', 'A', 'T', 'G']
  5.  
  6. # Codon to amino acid (1-letter code) mapping
  7. CODON_TO_AA = {
  8. # Phenylalanine
  9. 'TTT': 'F', 'TTC': 'F',
  10. # Leucine
  11. 'TTA': 'L', 'TTG': 'L', 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
  12. # Isoleucine
  13. 'ATT': 'I', 'ATC': 'I', 'ATA': 'I',
  14. # Methionine (Start)
  15. 'ATG': 'M',
  16. # Valine
  17. 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
  18. # Serine
  19. 'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'AGT': 'S', 'AGC': 'S',
  20. # Proline
  21. 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
  22. # Threonine
  23. 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
  24. # Alanine
  25. 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
  26. # Tyrosine
  27. 'TAT': 'Y', 'TAC': 'Y',
  28. # Histidine
  29. 'CAT': 'H', 'CAC': 'H',
  30. # Glutamine
  31. 'CAA': 'Q', 'CAG': 'Q',
  32. # Asparagine
  33. 'AAT': 'N', 'AAC': 'N',
  34. # Lysine
  35. 'AAA': 'K', 'AAG': 'K',
  36. # Aspartic Acid
  37. 'GAT': 'D', 'GAC': 'D',
  38. # Glutamic Acid
  39. 'GAA': 'E', 'GAG': 'E',
  40. # Cysteine
  41. 'TGT': 'C', 'TGC': 'C',
  42. # Tryptophan
  43. 'TGG': 'W',
  44. # Arginine
  45. 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
  46. # Glycine
  47. 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
  48. # Stop codons
  49. 'TAA': '*', 'TAG': '*', 'TGA': '*'
  50. }
  51.  
  52. def index_to_codon(index):
  53. """Converts integer [0–63] to codon using base-4 encoding."""
  54. if 0 <= index <= 63:
  55. first = NUCLEOTIDE_ORDER[index // 16]
  56. second = NUCLEOTIDE_ORDER[(index // 4) % 4]
  57. third = NUCLEOTIDE_ORDER[index % 4]
  58. return first + second + third
  59. else:
  60. return 'NNN' # Invalid codon
  61.  
  62. def convert_integer_to_dna_codons(number):
  63. """Converts a shifted integer (1-based) to codon string (base-64, offset by +1)."""
  64. if number <= 0:
  65. raise ValueError("Only positive integers (1 or greater) are valid.")
  66.  
  67. codons = []
  68. number -= 1 # Shift so that CCC starts at 1
  69.  
  70. while number >= 0:
  71. remainder = number % 64
  72. codons.append(index_to_codon(remainder))
  73. number = number // 64 - 1 if number >= 64 else -1 # Ensure correct length for base-64 +1 system
  74.  
  75. codons.reverse()
  76. return ''.join(codons)
  77.  
  78. def translate_to_amino_acids(dna_sequence):
  79. """Translates DNA triplets into a string of amino acids using standard genetic code."""
  80. aa_sequence = []
  81. for i in range(0, len(dna_sequence), 3):
  82. codon = dna_sequence[i:i+3]
  83. aa = CODON_TO_AA.get(codon, 'X') # 'X' for unknown codon
  84. aa_sequence.append(aa)
  85. return ''.join(aa_sequence)
  86.  
  87. def collatz_sequence_with_dna(n, output_file):
  88. """Performs Collatz steps on n and logs integer + DNA + AA at each step."""
  89. with open(output_file, 'w') as f:
  90. step = 0
  91. while n != 1:
  92. dna = convert_integer_to_dna_codons(n)
  93. aa_seq = translate_to_amino_acids(dna)
  94. f.write(f"Step {step}: {n} -> {dna} -> {aa_seq}\n")
  95. step += 1
  96. n = 3 * n + 1 if n % 2 else n // 2
  97. dna = convert_integer_to_dna_codons(n)
  98. aa_seq = translate_to_amino_acids(dna)
  99. f.write(f"Step {step}: {n} -> {dna} -> {aa_seq}\n") # Final step when n == 1
  100.  
  101. if __name__ == "__main__":
  102. try:
  103. user_input = int(input("Enter a positive integer to start Collatz-DNA sequence: "))
  104. if user_input <= 0:
  105. raise ValueError("Only positive integers are allowed.")
  106. except ValueError as ve:
  107. print(f"Invalid input: {ve}")
  108. exit(1)
  109.  
  110. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  111. output_file = f"collatz_dna_output.txt"
  112.  
  113. collatz_sequence_with_dna(user_input, output_file)
  114.  
  115. print(f"Collatz-DNA-Amino Acid sequence written to {output_file}")
  116.  
  117. from collections import Counter
  118. from datetime import datetime
  119.  
  120. # Path to your input text file
  121. file_path = R"collatz_dna_output.txt"
  122.  
  123. # Create a timestamp for filenames
  124. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  125.  
  126. # Output file paths
  127. output_file_path = "count_results_{timestamp}.txt"
  128. summary_file_path = "summary_{timestamp}.txt"
  129.  
  130. # Counter to store occurrences
  131. final_values_counter = Counter()
  132.  
  133. # Read and process each line
  134. with open(file_path, 'r') as file:
  135. for line in file:
  136. if '->' in line:
  137. parts = line.strip().split('->')
  138. if len(parts) >= 3:
  139. final_value = parts[-1].strip()
  140. final_values_counter[final_value] += 1
  141.  
  142. # Save the full count to the output text file
  143. with open(output_file_path, 'w') as out_file:
  144. for value, count in final_values_counter.items():
  145. out_file.write(f"{value}: {count}\n")
  146.  
  147. # Create the summary file
  148. total_values = sum(final_values_counter.values())
  149. unique_values = len(final_values_counter)
  150. duplicates = {k: v for k, v in final_values_counter.items() if v > 1}
  151.  
  152. with open(summary_file_path, 'w') as summary_file:
  153. summary_file.write(f"SUMMARY FILE - {timestamp}\n\n")
  154. summary_file.write(f"Total values identified: {total_values}\n")
  155. summary_file.write(f"Unique values: {unique_values}\n")
  156. summary_file.write(f"Values that occurred more than once:\n")
  157.  
  158. if duplicates:
  159. for value, count in duplicates.items():
  160. summary_file.write(f"{value}: {count}\n")
  161. else:
  162. summary_file.write("None\n")
  163.  
  164. print(f"Results saved to:\n{output_file_path}\n{summary_file_path}")
  165.  
Add Comment
Please, Sign In to add comment