mjsielerjr1

Mutation Simulator

May 14th, 2021
660
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python3
  2.  
  3. ###############################################################################
  4. ##  Title: Mutation Simulator
  5. ##  Author: Michael Sieler
  6. ##  Date: May 10th, 2021
  7. ##  Description: A mutation simulator that calculates sequence similarity of
  8. ##              two or more organisms and prints the result to the screen.
  9. ##  Notes:
  10. ##      - User must define number of organisms, genome size and mutation rate
  11. ##      - Script is organised by Classes, Functions, and Main script
  12. ##      - Script is saved to ACTF as well under the following directory:
  13. ##          `/ACTF/Course/mgb_s21/home/sielerjm/Exams/Exam_2/mutationSimulator.py`
  14. ##      - To run script, enter: `./mutationSimulator.py`
  15. ###############################################################################
  16.  
  17.  
  18. ## Import Libraries
  19. import random
  20.  
  21.  
  22. ###############################################################################
  23. ################################# CLASSES #####################################
  24. ###############################################################################
  25.  
  26.  
  27. class Organism():
  28.     """
  29.    Organism class
  30.        Defines functions for creating an organism object with the following attributes:
  31.            ID: integar
  32.            Generation: counter of how many times the object has "reproduced"
  33.            Size: integar of genome size, defined by user
  34.            Genome: list of nucleotides
  35.            mutCount: mutation counter
  36.            mutRate: mutation rate, defined by user
  37.    """
  38.  
  39.     def __init__(self, id, generation, size, genome, mutCount, mutRate):
  40.         self.id = id  # name of organism
  41.         self.generation = generation  # generation counter
  42.         self.size = size  # genome size
  43.         self.genome = genome[:]  # genome sequence
  44.         self.mutCount = mutCount  # mutation counter
  45.         self.mutRate = mutRate  # Default rate is 1 in a million
  46.  
  47.     # Getters
  48.     def get_id(self):
  49.         return self.id
  50.  
  51.     def get_genome(self):
  52.         return self.genome[:]
  53.  
  54.     def get_generation(self):
  55.         return self.generation
  56.  
  57.     def get_size(self):
  58.         return self.size
  59.  
  60.     def get_mutRate(self):
  61.         return self.mutRate
  62.  
  63.     def get_mutCount(self):
  64.         return self.mutCount
  65.  
  66.     def reproduce(self):
  67.         temp_genome = self.get_genome()[:]  # copies genome from previous generation
  68.  
  69.         for index in range(100): # repeat 100 times
  70.             if ((index + 1) % 10) == 0:
  71.                 print("Generation #{}:".format(index+1))
  72.                 print("\tMutation Count: {}".format(self.get_mutCount()))
  73.  
  74.             for index in range(self.get_size()):  # copies one base at a time with a chance of mutation
  75.                 temp_mutation = random.randint(0, (self.mutRate - 1) )  # subtract one because starts at zero
  76.  
  77.                 if temp_mutation == 0:  # arbitrary number
  78.                     temp_genome[index] = self.mutation(temp_genome[index])
  79.                     self.set_mutCount()
  80.  
  81.             self.set_generation()  # Increases generation by 1
  82.  
  83.         self.set_genome(temp_genome)
  84.  
  85.  
  86.     # Setters
  87.  
  88.     def set_id(self, num):
  89.         self.id = num
  90.  
  91.     def mutation(self, base):
  92.         temp_base = base
  93.  
  94.         if temp_base == "A":
  95.             TGC = {0:"T", 1:"G", 2:"C"}
  96.             temp_base = random.choice(list(TGC.values()))
  97.  
  98.         elif temp_base == "T":
  99.             AGC = {0:"A", 1:"G", 2:"C"}
  100.             temp_base = random.choice(list(AGC.values()))
  101.  
  102.         elif temp_base == "G":
  103.             ATC = {0:"A", 1:"T", 2:"C"}
  104.             temp_base = random.choice(list(ATC.values()))
  105.  
  106.         elif temp_base == "C":
  107.             ATG = {0:"A", 1:"T", 2:"G"}
  108.             temp_base = random.choice(list(ATG.values()))
  109.  
  110.         return temp_base
  111.  
  112.     def set_mutCount(self):  # Increases the mutation count by 1
  113.         self.mutCount += 1
  114.  
  115.     def set_newGenome(self):
  116.         self.genome = self.randomsequence(self.get_size())
  117.  
  118.     def set_genome(self, sequence):
  119.         self.genome = sequence[:]
  120.  
  121.     def set_generation(self):
  122.         self.generation += 1
  123.  
  124.     def randomsequence(self, size):
  125.         ATGC = ["A", "T", "G", "C"]
  126.         temp = []  # Creates an empty string
  127.  
  128.         for index in range(size):
  129.             temp.append(ATGC[random.randint(0, 3)])  # randomly picks a nucleotide and adds to empty string
  130.             #print("inside randomsequence(): #{}".format(index))
  131.  
  132.         return temp
  133.  
  134.  
  135. ###############################################################################
  136. ################################ FUNCTIONS ####################################
  137. ###############################################################################
  138.  
  139.  
  140. # Calculates the difference and returns sequence similarity between two organisms
  141. def calcNumDiff(org1, org2):
  142.     temp_diff = 0
  143.     seqA = org1.get_genome()[:]
  144.     seqB = org2.get_genome()[:]
  145.     for index in range(len(seqA)):
  146.         if seqA[index] != seqB[index]:
  147.             temp_diff += 1
  148.     return (1 - (temp_diff/org1.get_size()))
  149.  
  150.  
  151. # Prints basic information about the organism
  152. def printOrganismInfo(org):
  153.     print("\nOrganism #{} \n  Genome size: {} \n  Number of generations: {} \n  Mutation count: {} \n  Mutation rate: {}  \n  First 20 nucleotides: {}\n".format(org.get_id(), org.get_size(), org.get_generation(), org.get_mutCount(), org.get_mutRate(), org.get_genome()[0:20]))
  154.  
  155.  
  156. ###############################################################################
  157. ############################# START OF SCRIPT #################################
  158. ###############################################################################
  159.  
  160. print("Running Mutation Simulator, written by Michael Sieler 2021\n")
  161.  
  162. # Initialize Variables
  163. numOrganisms = 3
  164. organism = []
  165. diffCount = 0
  166. setGenomeSize = 2*10**6
  167. setMutRate = 1*10**6
  168.  
  169. for number in range(numOrganisms):
  170.     if number == 0: # Generates a parent organism
  171.         organism.append(number)
  172.  
  173.         # id, generation, size, genome, mutCount, mutRate
  174.         organism[number] = Organism(0, 0, setGenomeSize, "", 0, setMutRate)
  175.  
  176.         organism[number].set_newGenome()
  177.         printOrganismInfo(organism[number])
  178.  
  179.     elif number > 0: # Generates child organisms
  180.         organism.append(number)
  181.  
  182.         # Copies genome from parent organism
  183.         organism[number] = Organism(number, 0, organism[0].get_size(), organism[0].get_genome()[:], 0, organism[0].get_mutRate())
  184.  
  185.         printOrganismInfo(organism[number]) # Prints some basic info BEFORE mutations
  186.         organism[number].reproduce()
  187.         printOrganismInfo(organism[number]) # Prints some basic info AFTER mutations
  188.  
  189.  
  190.  
  191. # Sequence similarity between organisms 1 and 2
  192. print("Sequence similarity between 1 and 2: {}".format(calcNumDiff(organism[1], organism[2])))
  193.  
  194. # Sequence Similarity of child organisms to parent
  195. print("Sequence similarity between 1 and 0: {}".format(calcNumDiff(organism[1], organism[0])))
  196. print("Sequence similarity between 2 and 0: {}".format(calcNumDiff(organism[2], organism[0])))
  197.  
RAW Paste Data