duquesne9

anavdna.py

Jan 10th, 2021
861
16 days
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. """Here ends the program. I was trying to go with other approach but
  2. I didn't know how to continue. Here is the code anyway:"""
  3.  
  4. # Identifies a person based on their DNA
  5.  
  6. import sys
  7. import csv
  8.  
  9.  
  10. def main():
  11.     if len(sys.argv) != 3:
  12.         sys.exit("Usage: python dna.py data.csv sequence.txt")
  13.  
  14.     """I'm not sure what this was doing"""
  15.     #index = [range(9)]
  16.  
  17.  
  18.     # Read people into memory from csv file
  19.     with open(sys.argv[1]) as file:
  20.         """You wanted restkey, not restval"""
  21.         reader = csv.DictReader(file, fieldnames=["name"], restkey="SRTs")
  22.         # for row in reader:
  23.         #     people.append(row)
  24.         """
  25.        with database small and sequence 1 this will return 2 objects that looks like:
  26.        SRT_list = ["AGATC", "AAGT", "TATC"]
  27.        people = {'Alice': ['2', '8', '3'], 'Bob': ['4', '1', '5'], 'Charlie': ['3', '2', '5']}
  28.        """
  29.         people = {}
  30.         for index, row in enumerate(reader):
  31.             if index == 0:
  32.                 STR_list = row["SRTs"]
  33.             else:
  34.                 people[row['name']] = row['SRTs']
  35.         # print(people)
  36.  
  37.     # Read DNA sequence into memory from text file
  38.     """Almost always better to use a context manager like above"""
  39.     with open(sys.argv[2], "r") as file2:
  40.         sequence = file2.read()
  41.  
  42.     # file2 = open(sys.argv[2])
  43.     # sequence = file2.read()
  44.     # #print(sequence)
  45.     # file2.close()
  46.  
  47.     """ From now on it's the same thing, I wanted to store the returned
  48.        values of the max_repeat function in something like a sequence or dict.
  49.        I was trying to do that with the next 'for' loop but didn't know
  50.        how to finish it"""
  51.     # for i = 1 in range(9):
  52.     """for i in range(1,9): #this is how you would write the above line, but I don't think you want that."""
  53.     counts = []
  54.     for STR in STR_list:
  55.         """I snuck in a string conversion to make comparisons easier later"""
  56.         counts.append(str(max_repeat(sequence, STR)))
  57.  
  58.     """
  59.    tangent: if you're not familiar with list comprehensions, they're a really powerful tool
  60.    for readability. This one is a little complex so it might be better left long form, but I
  61.    to include it anyway.
  62.  
  63.    counts = [str(max_repeat(sequence, STR)) for STR in STR_list]]
  64.    """
  65.  
  66.     # counts = [ agatc , aatg , tatc ]
  67.     #print(counts)
  68.  
  69.     found = False
  70.     for person in people:
  71.         # print(people[person])
  72.         if counts == people[person]: #and counts[1] == person["AATG"] and counts[2] == person["TATC"]:
  73.             print(person["name"])
  74.             found = True
  75.  
  76.     if not found:
  77.         print("No match")
  78.  
  79.  
  80. """
  81. This function does not work correctly. I spent a lot of time trying to find a solution using
  82. indexing like this but I couldn't wrangle all the edge cases. In the end I went back and used a regex.
  83. Writing regex can be just as finicky, but it worked for me.
  84. """
  85. def max_repeat(string, substring):
  86.     # Returns the maximum number of times that the STR repeats
  87.     max_count=0
  88.     count = 0
  89.     for i in range(0,len(string),len(substring)):
  90.         if string[i-1:i] == substring[:1] and count == 0:
  91.             i = i+(len(substring)-1)
  92.         if string[i-len(substring):i] == substring:
  93.             count+= 1
  94.         else:
  95.             count = 0
  96.         max_count = max(max_count,count)
  97.     return max_count
  98.  
  99.  
  100. """
  101. This is a best practices thing, if you want to look it up you can,
  102. but the short version is it's best to encapsulate your main function call
  103. like this to prevent unintended execution
  104. """
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
RAW Paste Data