Guest User

Untitled

a guest
Dec 13th, 2020
1,542
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.37 KB | None | 0 0
  1. import csv
  2. import sys
  3. import re
  4.  
  5. # Verify the correct usage.
  6. if re.search(".csv$", sys.argv[1]) and re.search(".txt$", sys.argv[2]):
  7.     ()
  8. else:
  9.     print("Usage: python dna.py data.csv sequence.txt")
  10.     sys.exit(0)
  11.  
  12. with open(sys.argv[1], "r") as csvfile:
  13.     csvreader = csv.DictReader(csvfile) # csvreader contains all of the information in the large csv database.
  14.     strList = list(csvreader.fieldnames) # This list contains only the header file of the database. This starts with names and ends at TCTG.
  15.     database = list(csvreader)
  16.     strList.pop(0) # This will remove/pop "names" and leave us JUST with a list [] of STR types.
  17.  
  18. with open(sys.argv[2], "r") as txtfile:
  19.     string = str(txtfile.read()) # This will read the DNA sequnce into one long string that we can iterate through.
  20.  
  21. madlists = []
  22. for i in range(len(strList)): # Iterate through each STR in the list for comparing to the given sequence.
  23.     search = str(strList[i]) # Iterate one by one
  24.     block = len(search) # Search the length of a given STR, as they vary.
  25.     mcso = int(len(max(re.findall('((?:' + re.escape(search) + ')*)', string), key = len)) / block)
  26.     madlists.append(mcso)
  27.  
  28. for line in database:
  29.     person = line['name']
  30.     numbers = [int(value) for value in line[1:]]
  31.     if numbers == madlists:
  32.         print(person)
  33.         sys.exit(0)
  34. print("No match")
  35. sys.exit(1)
Advertisement
Add Comment
Please, Sign In to add comment