Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- import sys
- import re
USAGE = "Usage: python dna.py data.csv sequence.txt"


def longest_run(sequence, str_unit):
    """Return the longest count of back-to-back repeats of str_unit in sequence.

    Every possible end position is examined, so a run is found even when it
    starts in the middle of an earlier, shorter occurrence of the unit (this
    can happen for units whose suffix equals their prefix, e.g. "TTTTTTCT").

    Args:
        sequence: The DNA sequence to scan.
        str_unit: The short tandem repeat to look for.

    Returns:
        The maximum number of consecutive repeats; 0 if the unit never
        occurs or if str_unit is empty.
    """
    unit_len = len(str_unit)
    if unit_len == 0:
        return 0

    # runs[end] = number of consecutive repeats that finish exactly at
    # index `end` (exclusive) of the sequence.
    runs = [0] * (len(sequence) + 1)
    best = 0
    for end in range(unit_len, len(sequence) + 1):
        if sequence.startswith(str_unit, end - unit_len):
            runs[end] = runs[end - unit_len] + 1
            if runs[end] > best:
                best = runs[end]
    return best


def load_database(csv_path):
    """Read the CSV database.

    Args:
        csv_path: Path of the CSV file; its first row is the header.

    Returns:
        A (fieldnames, rows) tuple: the header as a list of column names
        (empty if the file has no header) and the data rows as dicts.
    """
    # newline="" is what the csv module asks for when opening files.
    with open(csv_path, "r", newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        fieldnames = list(reader.fieldnames or [])
        rows = list(reader)
    return fieldnames, rows


def find_match(rows, name_field, str_names, counts):
    """Return the name of the first row whose STR counts equal counts.

    Args:
        rows: Database rows as dicts (as produced by csv.DictReader).
        name_field: Key of the column holding the person's name.
        str_names: STR column names, in the same order as counts.
        counts: Longest-run count computed for each STR in str_names.

    Returns:
        The matching person's name, or None when no row matches.
    """
    for row in rows:
        try:
            profile = [int(row[str_name]) for str_name in str_names]
        except (KeyError, TypeError, ValueError):
            # A row with a missing or non-numeric count cannot match.
            continue
        if profile == counts:
            return row[name_field]
    return None


def main(argv):
    """Run the DNA matcher and return the process exit status.

    Args:
        argv: Command-line arguments, including the program name at index 0.

    Returns:
        0 when a matching person was printed; 1 on a usage error or when
        "No match" was printed.
    """
    if (
        len(argv) < 3
        or not argv[1].endswith(".csv")
        or not argv[2].endswith(".txt")
    ):
        print(USAGE)
        return 1

    fieldnames, rows = load_database(argv[1])
    if not fieldnames:
        # An empty database has nobody to match against.
        print("No match")
        return 1

    # The first column identifies the person; every other column is an STR.
    name_field, *str_names = fieldnames

    with open(argv[2], "r") as txtfile:
        sequence = txtfile.read().strip()

    counts = [longest_run(sequence, str_name) for str_name in str_names]

    person = find_match(rows, name_field, str_names, counts)
    if person is None:
        print("No match")
        return 1

    print(person)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Advertisement
Add Comment
Please, Sign In to add comment