Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- import sys
def main():
    """Print the name of the person whose STR profile matches a DNA sequence.

    Expects exactly two command-line arguments:

    1. Path to a CSV database whose header contains a ``name`` column plus
       one column per STR; each STR cell holds that person's repeat count.
    2. Path to a text file holding the DNA sequence.

    For every STR column, the longest consecutive run in the sequence is
    computed with ``longest_match``. The first database row whose counts all
    equal those runs has its ``name`` printed; if no row matches, ``No match``
    is printed once.
    """
    # Stop on bad usage rather than falling through to an IndexError (too few
    # arguments) or silently ignoring extras (too many).
    if len(sys.argv) != 3:
        print("Too Many/little Arguments ")
        sys.exit(1)

    # Keep every row of the database so each person can be compared later.
    # newline="" is what the csv module recommends for files it parses.
    with open(sys.argv[1], "r", newline="") as database_file:
        reader = csv.DictReader(database_file)
        # Every header column other than "name" is treated as an STR, so the
        # program works for databases with any number of STRs.
        str_names = [
            field for field in (reader.fieldnames or []) if field != "name"
        ]
        people = list(reader)

    # The sequence must be a plain string: slicing a list of CSV fields would
    # never compare equal to an STR string.
    with open(sys.argv[2], "r") as sequence_file:
        sequence = sequence_file.read()

    # Longest consecutive run of each STR in the sequence.
    profile = {
        str_name: longest_match(sequence, str_name) for str_name in str_names
    }

    # Report the first person whose counts agree on every STR.
    for person in people:
        if all(int(person[s]) == profile[s] for s in str_names):
            print(person["name"])
            return

    # Only reached when no row matched.
    print("No match")
def longest_match(sequence, subsequence):
    """Return the length of the longest run of subsequence in sequence.

    A "run" is the number of back-to-back, non-overlapping copies of
    ``subsequence`` starting at some position of ``sequence``.

    Args:
        sequence: The sliceable sequence (typically a ``str``) to search.
        subsequence: The pattern whose consecutive repeats are counted.

    Returns:
        The largest number of consecutive repeats found, or 0 when the
        pattern never occurs or is empty.
    """
    subsequence_length = len(subsequence)

    # An empty pattern equals every empty slice and never advances the
    # window, so the scan below would never terminate; define its run as 0.
    if subsequence_length == 0:
        return 0

    longest_run = 0

    # Try every position as the potential start of a run.
    for i in range(len(sequence)):
        count = 0
        start = i

        # Slide the window forward one pattern-length at a time for as long
        # as it keeps matching.
        while sequence[start:start + subsequence_length] == subsequence:
            count += 1
            start += subsequence_length

        longest_run = max(longest_run, count)

    return longest_run
# Run the program only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement