Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Identify which person in a DNA database matches a DNA sequence.

Usage: python dna.py DATABASE.csv SEQUENCE.txt

The database is a CSV file whose header row is ``name`` followed by one
column per STR (short tandem repeat); every later row holds a person's name
and, for each STR, the longest run of consecutive repeats found in that
person's DNA.  The sequence file holds one DNA sequence as plain text.
"""
import csv
import sys
from sys import argv, exit


def longest_run(sequence, pattern):
    """Return the longest run of back-to-back copies of *pattern* in *sequence*.

    Args:
        sequence: The DNA text to scan.
        pattern: The STR whose consecutive repeats are counted.

    Returns:
        The greatest number of immediately adjacent repetitions of
        ``pattern`` found anywhere in ``sequence``; 0 when there is none or
        when ``pattern`` is empty.
    """
    size = len(pattern)
    if size == 0:
        return 0

    # runs[p] is the number of consecutive copies of the pattern that start
    # exactly at position p.  Filling it from right to left lets each entry
    # reuse the one `size` characters further on, so every starting phase is
    # tracked separately and a run that touches the end of the sequence is
    # still counted.
    runs = [0] * (len(sequence) + 1)
    best = 0
    for pos in range(len(sequence) - size, -1, -1):
        if sequence.startswith(pattern, pos):
            runs[pos] = runs[pos + size] + 1
            best = max(best, runs[pos])
    return best


def matching_names(rows, counts):
    """Return the names of every database row whose STR counts equal *counts*.

    Args:
        rows: Database rows without the header; each is
            ``[name, count, count, ...]`` with the counts as strings.
        counts: Expected run lengths, in the same column order as the
            database header.

    Returns:
        A list of matching names in database order (empty when nobody
        matches).

    Raises:
        ValueError: If a count cell does not hold an integer.
    """
    expected = list(counts)
    names = []
    for row in rows:
        if not row:  # csv.reader yields [] for blank lines
            continue
        # Column j of the row is compared with the count for the j-th STR;
        # a row with a different number of columns can never compare equal.
        if [int(cell) for cell in row[1:]] == expected:
            names.append(row[0])
    return names


def main(args=None):
    """Run the matcher and return the process exit status.

    Args:
        args: Command-line arguments including the program name; defaults
            to ``sys.argv``.

    Returns:
        1 when the argument count is wrong, otherwise 0.
    """
    args = argv if args is None else args
    if len(args) != 3:
        print("Please input a valid DNA database followed by a valid DNA sequence")
        return 1

    # newline="" is what the csv module expects of files handed to it.
    with open(args[1], newline="") as db_file:
        table = list(csv.reader(db_file))
    with open(args[2], "r") as seq_file:
        sequence = seq_file.read().strip()

    if not table:  # an empty database cannot match anyone
        print("No match")
        return 0

    str_names = table[0][1:]  # header row minus the leading "name" column
    counts = [longest_run(sequence, name) for name in str_names]

    names = matching_names(table[1:], counts)
    for name in names:
        print(name)
    if not names:
        print("No match")
    return 0


if __name__ == "__main__":
    exit(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement