Guest User

Untitled

a guest
Jul 5th, 2020
225
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import csv
  2. import sys
  3. from sys import argv, exit
  4.  
  5.  
  6. results = {} #a dict function to put the sequence results in
  7.  
  8. if len(argv) != 3:
  9.     print("Please input a valid DNA database followed by a valid DNA sequence")
  10.     exit(1)
  11.  
  12.  
  13. DB_file = csv.reader(open(argv[1]))
  14. DB_file = list(DB_file)
  15. STR = (DB_file[0][1:])  #takes keys by looking at row 0 and column 1 and on
  16. K = len(STR) #how many DNA keys are in the database
  17.  
  18. row_count = len(DB_file)
  19.  
  20.  
  21. seq_file = open(argv[2], "r")
  22. sequence = seq_file.read()
  23. L = len(sequence)
  24.  
  25. counter = 0
  26. max_counter = 0
  27.  
  28. for i in range(K):       #go through every STR in database
  29.     key_length = len(STR[i])
  30.  
  31.     for j in range(L):   #go through entire sequance
  32.  
  33.         if STR[i] == sequence[j:j + key_length]:
  34.             counter = counter + 1
  35.  
  36.             if j + key_length < L:
  37.                 j = j + key_length
  38.  
  39.  
  40.             if STR[i] != sequence[j:j + key_length]:
  41.  
  42.                 if counter > max_counter:
  43.                     max_counter = counter
  44.                     counter = 0
  45.  
  46.                 else:
  47.                     counter = 0
  48.  
  49.     results[STR[i]] = max_counter
  50.     max_counter = 0
  51.  
  52. print(results)
  53.  
  54.  
  55.  
  56. match = 0
  57. match_true = 0 #to change to 1 to indicate a match
  58.  
  59. for h in range(K): #for each STR. K is the total amount of keys
  60.  
  61.  
  62.     for i in range(1, row_count):  #row in the database starting from the second to the last
  63.  
  64.  
  65.         for j in range(1, K+1):  #column in the database starting from the second to the last
  66.  
  67.  
  68.             if results[STR[h]] == int(DB_file[i][j]): #compare each STR count in results with the counts of the database
  69.                 match = match + 1
  70.  
  71.                
  72.                 if j==K and results[STR[h]] != int(DB_file[i][j]):
  73.                     match = 0
  74.                    
  75.                 if match < K and j==K:
  76.                     match = 0
  77.  
  78.                 if match == K and j==K:
  79.                     print(match)
  80.                     match_true = 1
  81.                     print(DB_file[i][0])
  82.                    
  83.  
  84.  
  85. if match_true != 1:
  86.     print("No match")
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound or popup ads; we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×