Advertisement
unleashbear

Untitled

Feb 9th, 2023
142
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.47 KB | None | 0 0
  1. import csv
  2. import sys
  3.  
  4.  
  5. def main():
  6.  
  7. # TODO: Check for command-line usage
  8. if len(sys.argv) != 3 :
  9. print("Too Many/little Arguments ")
  10.  
  11. # TODO: Read database file into a variable
  12. weu = dict()
  13. with open (sys.argv[1] , "r") as file:
  14. reader = csv.DictReader(file)
  15. for name in reader :
  16. name["name"] = name["name"]
  17. name["AGATC"] = int(name['AGATC'])
  18. name["AATG"] = int(name['AATG'])
  19. name["TATC"] = int(name['TATC'])
  20.  
  21.  
  22. # TODO: Read DNA sequence file into a variable
  23. with open(sys.argv[2], "r") as (F):
  24. reader =csv.reader(F)
  25. text = next(reader)
  26.  
  27. # TODO: Find longest match of each STR in DNA sequence
  28. A = longest_match(text,"AGATC")
  29. B = longest_match(text, 'AATG')
  30. C =longest_match(text, "TATC")
  31. print(A)
  32. print(B)
  33. print(C)
  34.  
  35.  
  36.  
  37.  
  38. # TODO: Check database for matching profile
  39. for name["name"] in name:
  40. if A == name["AGATC"] and B == name["AATG"] and C== name["TATC"] :
  41. print(name['name'])
  42. break
  43. else :
  44. print("No match")
  45. break
  46.  
  47. return
  48.  
  49.  
  50. def longest_match(sequence, subsequence):
  51. """Returns length of longest run of subsequence in sequence."""
  52.  
  53. # Initialize variables
  54. longest_run = 0
  55. subsequence_length = len(subsequence)
  56. sequence_length = len(sequence)
  57.  
  58. # Check each character in sequence for most consecutive runs of subsequence
  59. for i in range(sequence_length):
  60.  
  61. # Initialize count of consecutive runs
  62. count = 0
  63.  
  64. # Check for a subsequence match in a "substring" (a subset of characters) within sequence
  65. # If a match, move substring to next potential match in sequence
  66. # Continue moving substring and checking for matches until out of consecutive matches
  67. while True:
  68.  
  69. # Adjust substring start and end
  70. start = i + count * subsequence_length
  71. end = start + subsequence_length
  72.  
  73. # If there is a match in the substring
  74. if sequence[start:end] == subsequence:
  75. count += 1
  76.  
  77. # If there is no match in the substring
  78. else:
  79. break
  80.  
  81. # Update most consecutive matches found
  82. longest_run = max(longest_run, count)
  83.  
  84. # After checking for runs at each character in seqeuence, return longest run found
  85. return longest_run
  86.  
  87.  
  88. main()
  89.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement