Advertisement
Guest User

Untitled

a guest
Jan 10th, 2023
136
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.66 KB | None | 0 0
  1. import csv
  2. import sys
  3.  
  4.  
  5. def main():
  6.  
  7. # TODO: Check for command-line usage
  8. if len(sys.argv) != 3:
  9. sys.exit("Usage: python dna.py data.csv sequence.txt")
  10.  
  11. # TODO: Read database file into a variable
  12. csvFile = sys.argv[1]
  13. with open(csvFile) as file1:
  14. reader = csv.reader(file1, delimiter=',' , quotechar='"')
  15. lines = list(reader)
  16.  
  17. # TODO: Read DNA sequence file into a variable
  18. database = []
  19. for data in lines:
  20. database.append(data)
  21. # print(f"{database}")
  22.  
  23. # TODO: Find longest match of each STR in DNA sequence
  24. txtFile = sys.argv[2]
  25. file2 = open(txtFile, 'r')
  26. sequence = file2.read()
  27.  
  28. profile = []
  29. for i in range(1, len(database[0])):
  30. subsequence = database[0][i]
  31. n = longest_match(sequence, subsequence)
  32. profile.append(n)
  33. # print(f"{profile}")
  34.  
  35. # TODO: Check database for matching profiles
  36. row = 1
  37. column = 1
  38. index = 0
  39. while row < len(database):
  40. if profile[index] == int(database[row][column]):
  41. column += 1
  42. index += 1
  43. if column == len(database[row]):
  44. print(f"{database[row][0]}")
  45. break
  46. else:
  47. column = 1
  48. row += 1
  49. if row == len(database):
  50. print("No match")
  51.  
  52. return
  53.  
  54.  
  55. def longest_match(sequence, subsequence):
  56. """Returns length of longest run of subsequence in sequence."""
  57.  
  58. # Initialize variables
  59. longest_run = 0
  60. subsequence_length = len(subsequence)
  61. sequence_length = len(sequence)
  62.  
  63. # Check each character in sequence for most consecutive runs of subsequence
  64. for i in range(sequence_length):
  65.  
  66. # Initialize count of consecutive runs
  67. count = 0
  68.  
  69. # Check for a subsequence match in a "substring" (a subset of characters) within sequence
  70. # If a match, move substring to next potential match in sequence
  71. # Continue moving substring and checking for matches until out of consecutive matches
  72. while True:
  73.  
  74. # Adjust substring start and end
  75. start = i + count * subsequence_length
  76. end = start + subsequence_length
  77.  
  78. # If there is a match in the substring
  79. if sequence[start:end] == subsequence:
  80. count += 1
  81.  
  82. # If there is no match in the substring
  83. else:
  84. break
  85.  
  86. # Update most consecutive matches found
  87. longest_run = max(longest_run, count)
  88.  
  89. # After checking for runs at each character in seqeuence, return longest run found
  90. return longest_run
  91.  
  92.  
  93. main()
  94.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement