Advertisement
samuel_og

PSET6 DNA

May 4th, 2022
400
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.78 KB | None | 0 0
  1. import csv
  2. import sys
  3.  
  4.  
  5. def main():
  6.  
  7.     # TODO: Check for command-line usage
  8.     if len(sys.argv) != 3:
  9.         sys.exit("Usage: python dna.py data.csv sequence.txt")
  10.  
  11.     # TODO: Read database file into a variable
  12.     database = sys.argv[1]
  13.     db_names = []
  14.     strnames = []
  15.  
  16.     with open(database, 'r') as file1:
  17.         reader = csv.DictReader(file1)
  18.  
  19.         for line in reader:
  20. #            team["rating"] = int(team["rating"])
  21.             db_names.append(line)
  22.  
  23.         strnames = reader.fieldnames
  24.         strnames.pop(0)
  25.  
  26.     print(db_names)
  27.     print(strnames)
  28.     file1.close()
  29.  
  30.  
  31.     # TODO: Read DNA sequence file into a variable
  32.     sequence = sys.argv[2]
  33.     with open(sequence, 'r') as file2:
  34.         dna = file2.read()
  35.     file2.close()
  36.  
  37.     # TODO: Find longest match of each STR in DNA sequence
  38.     str_counts = []
  39.     for element in strnames:
  40.         str_counts.append(longest_match(dna, element))
  41.  
  42.     print(str_counts)
  43.  
  44.     # TODO: Check database for matching profiles
  45.     # Find the str key in the db_names and compare the value of that key to str_runs
  46.  
  47.     # I know this is currently wrong. pls help
  48.     for i in db_names[strnames]:
  49.         matches = 0
  50.         print(db_names[i])
  51.         for j in str_counts:
  52.             if i == j:
  53.                 matches += 1
  54.             if matches == strnames:
  55.                 print(db_names[i]['name'])
  56.                 exit(0)
  57.     print("No match")
  58.  
  59. #    for i in range(len(strnames)):
  60.  
  61.     return
  62.  
  63.  
  64. def longest_match(sequence, subsequence):
  65.     """Returns length of longest run of subsequence in sequence."""
  66.  
  67.     # Initialize variables
  68.     longest_run = 0
  69.     subsequence_length = len(subsequence)
  70.     sequence_length = len(sequence)
  71.  
  72.     # Check each character in sequence for most consecutive runs of subsequence
  73.     for i in range(sequence_length):
  74.  
  75.         # Initialize count of consecutive runs
  76.         count = 0
  77.  
  78.         # Check for a subsequence match in a "substring" (a subset of characters) within sequence
  79.         # If a match, move substring to next potential match in sequence
  80.         # Continue moving substring and checking for matches until out of consecutive matches
  81.         while True:
  82.  
  83.             # Adjust substring start and end
  84.             start = i + count * subsequence_length
  85.             end = start + subsequence_length
  86.  
  87.             # If there is a match in the substring
  88.             if sequence[start:end] == subsequence:
  89.                 count += 1
  90.  
  91.             # If there is no match in the substring
  92.             else:
  93.                 break
  94.  
  95.         # Update most consecutive matches found
  96.         longest_run = max(longest_run, count)
  97.  
  98.     # After checking for runs at each character in seqeuence, return longest run found
  99.     return longest_run
  100.  
  101.  
  102. main()
  103.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement