Guest User

Untitled

a guest
Dec 20th, 2020
136
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.15 KB | None | 0 0
import csv
import sys
from sys import argv

  4. # Get exactly 2 arguments: STR database csv and dna sequence file
  5. if len(argv) != 3:
  6.     print("Incorrect format. Correct usage: python dna.py database.csv sequence.txt ")
  7.     exit(1)
  8.  
  9. # Function for copying a dictionary and removing a certain key
  10. def removekey(d, key):
  11.     r = dict(d)
  12.     del r[key]
  13.     return r
  14.  
  15. # Open the dna sequence file, read it to memory.
  16. seqFile = open(argv[2], "r")
  17. seqfile_txt = seqFile.read()
  18.  
  19. # Open the database csv file, read it to memory. Assume first row contains headers
  20.  
  21. with open(argv[1], mode='r') as csv_file:
  22.     csv_reader = csv.DictReader(csv_file)
  23. # Store the STR col. headers from the first row of the file as a list for iteration
  24.     str_names = (csv_reader.fieldnames)[1:]
  25.  
  26. # Create a dictionary to store the STR names as keys. No values for the time being.
  27.     sequence_dict = {}
  28.     sequence_dict = sequence_dict.fromkeys(str_names)
  29.  
  30.     for strname in str_names:
  31.         char_counter = 0
  32.         best_streak = 0
  33.         for char in seqfile_txt:
  34.             #print(seqfile_txt[char_counter:(char_counter + len(strname))])
  35.             multicount = 0
  36.             streak = 0
  37.             while seqfile_txt[(char_counter + (multicount * len(strname))) : (char_counter + (len(strname) * (1 + multicount)))] == strname:
  38.                 multicount +=1
  39.                 streak +=1
  40.             if streak > best_streak:
  41.                 best_streak = streak
  42.             sequence_dict[strname] = best_streak
  43.             char_counter +=1
  44.  
  45. #Check if the values from sequence file exist in the database csv. If yes, print the name associated with the STR values from the database.
  46.     for row in csv_reader:
  47.         # Creating a new dictionary without the name column for easy comparision
  48.         nameless_row = removekey(row,"name")
  49.         # Since the values from CSV file are strings, convert them into ints for set comparision with the sequence file values
  50.         dbset = set(map(int, set(nameless_row.values())))
  51.        
  52.         for name in nameless_row.items():
  53.             if dbset  == set(sequence_dict.values()):
  54.                 print(row["name"])
  55.                 break
Advertisement
Add Comment
Please, Sign In to add comment