SKTLV

CS50 pset6 dna

Jan 18th, 2020
51
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.11 KB | None | 0 0
  1. import sys
  2. from sys import argv
  3. import csv
  4. import re
  5.  
  6. # my functions ---------------------------------------
  7. def repitition_index (SQ, name):
  8.     index = 0
  9.     name_length = len(name)
  10.     name_index = []
  11.     repititions = 0 # the return valu - how many times name repeat in
  12.  
  13.     while index <(len(SQ)): # put all repititions indexes in name_index []
  14.  
  15.         i = SQ.find(name, index) # this return the index of name, untill the index value is EOF SQ
  16.         if i == -1: # SQ,find returns -1 if substring is not found at all
  17.             repititions = 0
  18.             return name_index
  19.  
  20.         name_index.append(i)
  21.         index = i + 1
  22.  
  23.     # check how many of the indexes repeat consecutively
  24.  
  25.     n = len(name_index)
  26.  
  27.     print(f"Indexes:{name_index}")
  28.     print(f"Indexes Length: {n}")
  29.  
  30.     for i in range(n):
  31.         if name_index[i+1] - name_index[i] ==  name_length:
  32.            repititions += 1
  33.  
  34.     return name_index
  35.  
  36. #------------------------------------------------------------------------------
  37.  
  38. # function so return how many repititions are consecutive
  39. def cosecutive_repititions (indexList, length):
  40.     n = len(indexList)
  41.     counter = 1
  42.  
  43.     for i in range(n-1):
  44.         if indexList[i+1] - indexList[i] == length:
  45.             counter += 1
  46.  
  47.     return counter
  48.  
  49. # --------M A I N-----------------------------------------end of functions
  50. #  check that commant has 2 arguments
  51. if(len(argv) != 3):
  52.     print("Usage: python dna.py data.csv sequence.txt")
  53.     sys.exit()
  54.  
  55. # define data as database and seq as swquances
  56. data = argv[1]
  57. sqFile = argv[2]
  58.  
  59. f = open(sqFile, "r") # open the sequence file
  60. SQ = f.read()
  61. print(SQ)
  62.  
  63. with open(data, newline='') as csvfile: # opent the sequence file as csv file
  64.     dataTable = csv.DictReader(csvfile) # put the data in dictionary, firs row as Keys, and all rows as value
  65.  
  66.     for row in dataTable: #scan the rows of tables (keys)
  67.  
  68.         matchCounter = 0 # count the matches of specific sequence
  69.         xx = 5
  70.  
  71.         for name,seq in row.items(): # iterate in the row of the dictionary with keys, values
  72.  
  73.             x = SQ.count(name) # How many times substring 'name'(key) appears in SQ
  74.  
  75.             # call the func to find indexes of repetitions
  76.             indexList = repitition_index(SQ, name)
  77.  
  78.             nameLength = len(name)
  79.  
  80.             print(name, seq)
  81.  
  82.             #call func to get how many consecutive repetitions
  83.             n = cosecutive_repititions (indexList, nameLength)
  84.             if n == seq:
  85.                 matchCounter += 1
  86.                 print("Match Found")
  87.  
  88.                 if matchCounter == len(row) - 1:
  89.                     print("Found Perfect Match!")
  90.                     exit()
  91.  
  92.             print(f"n={n} seq={seq} Match={matchCounter}")
  93.  
  94.         #if matchCounter == len(row) - 1:
  95.             #print("Found Perfect Match!")
  96.             #exit()
  97.  
  98.     print("=====")
  99.     print("No Match Found")
  100.     print(f"Match Counter = {matchCounter}")
  101.     print(f"XX = {xx}")
  102.     print("=====")
  103.  
  104. #============================================================================================== end of program
Add Comment
Please, Sign In to add comment