Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
- from sys import argv
- import csv
- import re
- # my functions ---------------------------------------
def repitition_index(SQ, name):
    """Return the start index of every occurrence of ``name`` in ``SQ``.

    Occurrences may overlap: the search resumes one character after each
    hit, so ``"AA"`` is reported at 0, 1 and 2 in ``"AAAA"``.

    Args:
        SQ: The string to search.
        name: The substring to look for.

    Returns:
        A list of indexes in ascending order. The list is empty when
        ``name`` does not occur in ``SQ`` or when ``name`` is empty.
    """
    positions = []
    if not name:
        # str.find("") succeeds at every offset, which is not a real hit.
        return positions

    hit = SQ.find(name)
    while hit != -1:
        positions.append(hit)
        # Advance by one, not len(name), so overlapping occurrences are kept.
        hit = SQ.find(name, hit + 1)
    return positions
- #------------------------------------------------------------------------------
- # function to return the number of consecutive repetitions
def cosecutive_repititions(indexList, length):
    """Return the length of the longest run of back-to-back occurrences.

    Two occurrences are back-to-back when their start indexes differ by
    exactly ``length``. The partner of an occurrence does not have to be its
    neighbour in ``indexList``, so overlapping hits in between do not break
    a run.

    Args:
        indexList: Start indexes of the occurrences.
        length: Length of the repeated substring; expected to be positive.

    Returns:
        The number of occurrences in the longest run, or 0 when
        ``indexList`` is empty.
    """
    # Maps a start index to the length of the run that ends at that index.
    run_ending_at = {}
    longest = 0
    # Ascending order guarantees a predecessor is recorded before it is needed.
    for position in sorted(indexList):
        run = run_ending_at.get(position - length, 0) + 1
        run_ending_at[position] = run
        if run > longest:
            longest = run
    return longest
- # --------M A I N-----------------------------------------end of functions
def main():
    """Match the STR runs of a DNA sequence file against a CSV database.

    Reads the database path and the sequence path from the command line,
    measures the sequence once for every STR column named in the CSV header,
    and reports the first row whose counts all equal the measured values.
    Prints "No Match Found" once if no row matches.
    """
    # Exactly two arguments are required: the database and the sequence file.
    if len(argv) != 3:
        print("Usage: python dna.py data.csv sequence.txt")
        sys.exit(1)  # non-zero status so callers can detect the usage error

    data = argv[1]
    sqFile = argv[2]

    # The context manager closes the sequence file as soon as it is read.
    with open(sqFile, "r") as f:
        SQ = f.read()

    with open(data, newline='') as csvfile:
        # DictReader takes the first CSV row as the keys of every later row.
        dataTable = csv.DictReader(csvfile)
        columns = list(dataTable.fieldnames or [])
        # NOTE(review): assumes the first column identifies the person and
        # every remaining column is an STR count - confirm against the data.
        id_column = columns[0] if columns else None
        str_names = columns[1:]

        # The sequence does not change between rows, so measure each STR once.
        measured = {
            name: cosecutive_repititions(repitition_index(SQ, name), len(name))
            for name in str_names
        }

        for row in dataTable:
            try:
                # CSV cells are strings; convert before comparing with ints.
                expected = {name: int(row[name]) for name in str_names}
            except (TypeError, ValueError):
                # A missing or non-numeric count can never equal a measured
                # run length, so this row cannot be a match.
                continue
            # Require at least one STR column so an empty comparison is not
            # reported as a match.
            if str_names and expected == measured:
                print("Found Perfect Match!")
                print(row[id_column])
                sys.exit()

    print("No Match Found")


if __name__ == "__main__":
    main()
- #============================================================================================== end of program
Add Comment
Please, Sign In to add comment