Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Identify whose DNA a sequence belongs to by comparing STR run lengths.

Usage: python <script> SEQUENCE_FILE DATABASE_CSV

SEQUENCE_FILE is a text file holding the DNA sequence.  DATABASE_CSV has a
header row of the form ``name,STR1,STR2,...`` followed by one row per
person giving the longest consecutive run of each STR for that person.

Steps:
1. Read the sequence file into a string.
2. Take the STR sub strings ("AGATC", "TCTG", ...) from the CSV header.
3. For each STR, find its longest run of back-to-back repeats in the
   sequence.
4. Print the name from the first CSV row whose numbers all equal those
   runs, or "No match" when no row does.
"""
from sys import argv, exit
import csv
import re


def longest_run(sequence, motif):
    """Return the largest number of back-to-back copies of motif in sequence.

    Every start position is considered, so runs of a self-overlapping motif
    (for example "ABA" inside "ABABAABA") are counted correctly.  An empty
    motif, or one longer than the sequence, yields 0.
    """
    width = len(motif)
    if width == 0 or width > len(sequence):
        return 0

    # runs[i] is the number of consecutive copies of motif starting at i.
    # Filling it from right to left lets each position reuse the answer for
    # the position one motif-width further on, giving a single linear pass.
    runs = [0] * (len(sequence) + 1)
    best = 0
    for start in range(len(sequence) - width, -1, -1):
        if sequence.startswith(motif, start):
            runs[start] = runs[start + width] + 1
            if runs[start] > best:
                best = runs[start]
    return best


def find_match(rows, counts):
    """Return the name in the first row whose values equal counts, else None.

    Each row is a list of strings as produced by csv.reader: the name
    followed by one number per STR, in the same order as counts.
    """
    wanted = [str(count) for count in counts]
    for row in rows:
        if not row:
            # csv.reader yields an empty list for a blank line.
            continue
        if [cell.strip() for cell in row[1:]] == wanted:
            return row[0]
    return None


def main(args):
    """Run the comparison for command line args; return a process exit code."""
    if len(args) < 3:
        print("missing command line argument")
        return 1

    # newline="" is what the csv module asks for so that quoted fields
    # containing line breaks are parsed correctly.
    with open(args[1], "r") as seq_file, \
            open(args[2], "r", newline="") as csv_file:
        sequence = seq_file.read()
        reader = csv.reader(csv_file)
        header = next(reader, None)
        if header is None:
            print("database file is empty")
            return 1
        # Keep the remaining rows in a list: the reader can only be
        # iterated once, and they are needed for the comparison below.
        rows = list(reader)

    # The first header column is the "name" label; the rest are the STRs.
    motifs = header[1:]
    counts = [longest_run(sequence, motif) for motif in motifs]
    for motif, count in zip(motifs, counts):
        print(motif + " repeats " + str(count) + " times")

    name = find_match(rows, counts)
    print(name if name is not None else "No match")
    return 0


if __name__ == "__main__":
    exit(main(argv))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement