Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from sys import argv, exit
import csv
import re


def longest_run(sequence: str, pattern: str) -> int:
    """Return the longest number of back-to-back copies of *pattern* in *sequence*.

    Every start position in *sequence* is considered, so a run does not have
    to begin at index 0 and self-overlapping patterns are handled correctly.

    Args:
        sequence: The text to search (e.g. a DNA sequence).
        pattern: The substring whose consecutive repeats are counted.

    Returns:
        The length, in copies, of the longest consecutive run. Returns 0 when
        *pattern* is empty, longer than *sequence*, or never occurs.
    """
    size = len(pattern)
    if size == 0:
        # An empty pattern matches everywhere without advancing; the original
        # loop would spin forever on it, so treat it as "no repeats".
        return 0

    # runs[i] holds the number of consecutive copies of pattern starting at i.
    # The extra trailing slot lets runs[start + size] be read at the very end.
    runs = [0] * (len(sequence) + 1)
    best = 0
    # Walk backwards so runs[start + size] is already known when needed.
    for start in range(len(sequence) - size, -1, -1):
        if sequence.startswith(pattern, start):
            runs[start] = runs[start + size] + 1
            if runs[start] > best:
                best = runs[start]
    return best


def main(args=None):
    """Report the longest run of each CSV header pattern in a sequence file.

    Usage: ``script.py SEQUENCE_FILE CSV_FILE``

    Args:
        args: Argument vector laid out like ``sys.argv`` (program name first).
            Defaults to ``sys.argv`` when omitted.

    Exits with status 1 when fewer than two file arguments are supplied.
    """
    args = argv if args is None else args
    if len(args) < 3:
        print("missing command line argument")
        # Stop here: continuing would fail on the missing argv entries.
        exit(1)

    # newline="" is what the csv module expects for files it parses.
    with open(args[1], "r") as file, open(args[2], "r", newline="") as csvfile:
        # 1. Read the whole sequence file into a single string.
        contents = file.read()

        # 2. The first CSV row holds the patterns ("AGATC", "TCTG", ...);
        #    an empty file yields an empty header instead of StopIteration.
        csvcontents = csv.reader(csvfile)
        header = next(csvcontents, [])
        print("header prints")
        print(header)

        print("attempting to print rows of csvfile")
        for row in csvcontents:
            print(row)

    print("now header prints:")
    print(header)

    # The first header column is skipped; only the remaining columns are
    # treated as patterns to search for.
    for item in header[1:]:
        print(item + " repeats " + str(longest_run(contents, item)) + " times")


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement