Advertisement
joseleeph

Untitled

Dec 13th, 2020
52
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.25 KB | None | 0 0
  1. from sys import argv, exit
  2. import csv
  3. import re
  4. if len(argv) < 3:
  5. print("missing command line argument")
  6.  
  7. with open(argv[1],"r") as file, open(argv[2],"r") as csvfile:
  8. count = 0
  9. contents = file.read() #1. Read the sequence file into a string (you've got this).
  10. csvcontents = csv.reader(csvfile)
  11. #2. Open the csv file and get each sub string
  12. #("AGATC", "TCTG", etc) from the first row
  13. #into a list (you've essentially done this too).
  14.  
  15. header = next(csvcontents)
  16. print("header prints")
  17. print(header)
  18.  
  19. print("attempting to print rows of csvfile")
  20. for row in csvcontents:
  21. print(row)
  22. print("now header prints:")
  23. print(header)
  24.  
  25. for item in header[1:]:
  26. beg = 0 # beginning index
  27. end = len(item) # item length
  28. seqrun = 0
  29. longest = 0
  30. while beg + end <= len(contents):
  31. seqrun = 0
  32. while contents[beg: beg + end] == item: # the first len(item) needs to be removed
  33. seqrun += 1
  34. beg += len(item)
  35. if seqrun > longest:
  36. longest = seqrun
  37. beg += 1
  38. end += 1
  39. print(item + " repeats " + str(longest) + "times") # why is it longest?
  40.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement