Advertisement
joseleeph

Untitled

Dec 9th, 2020
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.40 KB | None | 0 0
  1. from sys import argv, exit
  2. import csv
  3. import re
  4. if len(argv) < 3:
  5. print("mising command-line argument")
  6. exit(1)
  7.  
  8. #INSTRUCTIONS
  9.  
  10. #1. Read the sequence file into a string (you've got this).
  11.  
  12. #2. Open the csv file and get each sub string ("AGATC", "TCTG", etc) from the first row into a list (you've essentially done this too).
  13.  
  14. #3. For each item in the list of sub strings, look through the sequence you read in step 1 and see what the longest sequential run is. Store this number in a list so you have the longest runs of all sub strings.
  15.  
  16. #4. Iterate through the rest of the CSV file, comparing the numbers in each row to the list of numbers you created in step 3. When all numbers match, the name from this row is the one to print.
  17.  
  18.  
  19. with open(argv[1],"r") as file, open(argv[2],"r") as csvfile:
  20. count = 0
  21. contents = file.read() #1. Read the sequence file into a string (you've got this).
  22. csvcontents = csv.reader(csvfile)
  23. #2. Open the csv file and get each sub string
  24. #("AGATC", "TCTG", etc) from the first row
  25. #into a list (you've essentially done this too).
  26. header = next(csvcontents)
  27. print("how long is header?")
  28. print(len(header))
  29. sublist = []
  30. for item in header:
  31. sublist.append(item)
  32. complist = sublist[1:]
  33.  
  34.  
  35. #3. For each item in the list of sub strings,
  36. #look through the sequence you read
  37. #in step 1 and see what the longest
  38. #sequential run is. Store this number
  39. #in a list so you have the longest runs of all sub strings.
  40.  
  41. #for item in sublist[1:]:
  42.  
  43. #while contents[beg:end]:
  44.  
  45. for item in complist: # look at each item in the list
  46. beg = 0 # beginning index
  47. end = len(item) # ending index
  48. seqrun = 0 # number of times the sequence runs/repeats
  49. while contents[beg:end]: # while the substring of contents from beginning to end have values
  50. if contents[beg:end] == item: # if the span of contents is equal to the item in the list
  51. seqrun = 1 # it occurs at least once
  52. while contents[beg + end:end + end] == item: # as long as the contents span from beginning to end matches the contents span when incremented by end
  53. seqrun += 1
  54. beg += end
  55. end += end
  56. if seqrun > 1:
  57. print(item + " repeats " + seqrun + " times")
  58. beg += 1
  59. end += 1
  60. else:
  61. beg += 1
  62. end += 1
  63.  
  64.  
  65.  
  66.  
  67.  
  68.  
  69.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement