Advertisement
joseleeph

Untitled

Dec 10th, 2020
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.20 KB | None | 0 0
  1. from sys import argv, exit
  2. import csv
  3. import re
  4. if len(argv) < 3:
  5. print("missing command line argument")
  6.  
  7. #INSTRUCTIONS
  8. #1. Read the sequence file into a string (you've got this).
  9.  
  10. #2. Open the csv file and get each sub string ("AGATC", "TCTG", etc)
  11. #from the first row into a list (you've essentially done this too).
  12.  
  13. #3. For each item in the list of sub strings,
  14. #look through the sequence you read in step 1
  15. #and see what the longest sequential run is.
  16. #Store this number in a list so you have the #longest runs of all sub strings.
  17.  
  18. #4. Iterate through the rest of the CSV file,
  19. #comparing the numbers in each row to the list
  20. #of numbers you created in step 3. When all
  21. #numbers match, the name from this row is the one to print.
  22. with open(argv[1],"r") as file, open(argv[2],"r") as csvfile:
  23. count = 0
  24. contents = file.read() #1. Read the sequence file into a string (you've got this).
  25. csvcontents = csvfile.read()
  26. #2. Open the csv file and get each sub string
  27. #("AGATC", "TCTG", etc) from the first row
  28. #into a list (you've essentially done this too).
  29.  
  30. header = next(csvcontents)
  31. print("header prints")
  32. print(header)
  33.  
  34. #SENTOX ADVICE:
  35. # when you loop through anything that iterates in python
  36. # objects usually have a next method that provides the next
  37.  
  38. # when you call something like
  39.  
  40. # for row in csvcontents:
  41.  
  42. # internally python will keep calling next() from csvcontents
  43. # to get each row one at a time
  44.  
  45. # since the file has just been opened, it is the first row by definintion
  46. # with csv reader, the returns a lot of values
  47. # the header is already a list of values from the first row.
  48. #appending these values into another list is reduntant
  49.  
  50. print("attempting to print rows of csvfile")
  51. for row in csvcontents:
  52. print(row)
  53. #SENTOX ADVICE:
  54. #you can use the subscript [1:] which means:
  55. #make a copy of this list starting from element 1 (which is the second element)
  56. #to the end of the list... in other words, drop the first element
  57.  
  58. # this subscript cn be used directly on the list returned by next
  59.  
  60. #complist = next(csvcontents)[1:]
  61.  
  62. #3. For each item in the list of sub strings,
  63. #look through the sequence you read
  64. #in step 1 and see what the longest
  65. #sequential run is. Store this number
  66. #in a list so you have the longest runs of all sub strings.
  67.  
  68. for item in contents:
  69. beg = 0
  70. end = len(item)
  71. seqrun = 0
  72. longest = 0
  73. while contents[beg:beg+len(item)]:
  74. if contents[beg:beg+len(item)] == item: # trying to solve the issue of end being incremented incorrectly here
  75. seqrun = 1
  76. while contents[beg + len(item): end + len(item)] == item:
  77. seqrun += 1
  78. beg += len(item)
  79. end += len(item)
  80. if seqrun > longest:
  81. longest = seqrun
  82. if seqrun > 1:
  83. print(item + " repeats " + str(seqrun) + " times")
  84. beg += 1
  85. end += 1
  86. else:
  87. beg += 1
  88. ebd += 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement