Advertisement
joseleeph

Untitled

Dec 9th, 2020
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.96 KB | None | 0 0
  1. from sys import argv, exit
  2. import csv
  3. import re
  4. if len(argv) < 3:
  5. print("mising command-line argument")
  6. exit(1)
  7.  
  8. #INSTRUCTIONS
  9.  
  10. #1. Read the sequence file into a string (you've got this).
  11.  
  12. #2. Open the csv file and get each sub string ("AGATC", "TCTG", etc) from the first row into a list (you've essentially done this too).
  13.  
  14. #3. For each item in the list of sub strings, look through the sequence you read in step 1 and see what the longest sequential run is. Store this number in a list so you have the longest runs of all sub strings.
  15.  
  16. #4. Iterate through the rest of the CSV file, comparing the numbers in each row to the list of numbers you created in step 3. When all numbers match, the name from this row is the one to print.
  17.  
  18.  
  19. with open(argv[1],"r") as file, open(argv[2],"r") as csvfile:
  20. count = 0
  21. contents = file.read() #1. Read the sequence file into a string (you've got this).
  22. csvcontents = csv.reader(csvfile)
  23. #2. Open the csv file and get each sub string
  24. #("AGATC", "TCTG", etc) from the first row
  25. #into a list (you've essentially done this too).
  26. header = next(csvcontents)
  27. print("how long is header?")
  28. print(len(header))
  29. sublist = []
  30. print("at the moment sublist prints")
  31. print(sublist)
  32. print("now appending sublist")
  33. #for i in range(len(header)):
  34. for item in header:
  35. #header[i] += sublist
  36. sublist.append(item)
  37. print("sublist now prints")
  38. print(sublist)
  39. print("print(sublist[1:]) prints: ")
  40. print(sublist[1:])
  41.  
  42. #3. For each item in the list of sub strings,
  43. #look through the sequence you read
  44. #in step 1 and see what the longest
  45. #sequential run is. Store this number
  46. #in a list so you have the longest runs of all sub strings.
  47.  
  48. for item in sublist[1:]:
  49. beg = 0
  50. end = len(item)
  51. seqrun = 0
  52. if contents[beg:end] == item:
  53. seqrun = 1
  54. while contents[beg + end:end + end] == item:
  55. seqrun += 1
  56. beg += end
  57. end += end
  58. else:
  59. beg += 1
  60. end += 1
  61. print(item + " prints " + str(seqrun) + "times")
  62.  
  63.  
  64. subindex = 1
  65. sublend = len(contents[subindex])
  66. while contents[subindex:sublend]:
  67. span = contents[subindex:sublend]
  68. repcount = 1
  69. while contents[subindex+sublend: sublend + sublend] == span:
  70. repcount += 1
  71. subindex += sublend
  72. sublend += sublend
  73. if repcount > 1:
  74. print(span + " repeats " + str(repcount) + " times")
  75. subindex += sublend
  76. sublend += sublend
  77. #sublend += len(contents[subindex+sublend])
  78. else:
  79. subindex += 1
  80. sublend += sublend
  81. listindex = 1
  82. for item in sublist[1:]:
  83. if item == span:
  84. print("item number " + listindex + "matches")
  85.  
  86.  
  87.  
  88.  
  89. #sublist = []
  90. #for item in sublist: # this list is empty
  91. #charcount = len(item)
  92. #index = 0
  93. #print("item " + item + " is " + str(charcount) + " long")
  94. #longest = 0
  95. #if charcount > longest:
  96. #longest = charcount
  97.  
  98. #index = 0
  99. #while contents[index:charcount]:
  100. #span = contents[index:charcount]
  101. #repcount = 1
  102. #while contents[index + charcount: charcount + charcount] == span:
  103. #repcount += 1
  104.  
  105.  
  106. #Store this number
  107. #in a list so you
  108. #have the longest runs of all sub strings. ... why where?
  109.  
  110. #index += charcount
  111. #charcount += charcount
  112. #sublist.append(repcount) # is this what is meant?
  113. #print("sublist prints: ")
  114. #print(sublist)
  115.  
  116. #print("stringcompare prints: ")
  117. #print(stringcompare)
  118. #4. Iterate through the rest of the CSV file,
  119. #comparing the numbers in each row to the list
  120. #of numbers you created in step 3.
  121. #When all numbers match, the name
  122. #from this row is the one to print.
  123. #for i in header:
  124. #n = 0
  125. #print("header[" + n + "] prints:")
  126. #print(i)
  127. #n += 1
  128.  
  129.  
  130.  
  131.  
  132.  
  133.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement