Advertisement
Guest User

Untitled

a guest
Nov 15th, 2019
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.05 KB | None | 0 0
  1. import getopt
  2. import sys
  3. import string
  4.  
  5. NOT_FOUND = 'Not Found'
  6.  
  7.  
  8. def main(argv):
  9. inputfile = 'seneca.txt'
  10. pattern = 'omne'
  11. input_text = ''
  12.  
  13. try:
  14. opts, args = getopt.getopt(argv, "hi:p:", ["ifile=", "pattern="])
  15. except getopt.GetoptError:
  16. print('test.py -i <inputfile> -p <pattern>')
  17. sys.exit(2)
  18. for opt, arg in opts:
  19. if opt == '-h':
  20. print('test.py -i <inputfile> -p <pattern>')
  21. sys.exit()
  22. elif opt in ("-i", "--ifile"):
  23. inputfile = arg
  24. elif opt in ("-p", "--pattern"):
  25. pattern = arg
  26.  
  27. with open(inputfile, 'r') as f:
  28. input_text = f.read()
  29.  
  30. # result = boyer_moore_matcher(input_text, pattern)
  31. # print(result)
  32. # text = input_text[result + 1:]
  33. text = input_text
  34. print(input_text)
  35. print(f'length: ')
  36. results = []
  37. while True:
  38. result = boyer_moore_matcher(text, pattern)
  39. # print(result)
  40. if result == NOT_FOUND:
  41. break
  42. else:
  43. index = results[-1] if len(results) > 0 else 0
  44. index = index + result
  45. results.append(index if len(results) == 0 else index + 1)
  46. # print(index)
  47. # print(f'AA "{input_text[index if len(results) == 1 else index + 1]}" "{text[result]}"')
  48. text = text[result + 1:]
  49. if len(text) == 0:
  50. break
  51. print(results)
  52. print(len(results))
  53. get_words(input_text, results)
  54.  
  55.  
  56. def get_words(text, results):
  57. for result in results:
  58. start = result
  59. end = result
  60. # print(f'AAAAAAAAAA {text[result]} {result}')
  61. # print(f'start {text[start - 1]} {start - 1}')
  62.  
  63. while start > 0 and text[start - 1] not in string.whitespace:
  64. # print(f'start before {text[start]} {start}')
  65. # print('inside start')
  66.  
  67. start = start - 1
  68. # print(f'start {text[start]} {start}')
  69.  
  70. # print(f'end {text[end + 1]} {end + 1}')
  71. while end < len(text) - 1 and text[end + 1] not in string.whitespace:
  72. # print('inside end')
  73. end = end + 1
  74. print(f'{text[start:end + 1]} {start} {end}')
  75.  
  76.  
  77. def boyer_moore_matcher(text, pattern):
  78. # print(pattern)
  79. # print(text)
  80.  
  81. last = get_last(text, pattern)
  82. print(f'LAAAST {last}')
  83. n = len(text)
  84. m = len(pattern)
  85. i = m - 1
  86. j = m - 1
  87. if n <= i:
  88. return NOT_FOUND
  89.  
  90. while True:
  91. # print(text)
  92. if pattern[j] == text[i]:
  93. if j == 0:
  94. return i
  95. else:
  96. i = i - 1
  97. j = j - 1
  98. else:
  99. i = i + m - min(j, 1 + last[text[i]])
  100. j = m - 1
  101. if i > n - 1:
  102. print('BREAK')
  103. break
  104. return NOT_FOUND
  105.  
  106.  
  107. def get_last(text, pattern):
  108. last = {}
  109. charset = set(text)
  110. for i in charset:
  111. last[i] = pattern.rfind(i)
  112. return last
  113.  
  114.  
  115. if __name__ == "__main__":
  116. main(sys.argv[1:])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement