Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import getopt
- import sys
- import string
- NOT_FOUND = 'Not Found'
def main(argv):
    """Find every occurrence of a pattern in a text file and report the hits.

    Reads the options ``-i/--ifile`` (input file, default ``seneca.txt``) and
    ``-p/--pattern`` (search pattern, default ``omne``) from *argv*, prints the
    file contents and their length, then calls ``boyer_moore_matcher``
    repeatedly to collect the absolute start index of every match (overlapping
    matches included). The list of indices and its size are printed, and the
    indices are passed to ``get_words``.

    Args:
        argv: Command-line arguments without the program name.

    Exits with status 2 on an unrecognised option or an empty pattern; ``-h``
    prints the usage line and exits normally.
    """
    usage = 'test.py -i <inputfile> -p <pattern>'
    inputfile = 'seneca.txt'
    pattern = 'omne'

    try:
        opts, _ = getopt.getopt(argv, "hi:p:", ["ifile=", "pattern="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-p", "--pattern"):
            pattern = arg

    if not pattern:
        # There is nothing to search for; fail with a message instead of
        # letting the matcher index into an empty string.
        print('pattern must not be empty')
        print(usage)
        sys.exit(2)

    with open(inputfile, 'r') as f:
        input_text = f.read()

    print(input_text)
    print(f'length: {len(input_text)}')

    results = []
    text = input_text
    offset = 0  # absolute position in input_text where `text` currently starts
    while True:
        result = boyer_moore_matcher(text, pattern)
        if result == NOT_FOUND:
            break
        position = offset + result
        results.append(position)
        # Resume one character past the match start so overlapping
        # occurrences are still found.
        offset = position + 1
        text = text[result + 1:]
        if not text:
            break

    print(results)
    print(len(results))
    get_words(input_text, results)
def get_words(text, results):
    """Print the whitespace-delimited word around each index in *results*.

    For every index the span is widened to the left and to the right until a
    character from ``string.whitespace`` (or an end of *text*) is reached.
    One line is printed per index: the word, its first index and its last
    index (inclusive), separated by single spaces.

    Args:
        text: The full text the indices refer to.
        results: Iterable of integer positions inside *text*.
    """
    blanks = string.whitespace
    final_index = len(text) - 1
    for hit in results:
        # Walk left while the preceding character still belongs to the word.
        left = hit
        while left > 0:
            if text[left - 1] in blanks:
                break
            left -= 1
        # Walk right while the following character still belongs to the word.
        right = hit
        while right < final_index:
            if text[right + 1] in blanks:
                break
            right += 1
        print(f'{text[left:right + 1]} {left} {right}')
def boyer_moore_matcher(text, pattern):
    """Return the index of the first occurrence of *pattern* in *text*.

    The pattern is compared against the text from right to left. On a
    mismatch the alignment is shifted using a last-occurrence table: the
    mismatching text character is lined up with its last position in the
    pattern, or the pattern is moved past it when it does not occur at all.

    Args:
        text: String to search in.
        pattern: String to search for.

    Returns:
        The zero-based start index of the first match, ``0`` for an empty
        pattern, or the module-level ``NOT_FOUND`` sentinel when the pattern
        does not occur.
    """
    n = len(text)
    m = len(pattern)
    if m == 0:
        # An empty pattern matches at the very start.
        return 0
    if n < m:
        return NOT_FOUND

    # Last index of every pattern character (later positions overwrite
    # earlier ones). Characters absent from the pattern are looked up as -1.
    last = {ch: idx for idx, ch in enumerate(pattern)}

    i = j = m - 1  # i walks the text, j walks the pattern, both right to left
    while i < n:
        if pattern[j] == text[i]:
            if j == 0:
                return i
            i -= 1
            j -= 1
        else:
            # Jump forward and restart the comparison at the pattern's end.
            i += m - min(j, 1 + last.get(text[i], -1))
            j = m - 1
    return NOT_FOUND
def get_last(text, pattern):
    """Build the last-occurrence table used by the matcher.

    Args:
        text: String whose distinct characters become the table's keys.
        pattern: String in which each character is looked up.

    Returns:
        A dict mapping every distinct character of *text* to the highest
        index at which it appears in *pattern*, or ``-1`` if it never does.
    """
    return {symbol: pattern.rfind(symbol) for symbol in set(text)}
# Run the search only when this file is executed as a script, passing the
# command-line arguments (without the program name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement