Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def isWhiteSpace(word):
    """Return True if *word* is a single whitespace token: space, tab, or newline.

    NOTE(review): the pasted source compared against "t" and "n" — almost
    certainly backslashes lost in transit; the escape sequences are restored
    here so the letters 't' and 'n' are no longer misclassified as whitespace.
    """
    return word in [" ", "\t", "\n"]
def delimiterCorrection(line):
    """Split *line* into tokens, treating every delimiter from
    mysrc.delimiters() as a standalone token.

    Returns the token list in source order, with whitespace tokens removed.

    Bug fixes vs. the original:
      * the original called ``tokens.remove(...)`` while iterating ``tokens``,
        which silently skips elements;
      * split-off pieces were appended to the end of the list, scrambling
        token order;
      * only the first occurrence of a delimiter inside a token was handled;
      * the final filter tested the stale loop variable ``token`` instead of
        the comprehension variable ``t``, making it all-or-nothing.
    """
    tokens = line.split(" ")
    for delimiter in mysrc.delimiters().keys():
        rebuilt = []
        for token in tokens:
            if token != delimiter and delimiter in token:
                # Break the token apart, keeping each delimiter occurrence
                # as its own token and preserving source order.
                parts = token.split(delimiter)
                for i, part in enumerate(parts):
                    if i:
                        rebuilt.append(delimiter)
                    if part:
                        rebuilt.append(part)
            else:
                rebuilt.append(token)
        tokens = rebuilt
    # Drop empty and pure-whitespace tokens.
    return [t for t in tokens if t and not isWhiteSpace(t)]
def tokenize(path):
    """Return a list of (line_number, [token]) pairs for the file at *path*.

    Line numbers are 0-based (from ``enumerate``). Each token list comes from
    ``delimiterCorrection``; every token is additionally passed through
    ``basicCheck`` (a project helper with side effects — see note below).

    Raises:
        ValueError: if *path* does not name an existing file.
    """
    if not isfile(path):
        # The pasted original had mangled quoting here; restore a valid,
        # escaped error message.
        raise ValueError('File "' + path + '" doesn\'t exist!')
    res = []
    with open(path) as f:
        for line_count, line in enumerate(f):
            tokens = delimiterCorrection(line)
            res.append((line_count, tokens))
            for token in tokens:
                # basicCheck is defined elsewhere and has a side effect
                # (presumably validation/registration) — keep the per-token
                # call; TODO confirm its contract with the owner.
                basicCheck(token)
    return res
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement