Untitled - a guest - Jun 20th, 2019
from os.path import isfile


def isWhiteSpace(word):
    # Empty strings and whitespace characters should be dropped from the token stream.
    return word in ["", " ", "\t", "\n"]

def delimiterCorrection(line):
    """Split a line on spaces, then make every delimiter its own token."""
    tokens = line.split(" ")
    for delimiter in mysrc.delimiters().keys():  # mysrc is provided elsewhere in this project
        # Iterate over a copy, since tokens is modified inside the loop.
        for token in list(tokens):
            if token != delimiter and delimiter in token:
                # Cut the token around the first delimiter occurrence
                # (this assumes single-character delimiters).
                pos = token.find(delimiter)
                tokens.remove(token)
                token = token.replace(delimiter, " ")
                extra = token[:pos]
                token = token[pos + 1:]
                tokens.append(delimiter)
                tokens.append(extra)
                tokens.append(token)

    # Split any token that still contains embedded spaces.
    for token in list(tokens):
        if " " in token:
            tokens.remove(token)
            tokens += token.split(" ")

    # Remove any tokens that are whitespace.
    return [t for t in tokens if not isWhiteSpace(t)]

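# Hedged example (not part of the original paste): assuming mysrc.delimiters()
# returns a dict whose keys include ";", the function behaves roughly like this:
#
#     delimiterCorrection("x;y")  ->  [";", "x", "y"]
#
# The delimiter ends up first because of the append order above, so callers should
# not rely on tokens keeping their original character order within a line.
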
def tokenize(path):
    """Return a list of (line_number, [token]) pairs.
    Raise an exception on error."""
    if not isfile(path):
        raise ValueError('File "' + path + '" doesn\'t exist!')

    res = []
    with open(path) as f:
        for line_count, line in enumerate(f):
            tokens = delimiterCorrection(line)
            res.append((line_count, tokens))
            for token in tokens:
                # This has a side effect, which makes it hard to rewrite.
                # Also, what does basicCheck actually do?
                basicCheck(token)
    return res
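
# Hedged usage sketch (not part of the original paste): it assumes the external
# `mysrc` module and `basicCheck()` used above are available, and that the source
# file path is passed on the command line.
if __name__ == "__main__":
    import sys

    for line_number, tokens in tokenize(sys.argv[1]):
        # Line numbers from enumerate() are 0-based.
        print(line_number, tokens)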