Advertisement
Chribold

fosap h13

Jun 25th, 2017
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.29 KB | None | 0 0
  1. #! /usr/bin/python
  2. import sys
  3. import types
  4. import re
  5. def usage():
  6. print("""Usage: {} [word] ... [r_num] [r_id]
  7. Takes all the the words and regular expressions as arguments
  8. or reads them as input if no arguments are supplied.
  9. In the second case pass one expressions per line
  10. and finish with a new linelike below:
  11. [regex1]
  12. [regex2]
  13. ...
  14. [regexN]
  15. """.format(sys.argv[0]))
  16. def create_regex(expressions):
  17. # creates two lists: one for the given words and on for the regular expressions
  18. # returns a tuple conatining both lists: (re_objects, words)
  19. words = expressions[0:-2]
  20. re_objects = [re.compile(r"{}".format(exp)) for exp in expressions[-2:]]
  21. return (re_objects, words)
  22. class Token(object):
  23. def __init__(self, word, identifier):
  24. self.word = word
  25. self.identifier = identifier
  26. def __str__(self):
  27. return "({}, {})".format(self.word, self.identifier)
  28. class TokenManager(object):
  29. def __init__(self):
  30. self.tokenDict = {}
  31. self.tokenList = list()
  32. def add_token(self, word):
  33. identifier = None
  34. try:
  35. identifier = self.tokenDict[word]
  36. except:
  37. identifier = len(self.tokenDict)
  38. self.tokenDict[word] = identifier
  39. self.tokenList.append(Token(word, identifier))
  40. def print_token(self):
  41. print("Token:")
  42. for token in self.tokenList:
  43. print(token)
  44. def check_word(re_objects, words, word):
  45. if word in words:
  46. return True
  47. for re_object in re_objects:
  48. result = re_object.match(word)
  49. if result is not None and result.group(0) != "":
  50. return True
  51. return False
  52. if __name__ == "__main__":
  53. expressions = list()
  54. if len(sys.argv) == 1:
  55. #read expressions from stdin
  56. read = raw_input()
  57. while(read != ""):
  58. expressions.append(read)
  59. read = raw_input()
  60. else:
  61. expressions = sys.argv[1:]
  62. if len(expressions) == 0:
  63. usage()
  64. # two lists: for regular expressions and for constant words/characters
  65. re_objects = list()
  66. words = list()
  67. (re_objects, words) = create_regex(expressions)
  68. # list for containing the tokens
  69. results = list()
  70. # create new TokenManager
  71. tm = TokenManager()
  72. print("Now reading input...")
  73. read = raw_input()
  74. while read != "":
  75. if " " in read:
  76. read = read.split(" ")
  77. if type(read) in types.StringTypes:
  78. read = [read]
  79. for word in read:
  80. if check_word(re_objects, words, word):
  81. tm.add_token(word)
  82. else:
  83. print("Word not accepted: {}".format(word))
  84. sys.exit(-1)
  85. read = raw_input()
  86. tm.print_token()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement