# fosap h13

#! /usr/bin/python
import sys
import types
import re
def usage():
    """Print the command-line help text to stdout.

    The name the script was invoked with (``sys.argv[0]``) is substituted
    into the first line of the message.
    """
    help_template = """Usage: {} [word] ... [r_num] [r_id]
Takes all the the words and regular expressions as arguments
or reads them as input if no arguments are supplied.
In the second case pass one expressions per line
and finish with a new linelike below:
[regex1]
[regex2]
...
[regexN]
"""
    program_name = sys.argv[0]
    print(help_template.format(program_name))
def create_regex(expressions):
    """Split *expressions* into literal words and compiled regular expressions.

    The last two entries of *expressions* are compiled with ``re.compile``;
    everything before them is kept verbatim as the list of literal words.
    With two or fewer entries the word list is empty.

    Returns a tuple ``(re_objects, words)``.
    """
    literal_words = expressions[:-2]
    pattern_sources = expressions[-2:]

    compiled_patterns = []
    for source in pattern_sources:
        compiled_patterns.append(re.compile(r"{}".format(source)))

    return compiled_patterns, literal_words
class Token(object):
    """A word paired with the identifier assigned to it."""

    def __init__(self, word, identifier):
        # Store both values unchanged; no validation is performed.
        self.word, self.identifier = word, identifier

    def __str__(self):
        """Render the token as ``(word, identifier)``."""
        template = "({}, {})"
        return template.format(self.word, self.identifier)
class TokenManager(object):
    """Assign identifiers to words and record every token in input order."""

    def __init__(self):
        # Maps each distinct word to its identifier.
        self.tokenDict = {}
        # One Token per add_token() call, in call order (repeats included).
        self.tokenList = []

    def add_token(self, word):
        """Record *word* as a token, reusing its identifier if already known.

        A word seen for the first time receives the number of distinct words
        stored so far as its identifier.
        """
        # setdefault replaces the former bare ``except:``, which hid every
        # exception raised by the lookup rather than only the missing key.
        # The default is evaluated before insertion, so it is the old size.
        identifier = self.tokenDict.setdefault(word, len(self.tokenDict))
        self.tokenList.append(Token(word, identifier))

    def print_token(self):
        """Print a ``Token:`` header followed by every recorded token."""
        print("Token:")
        for token in self.tokenList:
            print(token)
def check_word(re_objects, words, word):
    """Return True if *word* is a known literal or is accepted by a pattern.

    *word* is accepted when it is contained in *words*, or when any compiled
    pattern in *re_objects* matches a non-empty string at the start of it.
    ``match`` only anchors at the beginning, so a non-empty prefix match is
    enough; empty matches are ignored.
    """
    if word in words:
        return True

    for pattern in re_objects:
        hit = pattern.match(word)
        if hit is None:
            continue
        if hit.group(0) != "":
            return True

    return False
if __name__ == "__main__":
    # raw_input only exists on Python 2; on Python 3 input() has the same
    # read-one-line behaviour, so fall back to it when raw_input is missing.
    try:
        _read_line = raw_input
    except NameError:
        _read_line = input

    def _next_line():
        """Return the next line from stdin, or "" once stdin is exhausted."""
        try:
            return _read_line()
        except EOFError:
            # Treat end of input like the terminating blank line instead of
            # aborting with a traceback.
            return ""

    if len(sys.argv) == 1:
        # No arguments: read expressions from stdin, one per line, until a
        # blank line (or end of input).
        expressions = list(iter(_next_line, ""))
    else:
        expressions = sys.argv[1:]

    if not expressions:
        # Nothing to match against: show the help text and stop rather than
        # going on to read input that could never be accepted.
        usage()
        sys.exit(1)

    # Compiled regular expressions and the constant words/characters.
    re_objects, words = create_regex(expressions)

    tm = TokenManager()
    print("Now reading input...")
    for line in iter(_next_line, ""):
        # split(" ") returns [line] when the line has no space. Consecutive
        # spaces produce empty words, which go through check_word as well.
        for word in line.split(" "):
            if not check_word(re_objects, words, word):
                print("Word not accepted: {}".format(word))
                sys.exit(-1)
            tm.add_token(word)
    tm.print_token()