Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import Syn.*
- import Node,Tree from struct.tree
- // A parsing rule: a simple collection of 3 elements.
- // token -- a string token or a RE used to break up the input; nil marks
- //          the rule applied to plain text (no token matched)
- // target -- the state string handed to Parser.setState() after the rule fires:
- //           ';'-separated state names to enter, or the special "#pop" / "#stay"
- // action -- optional function invoked as action(tree, text) when the rule fires
- class PRule( token, target, action )
- token = token
- target = target
- action = action
- end
- // The error raised by the parser (see Parser.error).
- // It adds nothing to Error: code, desc and extra are forwarded as they are.
- class ParserError( code, desc, extra ) from Error(code, desc, extra )
- end
- // The workhorse class: a state-driven parser.
- // Rules are registered per state through add(); parse() regroups them by
- // token, installs generated callbacks on a MultiTokenizer and returns the
- // Tree those callbacks fill.
- class Parser
-    // Dictionary of state name => array of the PRule declared for that state.
-    states = [=>]
-    // Declared to hold the parser status; no method of this class assigns it.
-    tree = nil
-    // These are temporary tables used internally; parse() rebuilds them at each run.
-    // tokenMap:  string token  => [state => PRule]
-    tokenMap = nil
-    // reMap:     regex pattern => [state => PRule]
-    reMap = nil
-    // nilStates: state => PRule, for the rules having a nil token (plain text)
-    nilStates = nil
-    // Stack of the active state names; the last element is the current state.
-    _stateList = nil
-
-    // This method is used to add a rule to a state.
-    // It's actually a variable parameter function, with r assuming the value
-    // of each rule passed after the state name, in turn (presumably through
-    // the fself.redo() call that closes the function).
-    // r can also be an array of rules; its elements are added one by one.
-    // Raises ParamError if r is neither an array nor a PRule instance.
-    function add( stateName, r )
-       if r.typeId == ArrayType
-          for elem in r
-             self.add(stateName, elem )
-          end
-       else
-          if not r.derivedFrom(PRule)
-             raise ParamError(10001, "Parameter error", "Rule is not a PRule instance")
-          end
-          state = self.states.find(stateName, nil)
-          if not state
-             // First rule of this state: create its rule list.
-             // (The key used to be the undefined name `s`.)
-             self.states[stateName] = [r]
-          else
-             state += r
-          end
-       end
-       fself.redo()
-    end
-
-    // Start parsing.
-    // text can also be a stream; text and enc are passed to MultiTokenizer unchanged.
-    // Returns the Tree created for this run.
-    function parse( text, enc )
-       // reset the maps
-       self.tokenMap = [=>]
-       self.reMap = [=>]
-       self.nilStates = [=>]
-       self._stateList = []
-       self.prepareMaps()
-       mt = MultiTokenizer(text, enc)
-       tree = Tree()
-       self.prepareParsing(mt, tree)
-       self.setState("start")
-       // Iterate the tokenizer; presumably this is what drives the callbacks
-       // installed by prepareParsing(). (It used to iterate `m`, a name that
-       // is never defined; `mt` is the only tokenizer in scope.)
-       ^[mt]
-       return tree
-    end
-
-    //==========================================================
-    // Private part
-    //
-    // This function reorganizes the states and the rules BY TOKEN.
-    // Rules are more easily declared by state, but then, we need
-    // to know what to do when we find a token; so we reorder the
-    // states and their rules by token.
-    // String tokens go in tokenMap, nil tokens in nilStates, and anything
-    // else is treated as a RE and stored in reMap under its pattern.
-    function prepareMaps()
-       for state, seq in self.states
-          for r in seq
-             if r.token.typeId == StringType
-                stateMap = self.tokenMap.find(r.token,nil)
-                if stateMap
-                   stateMap[state] = r
-                else
-                   self.tokenMap[r.token] = [state => r]
-                end
-             elif r.token == nil
-                self.nilStates[state] = r
-             else
-                stateMap = self.reMap.find(r.token.pattern,nil)
-                if stateMap
-                   stateMap[state] = r
-                else
-                   self.reMap[r.token.pattern] = [state => r]
-                end
-             end
-          end
-       end
-    end
-
-    // This method fills the MultiTokenizer with callbacks that act on the forming syntactic tree.
-    // The callbacks are generated on the fly, filling them with dynamic code:
-    // each one reads the current state, binds the tree and runs the switch
-    // built by prepareSwitch() for that token.
-    // mt -- the MultiTokenizer receiving the callbacks
-    // tree -- the tree made available to the rule actions
-    function prepareParsing( mt, tree )
-       // One callback per string token...
-       for token, stateMap in self.tokenMap
-          self.trace( "Generating parser for token", token.escape() )
-          sw = self.prepareSwitch( stateMap, "state" )
-          func = {(text) state = ^~self.currentState(); tree = ^~tree; ^~sw() }
-          self.trace2( func.render() )
-          mt.addToken( token, func )
-       end
-       // ... one per regular expression ...
-       for re, stateMap in self.reMap
-          self.trace( "Generating parser for re", re.escape() )
-          sw = self.prepareSwitch( stateMap, "state" )
-          self.trace2( sw.render() )
-          func = {(text) state = ^~self.currentState(); tree = ^~tree; ^~sw() }
-          mt.addRE( re, func )
-       end
-       // ... and one for the text that matches no token.
-       self.trace( "Generating parser pure text" )
-       sw = self.prepareSwitch( self.nilStates, "state" )
-       self.trace2( sw.render() )
-       func = {(text) state = ^~self.currentState(); tree = ^~tree; ^~sw() }
-       mt.onText = func
-    end
-
-    // This function transforms a map of "state" => PRule entities
-    // into a switch() ... end code that selects the correct rule at a given state.
-    // stateMap -- dictionary state name => PRule
-    // stateVar -- name of the variable the generated switch selects on
-    // Returns the generated Syn.Switch.
-    function prepareSwitch( stateMap, stateVar )
-       // Generate a switch on state ...
-       sw = Syn.Switch( Symbol(stateVar) )
-       for state, r in stateMap
-          //... in which every state is a branch ...
-          cs = SynTree()
-          cs.selector = Syn.Case(state)
-          // ... where it invokes the action tied with the rule...
-          action = r.action
-          if action
-             cs.append( {[] ^~action(tree, text)} )
-          end
-          // ... and possibly changes the state of the parser ...
-          cs.append( {[] ^~self.setState(^~(r.target))} )
-          sw.append(cs)
-       end
-       // A branch without selector, appended last: it just reports the state.
-       dflt = SynTree()
-       dflt.append( {[] printl("Default: ", state)} )
-       sw.append( dflt )
-       return sw
-    end
-
-    // Returns the state on top of the state stack, or "<EMPTY>" when there is none.
-    function currentState()
-       if self._stateList
-          return self._stateList[-1]
-       end
-       return "<EMPTY>"
-    end
-
-    // Applies a target specification to the state stack.
-    // newState -- one or more entries separated by ';'. "#pop" leaves the
-    //             current state, "#stay" does nothing, any other entry is
-    //             the name of a declared state to be entered.
-    // Raises ParserError (through error()) if an entry is not a declared state.
-    function setState( newState )
-       self.trace("setState", newState )
-       // Use a local name that cannot be confused with self.states: the
-       // list used to be called `states`, so the membership test below was
-       // run against the list itself and could never fail.
-       steps = newState.split(";")
-       for s in steps
-          switch s
-             case "#pop"
-                self.popState()
-             case "#stay"
-                // do nothing
-             default
-                if s notin self.states
-                   self.error("State '" + s + "' not found")
-                else
-                   self.trace("Descending in state", s)
-                   self._stateList += s
-                end
-          end
-       end
-    end
-
-    // Removes the last entered state; raises ParserError if the stack is empty.
-    function popState()
-       if not self._stateList
-          self.error("Pop state without states.")
-       else
-          self._stateList.remove(-1)
-       end
-    end
-
-    // Raises a ParserError carrying s as extra description.
-    function error( s )
-       raise ParserError( 10001, "Parser Error", s)
-    end
-
-    // Prints all the received parameters, space separated, after a "[P-TRC] " prefix.
-    function trace()
-       >> "[P-TRC] "
-       ^[fself.params] {(v) >>v, " "}
-       printl()
-    end
-
-    // As trace(), but with the "[P-TR2] " prefix (used for the generated code dumps).
-    function trace2()
-       >> "[P-TR2] "
-       ^[fself.params] {(v) >>v, " "}
-       printl()
-    end
- end
- //=====================================================================
- // Callbacks
- //=====================================================================
- // Action factory: returns a closure (tree, text) that acts on the tree.
- // The closure creates a node of type 'name' holding the text, appends it
- // as a child of the current node and makes it the new current node.
- // When regex is given, the text is first transformed via the
- // regex/subst pair.
- function addBelow( name, regex, subst )
-    return { tree, text =>
-       if regex
-          text = regex.replace(text, subst)
-       end
-       child = Node(name, text)
-       tree.current.appendChild(child)
-       tree.current = child
-    }
- end
- // Action factory: returns a closure (tree, text) that creates a node of
- // type 'name' holding the text, inserts it after the current node (as a
- // sibling) and makes it the new current node.
- // When regex is given, the text is first transformed via the
- // regex/subst pair.
- function addBeside(name, regex, subst )
-    return { tree, text =>
-       if regex
-          text = regex.replace(text, subst)
-       end
-       sibling = Node(name, text)
-       tree.current.insertAfter(sibling)
-       tree.current = sibling
-    }
- end
- // Action: moves the current node of the tree up to its parent.
- // tree -- the tree being built; only tree.current is read and written
- // Raises ParserError (code 10002) if the current node has no parent.
- function popNode(tree)
-    if tree.current.parent == nil
-       // The error class declared in this file is ParserError; the name
-       // ParseError that was raised here is not defined anywhere.
-       raise ParserError( 10002, "Popping top node in tree" )
-    end
-    tree.current = tree.current.parent
- end
- // Action: adds the text to the content of the currently active node,
- // or fills its content field when it has none yet.
- function addToContent(tree, text)
-    if not tree.current.content
-       // Nothing stored yet: start the content from the text plus "".
-       tree.current.content = text + ""
-    else
-       tree.current.content += text
-    end
- end
- //=====================================================================
- // Rules
- //=====================================================================
- // The parser instance on which all the rules below are registered.
- x = Parser()
- // shortcut: PState( stateName, rule, ... ) adds the rules to the given state
- PState = x.add
- // "start" is the state entered by Parser.parse() before reading the input.
- PState( "start",
- // addBelow will actually create a function that will be responsible
- // for adding the text below the forming tree
- PRule( "/*#", "faldoc_text", addBelow("FaldocBlock") ),
- PRule( "//#", "faldoc_text_eol", addBelow("FaldocBlock") ),
- // addToContent doesn't need configuration, so we can use it directly
- PRule( nil, "#stay", addToContent )
- )
- // Rules shared by both the faldoc text states; they are passed to PState
- // as an array (Parser.add accepts an array of rules).
- base_faldocText = .[
- PRule( r'@b\s\w+\b', "#stay", addBeside("b", r'@b\s+(\w)', '\1') )
- // the action is optional
- PRule("\\", "ignore")
- PRule(nil, "#stay", addBeside("text") )
- ]
- // Block comment form: the state is left at the closing '*/'.
- PState( "faldoc_text",
- base_faldocText,
- PRule( '*/', "#pop", popNode )
- )
- // Line comment form: the state is left at the end of the line.
- PState( "faldoc_text_eol",
- base_faldocText,
- PRule( "\n", "#pop", popNode )
- )
- // Entered after a backslash: the next plain text is added to the current
- // node's content, then the state is popped.
- PState( "ignore",
- PRule( nil, "#pop", addToContent )
- )
- //=====================================================================
- // Test
- //=====================================================================
- // Parse a sample string with the rules declared above, then print the
- // type and the content of every node obtained by iterating tree.top.
- tree = x.parse( "Ignored text /*# hello world @b again! */ other text to be ignored." )
- for node in tree.top
- > "NODE: ", node.type, ": ", node.content
- end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement