Advertisement
Guest User

Untitled

a guest
May 5th, 2014
365
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Falcon 9.02 KB | None | 0 0
  1. import Syn.*
  2. import Node,Tree from struct.tree
  3.  
// A simple collection of 3 elements describing one parsing rule.
// token -- a token (string) or RE that is used to break up the input;
//          nil marks the rule applied to plain text between tokens.
// target -- the state change performed after the rule fires: a state name to
//           descend into, "#pop", "#stay", or a ';'-separated list of these
//           (see Parser.setState).
// action -- optional function invoked as action(tree, text) with the forming
//           tree and the matched input text; nil means "only change state".
class PRule( token, target, action )
   token = token
   target = target
   action = action
end
  13.  
// The error type raised by the parser (see Parser.error).
// It adds nothing to Error: code, desc and extra are forwarded unchanged.
class ParserError( code, desc, extra ) from Error(code, desc, extra )
end
  17.  
  18.  
  19. // The work horse class
  20. class Parser
  21.    // states
  22.    states = [=>]
  23.    
  24.    // The parser status will be set here.
  25.    tree = nil
  26.      
  27.    // This are temporary status used internally
  28.    tokenMap = nil
  29.    reMap = nil
  30.    nilStates = nil
  31.    _stateList = nil
  32.  
  33.    // This method is used to add a rule to a state.
  34.    // It's actually a variable parameter function, with r assuming the value
  35.    // of each rule passed after the state name, in turn.
  36.    // r can also be an array of rules.
  37.    function add( stateName, r )
  38.       if r.typeId == ArrayType
  39.          for elem in r
  40.             self.add(stateName, elem )
  41.          end
  42.       else
  43.          if not r.derivedFrom(PRule)
  44.             raise ParamError(10001, "Parameter error", "Rule is not a PRule instance")
  45.          end
  46.          
  47.          state = self.states.find(stateName, nil)
  48.          if not state
  49.             self.states[s] = [r]
  50.          else
  51.             state += r
  52.          end
  53.       end
  54.      
  55.       fself.redo()
  56.    end
  57.    
  58.  
  59.    // Start parsing
  60.    // Test can also be a stream.
  61.    function parse( text, enc )
  62.       // reset the maps
  63.       self.tokenMap = [=>]
  64.       self.reMap = [=>]
  65.       self.nilStates = [=>]
  66.       self._stateList = []
  67.  
  68.       self.prepareMaps()
  69.       mt = MultiTokenizer(text, enc)
  70.       tree = Tree()
  71.       self.prepareParsing(mt, tree)
  72.       self.setState("start")
  73.       ^[m]
  74.      
  75.       return tree
  76.    end
  77.    
  78.    //==========================================================
  79.    // Private part
  80.    //
  81.    
  82.    // This function reorganizes the states and the rules BY TOKEN.
  83.    // Rules are more easily declared by rule, but then, we need
  84.    // to know what to do when we find a token; so we reorder the
  85.    // states and their rules by token.
  86.    function prepareMaps()
  87.       for state, seq in self.states
  88.          for r in seq
  89.             if r.token.typeId == StringType
  90.                stateMap = self.tokenMap.find(r.token,nil)
  91.                if stateMap
  92.                   stateMap[state] = r
  93.                else
  94.                   self.tokenMap[r.token] = [state => r]
  95.                end
  96.             elif r.token == nil
  97.                self.nilStates[state] = r
  98.             else
  99.                stateMap = self.reMap.find(r.token.pattern,nil)
  100.                if stateMap
  101.                   stateMap[state] = r
  102.                else
  103.                   self.reMap[r.token.pattern] = [state => r]
  104.                end
  105.             end
  106.          end
  107.       end                  
  108.    end
  109.    
  110.    
  111.    // This method fills the MultiTokenizer with callbacks that act on the forming syntactic tree.
  112.    // The calbacks are generated on the fly, filling them with dynamic code.
  113.    // The workhorse for that is the next function.
  114.    function prepareParsing( mt, tree )    
  115.      
  116.       for token, stateMap in self.tokenMap
  117.          self.trace( "Generating parser for token", token.escape() )
  118.          sw = self.prepareSwitch( stateMap, "state" )        
  119.          func = {(text) state = ^~self.currentState(); tree = ^~tree; ^~sw() }
  120.          self.trace2( func.render() )
  121.          mt.addToken( token, func )
  122.       end
  123.      
  124.       for re, stateMap in self.reMap
  125.          self.trace( "Generating parser for re", re.escape() )
  126.          sw = self.prepareSwitch( stateMap, "state" )        
  127.          self.trace2( sw.render() )
  128.          func = {(text) state = ^~self.currentState(); tree = ^~tree; ^~sw() }
  129.          mt.addRE( re, func )
  130.       end
  131.      
  132.       self.trace( "Generating parser pure text" )
  133.       sw = self.prepareSwitch( self.nilStates, "state" )
  134.       self.trace2( sw.render() )
  135.       func = {(text) state = ^~self.currentState(); tree = ^~tree; ^~sw() }
  136.       mt.onText = func
  137.      
  138.    end
  139.    
  140.    // This function transform a map of "state" => PRule entities
  141.    // into a switch() ... end code that select the correct rule at given state.
  142.    function prepareSwitch( stateMap, stateVar )      
  143.       // Generate a switch on state ...
  144.       sw = Syn.Switch( Symbol(stateVar) )
  145.      
  146.       for state, r in stateMap
  147.          
  148.          //... in which every state is a branch ...
  149.          cs = SynTree()
  150.          cs.selector = Syn.Case(state)
  151.          
  152.          // ... where it invokes the action tied with the rule...
  153.          action = r.action
  154.          if action
  155.             cs.append( {[] ^~action(tree, text)} )
  156.          end
  157.          // ... and possibly changes the state of the parser ...
  158.          cs.append( {[] ^~self.setState(^~(r.target))} )
  159.          
  160.          sw.append(cs)
  161.       end
  162.       dflt = SynTree()
  163.       dflt.append( {[] printl("Default: ", state)} )
  164.       sw.append( dflt )
  165.       return sw
  166.    end
  167.    
  168.    function currentState()
  169.       if self._stateList
  170.          return self._stateList[-1]
  171.       end
  172.       return "<EMPTY>"
  173.    end
  174.    
  175.    function setState( newState )
  176.       self.trace("setState", newState )
  177.      
  178.       states = newState.split(";")
  179.       for s in states
  180.          switch s
  181.             case "#pop"
  182.                self.popState()
  183.             case "#stay"
  184.                // do nothing
  185.             default
  186.                if s notin states
  187.                   self.error("State '" + s + "' not found")
  188.                else              
  189.                   self.trace("Descending in state", s)
  190.                   self._stateList += s
  191.                end
  192.          end
  193.       end      
  194.    end
  195.    
  196.    function popState()
  197.       if not self._stateList
  198.          self.error("Pop state without states.")
  199.       else
  200.          self._stateList.remove(-1)
  201.       end
  202.    end
  203.    
  204.    function error( s )
  205.       raise ParserError( 10001, "Parser Error", s)
  206.    end
  207.    
  208.    function trace()
  209.       >> "[P-TRC] "
  210.       ^[fself.params] {(v) >>v, " "}
  211.       printl()
  212.    end
  213.    
  214.    function trace2()
  215.       >> "[P-TR2] "
  216.       ^[fself.params] {(v) >>v, " "}
  217.       printl()
  218.    end
  219. end
  220.  
  221. //=====================================================================
  222. // Callbacks
  223. //=====================================================================
  224.  
  225. // This is an action that creates a closure that will act on the tree
  226. // It's purpose it's that of creating a node of type 'name', and
  227. // eventually transform the input text via regex/subst pair.
  228. function addBelow( name, regex, subst )
  229.    return { tree, text =>
  230.       if regex
  231.          text = regex.replace(text, subst)
  232.       end
  233.       newNode = Node(name, text)
  234.       tree.current.appendChild(newNode)
  235.       tree.current = newNode
  236.    }
  237. end
  238.  
  239. // As above, but adds a sibling to the current node
  240. function addBeside(name, regex, subst )
  241.    return { tree, text =>
  242.       if regex
  243.          text = regex.replace(text, subst)
  244.       end
  245.       newNode = Node(name, text)
  246.       tree.current.insertAfter(newNode)
  247.       tree.current = newNode
  248.       }
  249. end
  250.  
  251. // sets the current node as the parent of the current node
  252. function popNode(tree)
  253.    if tree.current.parent == nil
  254.       raise ParseError( 10002, "Popping top node in tree" )
  255.    end
  256.    tree.current = tree.current.parent
  257. end
  258.  
  259. // Adds content to the currently active node (or fills it's content field)
  260. function addToContent(tree, text)
  261.    if tree.current.content
  262.       tree.current.content += text
  263.    else
  264.       tree.current.content = text + ""
  265.    end
  266. end
  267.  
  268. //=====================================================================
  269. // Rules
  270. //=====================================================================
  271.  
// The parser instance that receives all the rules below.
x = Parser()
// shortcut: PState(stateName, rule, ...) registers rules through x.add
PState = x.add

// Initial state: parse() always starts from "start".
PState( "start",
   // addBelow will actually create a function that will be responsible
   // for adding the text below the forming tree
   PRule( "/*#", "faldoc_text", addBelow("FaldocBlock") ),
   PRule( "//#", "faldoc_text_eol", addBelow("FaldocBlock") ),
   // addToContent doesn't need configuration, so we can use it directly
   PRule( nil, "#stay", addToContent )
)

// Rules shared by both faldoc states; passed to PState as an array.
base_faldocText = .[
   PRule( r'@b\s\w+\b', "#stay", addBeside("b", r'@b\s+(\w)', '\1') )
   // the action is optional
   PRule("\\", "ignore")
   PRule(nil, "#stay", addBeside("text") )
   ]

// Block comment: left when the closing '*/' is found.
PState( "faldoc_text",
   base_faldocText,
   PRule( '*/', "#pop", popNode )
)

// Line comment: left at the end of the line.
PState( "faldoc_text_eol",
   base_faldocText,
   PRule( "\n", "#pop", popNode )
)

// Entered after a backslash: the next plain text is added to the current
// node content, then the state is left.
PState( "ignore",
   PRule( nil, "#pop", addToContent )
)
  305.  
  306.  
  307. //=====================================================================
  308. // Test
  309. //=====================================================================
  310.  
  311. tree = x.parse( "Ignored text /*# hello world @b again! */ other text to be ignored." )
  312.  
  313. for node in tree.top
  314.    > "NODE: ", node.type, ": ", node.content
  315. end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement