Advertisement
Guest User

Untitled

a guest
May 4th, 2014
314
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Falcon 8.56 KB | None | 0 0
  1. import Syn.*
  2.  
  3. // A simple collection of 3 elements describing one parsing rule.
  4. // token -- a token (string) or RE that is used to break up the input; nil selects plain text
  5. // target -- the state transition applied when the rule fires (it is passed to Parser.setState)
  6. // action -- The thing to be done (a function called with tree and text); may be omitted
  7. class PRule( token, target, action )
  8.    token = token
  9.    target = target
  10.    action = action
  11. end
  12.  
  13. // A simple error: an Error subclass with no extra members, raised by Parser.error().
  14. class ParserError( code, desc, extra ) from Error(code, desc, extra )
  15. end
  16.  
  17.  
  18. // The workhorse class: collects PRule entries per state and feeds them to a MultiTokenizer.
  19. class Parser
  20.    // Map of state name => array of PRule registered for that state (filled by add()).
  21.    states = [=>]
  22.    
  23.    // The parsing result is meant to be stored here (nothing in this class assigns it yet).
  24.    tree = nil
  25.      
  26.    // These are temporary structures used internally; parse() rebuilds them on every call.
  27.    tokenMap = nil
  28.    reMap = nil
  29.    nilStates = nil
  30.    _stateList = nil
  31.  
  32.    // This method is used to add a rule to the state named stateName.
  33.    // It's actually a variable parameter function, with r assuming the value
  34.    // of each rule passed after the state name, in turn.
  35.    // r can also be an array of rules. Raises ParamError if r is not a PRule.
  36.    function add( stateName, r )
  37.       if r.typeId == ArrayType
  38.          for elem in r
  39.             self.add(stateName, elem )
  40.          end
  41.       else
  42.          if not r.derivedFrom(PRule)
  43.             raise ParamError(10001, "Parameter error", "Rule is not a PRule instance")
  44.          end
  45.          
  46.          state = self.states.find(stateName, nil)
  47.          if not state
  48.             self.states[stateName] = [r]   // first rule of this state; key is the state name
  49.          else
  50.             state += r
  51.          end
  52.       end
  53.       // presumably re-enters add() for the next rule passed after stateName -- TODO confirm
  54.       fself.redo()
  55.    end
  56.    
  57.  
  58.    // Start parsing
  59.    // Text can also be a stream.
  60.    function parse( text, enc )
  61.       // reset the maps
  62.       self.tokenMap = [=>]
  63.       self.reMap = [=>]
  64.       self.nilStates = [=>]
  65.       self._stateList = []
  66.  
  67.       self.prepareMaps()
  68.       mt = MultiTokenizer(text, enc)
  69.       self.prepareParsing(mt)
  70.       self.setState("start")
  71.       ^[mt]      // was '^[m]': 'm' is never defined, the tokenizer created above is 'mt'
  72.    end
  73.    
  74.    //==========================================================
  75.    // Private part
  76.    //
  77.    
  78.    // This function reorganizes the states and the rules BY TOKEN.
  79.    // Rules are more easily declared by rule, but then, we need
  80.    // to know what to do when we find a token; so we reorder the
  81.    // states and their rules by token.
  82.    function prepareMaps()
  83.       for state, seq in self.states
  84.          for r in seq
  85.             if r.token.typeId == StringType
  86.                stateMap = self.tokenMap.find(r.token,nil)
  87.                if stateMap
  88.                   stateMap[state] = r
  89.                else
  90.                   self.tokenMap[r.token] = [state => r]
  91.                end
  92.             elif r.token == nil
  93.                self.nilStates[state] = r
  94.             else
  95.                stateMap = self.reMap.find(r.token.pattern,nil)
  96.                if stateMap
  97.                   stateMap[state] = r
  98.                else
  99.                   self.reMap[r.token.pattern] = [state => r]
  100.                end
  101.             end
  102.          end
  103.       end                  
  104.    end
  105.    
  106.    
  107.    // This method fills the MultiTokenizer with callbacks that act on the forming syntactic tree.
  108.    // The callbacks are generated on the fly, filling them with dynamic code.
  109.    // The workhorse for that is the next function.
  110.    function prepareParsing( mt )    
  111.       this = self   // NOTE(review): 'this' is not referenced anywhere below -- confirm it is needed
  112.      
  113.       for token, stateMap in self.tokenMap
  114.          self.trace( "Generating parser for token", token.escape() )
  115.          sw = self.prepareSwitch( stateMap, "state" )        
  116.          func = {(text) state = ^~self.currentState(); tree = nil; ^~sw() }
  117.          self.trace2( func.render() )
  118.          mt.addToken( token, func )
  119.       end
  120.      
  121.       for re, stateMap in self.reMap
  122.          self.trace( "Generating parser for re", re.escape() )
  123.          sw = self.prepareSwitch( stateMap, "state" )        
  124.          self.trace2( sw.render() )
  125.          func = {(text) state = ^~self.currentState(); tree = nil; ^~sw() }
  126.          mt.addRE( re, func )
  127.       end
  128.       // Rules with a nil token are installed as the tokenizer's plain-text handler.
  129.       self.trace( "Generating parser pure text" )
  130.       sw = self.prepareSwitch( self.nilStates, "state" )
  131.       self.trace2( sw.render() )
  132.       func = {(text) state = ^~self.currentState(); tree = nil; ^~sw() }
  133.       mt.onText = func
  134.      
  135.    end
  136.    
  137.    // This function transforms a map of "state" => PRule entities
  138.    // into a switch() ... end code that selects the correct rule at given state.
  139.    function prepareSwitch( stateMap, stateVar )      
  140.       // Generate a switch on state ...
  141.       sw = Syn.Switch( Symbol(stateVar) )
  142.      
  143.       for state, r in stateMap
  144.          
  145.          //... in which every state is a branch ...
  146.          cs = SynTree()
  147.          cs.selector = Syn.Case(state)
  148.          
  149.          // ... where it invokes the action tied with the rule...
  150.          action = r.action
  151.          if action
  152.             cs.append( {[] ^~action(tree, text)} )
  153.          end
  154.          // ... and possibly changes the state of the parser ...
  155.          cs.append( {[] ^~self.setState(^~(r.target))} )
  156.          
  157.          sw.append(cs)
  158.       end
  159.       dflt = SynTree()
  160.       dflt.append( {[] printl("Default: ", state)} )
  161.       sw.append( dflt )
  162.       return sw
  163.    end
  164.    // Returns the last entry of the state list, or "<EMPTY>" when the list is empty.
  165.    function currentState()
  166.       if self._stateList
  167.          return self._stateList[-1]
  168.       end
  169.       return "<EMPTY>"
  170.    end
  171.    // Applies a ";"-separated list of steps: "#pop", "#stay", or the name of a state to enter.
  172.    function setState( newState )
  173.       self.trace("setState", newState )
  174.      
  175.       steps = newState.split(";")
  176.       for s in steps
  177.          switch s
  178.             case "#pop"
  179.                self.popState()
  180.             case "#stay"
  181.                // do nothing
  182.             default
  183.                if s notin self.states   // check the declared states, not the list being iterated
  184.                   self.error("State '" + s + "' not found")
  185.                else              
  186.                   self.trace("Descending in state", s)
  187.                   self._stateList += s
  188.                end
  189.          end
  190.       end      
  191.    end
  192.    // Removes the last entry of the state list; reports an error if the list is empty.
  193.    function popState()
  194.       if not self._stateList
  195.          self.error("Pop state without states.")
  196.       else
  197.          self._stateList.remove(-1)
  198.       end
  199.    end
  200.    // Raises a ParserError carrying s as its extra description.
  201.    function error( s )
  202.       raise ParserError( 10001, "Parser Error", s)
  203.    end
  204.    // Prints all the parameters it receives on one line, prefixed by "[P-TRC] ".
  205.    function trace()
  206.       >> "[P-TRC] "
  207.       ^[fself.params] {(v) >>v, " "}
  208.       printl()
  209.    end
  210.    // Same as trace(), with the "[P-TR2] " prefix.
  211.    function trace2()
  212.       >> "[P-TR2] "
  213.       ^[fself.params] {(v) >>v, " "}
  214.       printl()
  215.    end
  216. end
  217.  
  218. //=====================================================================
  219. // Callbacks
  220. //=====================================================================
  221.  
  222. // This is an action factory: it returns a closure taking (tree, text), usable as a PRule action.
  223. // Its purpose is to create a node of type 'name' and optionally transform the input
  224. // text via the regex/subst pair; at the moment the closure only prints the resulting text.
  225. function addBelow( name, regex, subst )
  226.    return { tree, text =>
  227.       >> "Below node ", name, ": "
  228.       if regex   // regex and subst are optional
  229.          text = regex.replace(text, subst)
  230.       end
  231.       > text
  232.    }
  233. end
  234.  
  235. // As addBelow, but meant to add a sibling to the current node; for now it only prints the text.
  236. function addBeside(name, regex, subst )
  237.    return { tree, text =>
  238.       >> "Beside node ", name, ": "
  239.       if regex   // regex and subst are optional
  240.          text = regex.replace(text, subst)
  241.       end
  242.       > text
  243.       }
  244. end
  245.  
  246. // Meant to make the parent of the current node the new current node; for now it only prints a message.
  247. function popNode(tree)
  248.    > "Popping current node"
  249. end
  250.  
  251. // Meant to add text to the currently active node (or fill its content field); for now it only prints the text.
  252. function addToContent(tree, text)
  253.    > "Adding to Content: ", text
  254. end
  255.  
  256. //=====================================================================
  257. // Rules
  258. //=====================================================================
  259.  
  260. x = Parser()
  261. // shortcut: PState( stateName, rule, ... ) registers rules on x through Parser.add
  262. PState = x.add
  263.  
  264. PState( "start",
  265.    // addBelow will actually create a function that will be responsible
  266.    // for adding the text below the forming tree
  267.    PRule( "/*#", "faldoc_text", addBelow("FaldocBlock") ),
  268.    PRule( "//#", "faldoc_text_eol", addBelow("FaldocBlock") ),
  269.    // addToContent doesn't need configuration, so we can use it directly
  270.    PRule( nil, "#stay", addToContent )
  271. )
  272.  
  273. base_faldocText = .[
  274.    PRule( r'@b\s.*\s', "#stay", addBeside("b", r'@b\s+(\w)', '\1') )
  275.    // the action is optional
  276.    PRule("\\", "ignore")
  277.    PRule(nil, "#stay", addBeside("text") )
  278.    ]
  279.  
  280. PState( "faldoc_text",
  281.    base_faldocText,
  282.    PRule( '*/', "#pop", popNode )
  283. )
  284.  
  285. PState( "faldoc_text_eol",
  286.    base_faldocText,
  287.    PRule( "\n", "#pop", popNode )
  288. )
  289.  
  290. PState( "ignore",
  291.    PRule( nil, "#pop", addToContent )
  292. )
  293.  
  294.  
  295. //=====================================================================
  296. // Test: parse a sample string containing one "/*# ... */" block
  297. //=====================================================================
  298.  
  299. x.parse( "Ignored text /*# hello world @b again! */ other text to be ignored." )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement