// Untitled

import Syn.*
import Node,Tree from struct.tree

// A parsing rule: a plain record of 3 elements.
// token -- what triggers the rule: a string token, a regular expression
//          (any non-string, non-nil value exposing a 'pattern' property),
//          or nil for the plain text found between tokens.
// target -- the state change applied after the rule fires, as understood by
//          Parser.setState: one or more entries separated by ';', each being
//          a state name to enter, "#pop" or "#stay".
// action -- optional function invoked as action(tree, text) with the forming
//          tree and the input text; when nil only the state change happens.
class PRule( token, target, action )
   token = token
   target = target
   action = action
end

// Error raised by the parser (see Parser.error).
// It adds nothing to Error: code, desc and extra are forwarded unchanged,
// so the class only provides a distinct type to catch.
class ParserError( code, desc, extra ) from Error(code, desc, extra )
end


// The workhorse class.
// Rules (PRule) are registered per state through add(); parse() then
// reorganizes them by token, feeds a MultiTokenizer with generated callbacks
// and runs it over the input, building a Tree through the rule actions.
class Parser
   // Map of state name => array of PRule, filled by add().
   states = [=>]

   // The parser status will be set here.
   // NOTE(review): nothing in this class assigns this property; parse()
   // works on (and returns) a local tree instead.
   tree = nil

   // These are temporary structures used internally; parse() resets them.
   tokenMap = nil    // string token => [state => PRule]
   reMap = nil       // regular expression pattern => [state => PRule]
   nilStates = nil   // state => PRule, for the rules having a nil token
   _stateList = nil  // stack of the active states; the last one is the current

   // This method is used to add a rule to a state.
   // It's actually a variable parameter function, with r assuming the value
   // of each rule passed after the state name, in turn.
   // r can also be an array of rules.
   // Raises ParamError if a rule is not a PRule instance.
   function add( stateName, r )
      if r.typeId == ArrayType
         for elem in r
            self.add(stateName, elem )
         end
      else
         if not r.derivedFrom(PRule)
            raise ParamError(10001, "Parameter error", "Rule is not a PRule instance")
         end

         state = self.states.find(stateName, nil)
         if not state
            // First rule for this state: create its rule list under the
            // state name (the key used to be an undefined variable).
            self.states[stateName] = [r]
         else
            state += r
         end
      end

      // Per the note above, this is what walks the remaining parameters.
      // NOTE(review): the exact semantics of fself.redo() are not visible here.
      fself.redo()
   end


   // Start parsing.
   // text -- the input to be parsed; text can also be a stream.
   // enc -- passed as-is to the MultiTokenizer together with the text.
   // Returns the Tree built by the rule actions.
   function parse( text, enc )
      // reset the maps
      self.tokenMap = [=>]
      self.reMap = [=>]
      self.nilStates = [=>]
      self._stateList = []

      self.prepareMaps()
      mt = MultiTokenizer(text, enc)
      tree = Tree()
      self.prepareParsing(mt, tree)
      self.setState("start")
      // Run the tokenizer prepared above over the whole input; the callbacks
      // installed by prepareParsing do the real work.
      ^[mt]

      return tree
   end

   //==========================================================
   // Private part
   //

   // This function reorganizes the states and the rules BY TOKEN.
   // Rules are more easily declared by state, but then we need
   // to know what to do when we find a token; so we reorder the
   // states and their rules by token.
   // String tokens go in tokenMap, nil tokens in nilStates and anything
   // else in reMap, keyed by its 'pattern' property.
   function prepareMaps()
      for state, seq in self.states
         for r in seq
            if r.token.typeId == StringType
               stateMap = self.tokenMap.find(r.token,nil)
               if stateMap
                  stateMap[state] = r
               else
                  self.tokenMap[r.token] = [state => r]
               end
            elif r.token == nil
               // one plain-text rule per state: a later one replaces the former
               self.nilStates[state] = r
            else
               stateMap = self.reMap.find(r.token.pattern,nil)
               if stateMap
                  stateMap[state] = r
               else
                  self.reMap[r.token.pattern] = [state => r]
               end
            end
         end
      end
   end


   // This method fills the MultiTokenizer with callbacks that act on the forming syntactic tree.
   // The callbacks are generated on the fly, filling them with dynamic code.
   // The workhorse for that is the next function.
   // Each callback binds 'state' to the current parser state and 'tree' to
   // the given tree, then runs the switch generated for its token.
   function prepareParsing( mt, tree )

      // callbacks for plain string tokens
      for token, stateMap in self.tokenMap
         self.trace( "Generating parser for token", token.escape() )
         sw = self.prepareSwitch( stateMap, "state" )
         func = {(text) state = ^~self.currentState(); tree = ^~tree; ^~sw() }
         self.trace2( func.render() )
         mt.addToken( token, func )
      end

      // callbacks for regular expressions
      for re, stateMap in self.reMap
         self.trace( "Generating parser for re", re.escape() )
         sw = self.prepareSwitch( stateMap, "state" )
         self.trace2( sw.render() )
         func = {(text) state = ^~self.currentState(); tree = ^~tree; ^~sw() }
         mt.addRE( re, func )
      end

      // callback for the text that matches no token
      self.trace( "Generating parser pure text" )
      sw = self.prepareSwitch( self.nilStates, "state" )
      self.trace2( sw.render() )
      func = {(text) state = ^~self.currentState(); tree = ^~tree; ^~sw() }
      mt.onText = func

   end

   // This function transforms a map of "state" => PRule entities
   // into a switch() ... end code that selects the correct rule at a given state.
   // stateVar is the name of the variable the generated switch tests.
   // Returns the generated switch.
   function prepareSwitch( stateMap, stateVar )
      // Generate a switch on state ...
      sw = Syn.Switch( Symbol(stateVar) )

      for state, r in stateMap

         //... in which every state is a branch ...
         cs = SynTree()
         cs.selector = Syn.Case(state)

         // ... where it invokes the action tied with the rule...
         action = r.action
         if action
            cs.append( {[] ^~action(tree, text)} )
         end
         // ... and possibly changes the state of the parser ...
         cs.append( {[] ^~self.setState(^~(r.target))} )

         sw.append(cs)
      end
      // ... while a state without a rule for this token is just reported.
      dflt = SynTree()
      dflt.append( {[] printl("Default: ", state)} )
      sw.append( dflt )
      return sw
   end

   // Returns the state on top of the state stack, or "<EMPTY>" when
   // there is none.
   function currentState()
      if self._stateList
         return self._stateList[-1]
      end
      return "<EMPTY>"
   end

   // Applies a state change.
   // newState is a ';' separated list of entries, applied in order:
   //   "#pop"  -- leaves the current state
   //   "#stay" -- does nothing
   //   other   -- name of a state to descend into
   // Raises ParserError if a named state has not been declared via add().
   function setState( newState )
      self.trace("setState", newState )

      // Keep this local distinct from the 'states' property: the entries
      // must be validated against the declared states, not against the
      // very list they come from.
      steps = newState.split(";")
      for s in steps
         switch s
            case "#pop"
               self.popState()
            case "#stay"
               // do nothing
            default
               if s notin self.states
                  self.error("State '" + s + "' not found")
               else
                  self.trace("Descending in state", s)
                  self._stateList += s
               end
         end
      end
   end

   // Removes the state on top of the state stack.
   // Raises ParserError if the stack is empty.
   function popState()
      if not self._stateList
         self.error("Pop state without states.")
      else
         self._stateList.remove(-1)
      end
   end

   // Raises a ParserError having s as extra description.
   function error( s )
      raise ParserError( 10001, "Parser Error", s)
   end

   // Prints all the received parameters, space separated, on a line
   // prefixed by "[P-TRC] ".
   function trace()
      >> "[P-TRC] "
      ^[fself.params] {(v) >>v, " "}
      printl()
   end

   // As trace(), but with the "[P-TR2] " prefix.
   function trace2()
      >> "[P-TR2] "
      ^[fself.params] {(v) >>v, " "}
      printl()
   end
end

//=====================================================================
// Callbacks
//=====================================================================

// Action factory: returns a closure (tree, text) suitable as a PRule action.
// The closure creates a node of type 'name' holding the input text, appends
// it as a child of the current node and makes it the new current node.
// regex, subst -- optional; when regex is given, the text stored in the
//                 node is regex.replace(text, subst) instead of the raw text.
function addBelow( name, regex, subst )
   return { tree, text =>
      payload = text
      if regex
         payload = regex.replace(text, subst)
      end
      child = Node(name, payload)
      tree.current.appendChild(child)
      tree.current = child
   }
end

// Action factory: returns a closure (tree, text) suitable as a PRule action.
// The closure creates a node of type 'name' holding the input text, inserts
// it after the current node (as a sibling) and makes it the new current node.
// regex, subst -- optional; when regex is given, the text stored in the
//                 node is regex.replace(text, subst) instead of the raw text.
function addBeside(name, regex, subst )
   return { tree, text =>
      payload = text
      if regex
         payload = regex.replace(text, subst)
      end
      sibling = Node(name, payload)
      tree.current.insertAfter(sibling)
      tree.current = sibling
   }
end

// Action: makes the parent of the current node the new current node.
// Raises ParseError when the current node has no parent, i.e. when trying
// to pop the top node of the tree.
// NOTE(review): the rest of this file raises ParserError for parser
// failures; confirm ParseError is the intended class here.
function popNode(tree)
   upper = tree.current.parent
   if upper == nil
      raise ParseError( 10002, "Popping top node in tree" )
   end
   tree.current = upper
end

// Action: adds the text to the content of the currently active node,
// or fills its content field when that is still empty.
function addToContent(tree, text)
   cur = tree.current
   if not cur.content
      // presumably the concatenation is there to store a fresh string
      // rather than the one received -- confirm
      cur.content = text + ""
   else
      cur.content += text
   end
end

//=====================================================================
// Rules
//=====================================================================

// The parser instance that the rules below are registered on.
x = Parser()
// shortcut: PState( stateName, rule, ... ) declares the rules of a state
PState = x.add

// "start": everything is plain text until a faldoc comment opener is found.
PState( "start",
   // addBelow actually creates the function that will be responsible
   // for adding the text below the current node of the forming tree
   PRule( "/*#", "faldoc_text", addBelow("FaldocBlock") ),
   PRule( "//#", "faldoc_text_eol", addBelow("FaldocBlock") ),
   // addToContent doesn't need configuration, so we can use it directly
   PRule( nil, "#stay", addToContent )
)

// Rules shared by the two faldoc states declared below.
base_faldocText = .[
   // '@b word' becomes a sibling node of type "b"
   PRule( r'@b\s\w+\b', "#stay", addBeside("b", r'@b\s+(\w)', '\1') )
   // the action is optional
   PRule("\\", "ignore")
   // any other text becomes a sibling node of type "text"
   PRule(nil, "#stay", addBeside("text") )
   ]

// Block comment: goes back to the previous state at the closing '*/'.
PState( "faldoc_text",
   base_faldocText,
   PRule( '*/', "#pop", popNode )
)

// Line comment: goes back to the previous state at the end of the line.
PState( "faldoc_text_eol",
   base_faldocText,
   PRule( "\n", "#pop", popNode )
)

// Entered after a backslash: the next plain text is added to the current
// node content, then the state is left.
PState( "ignore",
   PRule( nil, "#pop", addToContent )
)


//=====================================================================
// Test
//=====================================================================

// Parse a sample line holding one faldoc block comment...
tree = x.parse( "Ignored text /*# hello world @b again! */ other text to be ignored." )

// ... and print type and content of each node found iterating tree.top.
for node in tree.top
   > "NODE: ", node.type, ": ", node.content
end