//  Copyright (c) 2001-2010 Hartmut Kaiser
//  but butchered by me

// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE

#include <boost/config/warning_disable.hpp>
//[wcp_includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>   // for phoenix::bind in the
                                                   // semantic action below
//]

#include <iostream>
#include <string>

//[wcp_namespaces
namespace sp = boost::spirit;
using namespace boost::spirit::ascii;
//]
///////////////////////////////////////////////////////////////////////////////
//  Token definition: we use the lexertl based lexer engine as the underlying
//                    lexer type.
///////////////////////////////////////////////////////////////////////////////
//[wcp_token_ids
enum tokenids
{
    IDANY = sp::lex::min_token_id + 10
};
//]
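
// lex::min_token_id is the first id available for user-defined tokens; ids
// below it are reserved by the library. The "+ 10" offset is arbitrary and
// simply leaves room for other explicitly numbered tokens.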

template <typename Lexer>
struct word_count_tokens : sp::lex::lexer<Lexer>
{
    word_count_tokens()
    {
        // define patterns (lexer macros) to be used during token definition
        // below
        this->self.add_pattern
            ("WORD", "[^ \t\n]+")
        ;

        // define tokens and associate them with the lexer
        word = "{WORD}";    // reference the pattern 'WORD' as defined above

        // this lexer will recognize 3 token types: words, newlines, and
        // everything else
        this->self.add
            (word)          // no token id is needed here
            ('\n')          // characters are usable as tokens as well
            (".", IDANY)    // the library does not escape string literals, so
                            // "." stays a regex wildcard matching any character
        ;
    }

    // the token 'word' exposes the matched string as its parser attribute
    sp::lex::token_def<std::string> word;
};
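
// A minimal, hedged sketch (not wired into main below) of driving this lexer
// on its own, without any grammar, via sp::lex::tokenize(). The callback is
// invoked once per recognized token and returns true to keep tokenizing; the
// functor name print_token_id is our own, not part of the library.
struct print_token_id
{
    typedef bool result_type;

    template <typename Token>
    bool operator()(Token const& t) const
    {
        std::cout << "token id: " << t.id() << std::endl;
        return true;    // returning false would abort tokenization early
    }
};
// usage would look roughly like:
//   word_count_tokens<lexer_type> lexer;
//   char const* f = input.c_str();
//   bool ok = sp::lex::tokenize(f, f + input.size(), lexer, print_token_id());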

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct word_count_grammar : sp::qi::grammar<Iterator>
{
    template <typename TokenDef>
    word_count_grammar(TokenDef const& tok)
      : word_count_grammar::base_type(start)
      , c(0), w(0), l(0)
    {
        using boost::phoenix::ref;
        using boost::phoenix::size;

        // phoenix::bind (not boost::bind) is needed here: inside a Phoenix
        // comma expression a plain boost::bind object would be treated as a
        // value and never actually invoked
        start =  *(   tok.word              [++ref(w), ref(c) += size(boost::spirit::_1)]
                  |   sp::lit('\n')         [++ref(c), ++ref(l)]
                  |   sp::qi::token(IDANY)  [++ref(c), boost::phoenix::bind(&word_count_grammar::idany_sem_action, this)]
                  )
              ;
    }

    // Next step: add arguments as described in 'Lexer Semantic Actions'
    void idany_sem_action()
    {
        std::cout << "found idany!" << std::endl;
    }
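
    // A hedged sketch of the "next step" mentioned above: qi::token() exposes
    // the matched range of the underlying character stream as its attribute,
    // so (under that assumption) an overload like the following could be
    // bound with boost::phoenix::bind(..., this, sp::qi::_1) instead:
    //
    //   void idany_sem_action(boost::iterator_range<char const*> const& r)
    //   {
    //       std::cout << "found idany: \""
    //                 << std::string(r.begin(), r.end()) << "\"" << std::endl;
    //   }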

    std::size_t c, w, l;
    sp::qi::rule<Iterator> start;
};

///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
/*<  Define the token type to be used: `std::string` is available as the
     type of the token attribute
>*/  typedef sp::lex::lexertl::token<
        char const*, boost::mpl::vector<std::string>
    > token_type;

/*<  Define the lexer type to use implementing the state machine
>*/  typedef sp::lex::lexertl::lexer<token_type> lexer_type;

/*<  Define the iterator type exposed by the lexer type
>*/  typedef word_count_tokens<lexer_type>::iterator_type iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    word_count_tokens<lexer_type> word_count;          // our lexer
    word_count_grammar<iterator_type> g (word_count);  // our parser

    // the original example read a file into memory; here a fixed string
    // stands in for the input
    std::string str ("one two three\nfour");
    char const* first = str.c_str();
    char const* last = &first[str.size()];

/*<  Parsing is done based on the token stream, not the character
     stream read from the input. The function `tokenize_and_parse()` wraps
     the passed iterator range `[first, last)` with the lexical analyzer and
     uses its exposed iterators to parse the token stream.
>*/  bool r = sp::lex::tokenize_and_parse(first, last, word_count, g);
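
    // tokenize_and_parse() takes 'first' by reference and advances it as
    // input is consumed, which is what makes the "stopped at" diagnostic
    // below meaningful on failure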

    if (r) {
        std::cout << "lines: " << g.l << ", words: " << g.w
                  << ", characters: " << g.c << "\n";
    }
    else {
        std::string rest(first, last);
        std::cerr << "Parsing failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
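
// For the hard-coded input "one two three\nfour", and with phoenix::bind in
// place so the IDANY action actually fires (once per space), the run should
// print roughly:
//
//   found idany!
//   found idany!
//   lines: 1, words: 4, characters: 18
//
// only '\n' bumps the line counter, so the final line without a trailing
// newline is not counted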