boost::spirit::lex example

//  Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is equivalent to the following lex program:
//
//       %{
//       /* INITIAL is the default start state.  COMMENT is our new  */
//       /* state where we remove comments.                          */
//       %}
//
//       %s COMMENT
//       %%
//       <INITIAL>"//".*    ;
//       <INITIAL>"/*"      BEGIN COMMENT;
//       <INITIAL>.         ECHO;
//       <INITIAL>[\n]      ECHO;
//       <COMMENT>"*/"      BEGIN INITIAL;
//       <COMMENT>.         ;
//       <COMMENT>[\n]      ;
//       %%
//
//       main()
//       {
//         yylex();
//       }
//
//  Its purpose is to strip comments out of C code.
//
//  Additionally this example demonstrates the use of lexer states to structure
//  the lexer definition.
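//
//  As a worked illustration (the input below is made up, it does not ship
//  with the example): given an input file containing
//
//      int value = 42; /* answer to
//                         everything */
//      value += 1;     // increment
//
//  the program echoes everything except the comment text, producing
//
//      int value = 42;
//      value += 1;
//
//  (the blanks that preceded each comment are echoed too, and the line break
//  inside the block comment disappears, because newlines seen in the COMMENT
//  state are consumed silently).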

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_core.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying
//                    lexer type.
///////////////////////////////////////////////////////////////////////////////
enum tokenids
{
    IDANY = lex::min_token_id + 10,
    IDEOL = lex::min_token_id + 11
};
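// Note: these ids are declared here but not attached to any of the token
// definitions below; they are unused in the rest of this file.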

///////////////////////////////////////////////////////////////////////////////
// Simple custom semantic action function object used to print the matched
// input sequence for a particular token
template <typename Char, typename Traits>
struct echo_input_functor
{
    echo_input_functor (std::basic_ostream<Char, Traits>& os_)
      : os(os_) {}

    // This is called by the semantic action handling code during the lexing
    template <typename Iterator, typename Context>
    void operator()(Iterator const& b, Iterator const& e
      , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
      , std::size_t&, Context&) const
    {
        os << std::string(b, e);
    }

    std::basic_ostream<Char, Traits>& os;
};

template <typename Char, typename Traits>
inline echo_input_functor<Char, Traits>
echo_input(std::basic_ostream<Char, Traits>& os)
{
    return echo_input_functor<Char, Traits>(os);
}

///////////////////////////////////////////////////////////////////////////////
// Another simple custom semantic action function object used to switch the
// state of the lexer
struct set_lexer_state
{
    set_lexer_state(char const* state_)
      : state(state_) {}

    // This is called by the semantic action handling code during the lexing
    template <typename Iterator, typename Context>
    void operator()(Iterator const&, Iterator const&
      , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
      , std::size_t&, Context& ctx) const
    {
        ctx.set_state_name(state.c_str());
    }

    std::string state;
};
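// The state names handed to set_lexer_state ("COMMENT" and "INITIAL") refer to
// the lexer states used in strip_comments_tokens below: "INITIAL" is the
// default state, and "COMMENT" is introduced via this->self("COMMENT").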

///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct strip_comments_tokens : lex::lexer<Lexer>
{
    strip_comments_tokens()
      : strip_comments_tokens::base_type(lex::match_flags::match_default)
    {
        // define tokens and associate them with the lexer
        cppcomment = "\\/\\/[^\n]*";    // '//[^\n]*'
        ccomment = "\\/\\*";            // '/*'
        endcomment = "\\*\\/";          // '*/'
        any = ".";
        eol = "\n";

        // The following tokens are associated with the default lexer state
        // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
        // strictly optional.
        this->self
            =   cppcomment
            |   ccomment    [ set_lexer_state("COMMENT") ]
            |   eol         [ echo_input(std::cout) ]
            |   any         [ echo_input(std::cout) ]
            ;

        // The following tokens are associated with the lexer state 'COMMENT'.
        this->self("COMMENT")
            =   endcomment  [ set_lexer_state("INITIAL") ]
            |   "\n"
            |   "."
            ;
    }

    lex::token_def<> cppcomment, ccomment, endcomment, any, eol;
};

///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // lexer type
    typedef
        lex::lexertl::actor_lexer<lex::lexertl::token<base_iterator_type> >
    lexer_type;

    // now we use the type defined above to create the lexer object instance
    // needed to invoke the tokenization process
    strip_comments_tokens<lexer_type> strip_comments;             // Our lexer

    // No parsing is done at all; everything happens in the lexer semantic
    // actions.
    std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
    base_iterator_type first = str.begin();
    bool r = lex::tokenize(first, str.end(), strip_comments);

    if (!r) {
        std::string rest(first, str.end());
        std::cerr << "Lexical analysis failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
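
///////////////////////////////////////////////////////////////////////////////
// A minimal sketch, not part of the original example: the same lexer driven
// over an arbitrary std::istream instead of the read_from_file() helper from
// "example.hpp". The function name strip_comments_from_stream is made up for
// this sketch; it simply reuses the strip_comments_tokens template and the
// lex::tokenize() call already shown in main() above.
#include <iterator>

bool strip_comments_from_stream(std::istream& is)
{
    typedef std::string::iterator base_iterator_type;
    typedef
        lex::lexertl::actor_lexer<lex::lexertl::token<base_iterator_type> >
    lexer_type;

    strip_comments_tokens<lexer_type> strip_comments;

    // Buffer the whole stream first: lex::tokenize() runs over iterators into
    // an in-memory copy of the input.
    std::string str((std::istreambuf_iterator<char>(is)),
                    std::istreambuf_iterator<char>());

    base_iterator_type first = str.begin();
    return lex::tokenize(first, str.end(), strip_comments);
}

// Possible usage:  strip_comments_from_stream(std::cin);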