Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Copyright (c) 2001-2010 Hartmut Kaiser
- // but butchered by me
- // #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/config/warning_disable.hpp>
#include <boost/bind.hpp>
//[wcp_includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
//]
#include <iostream>
#include <string>
- //[wcp_namespaces
- namespace sp = boost::spirit;
- using namespace boost::spirit::ascii;
- //]
- ///////////////////////////////////////////////////////////////////////////////
- // Token definition: We use the lexertl based lexer engine as the underlying
- // lexer type.
- ///////////////////////////////////////////////////////////////////////////////
- //[wcp_token_ids
- enum tokenids
- {
- IDANY = sp::lex::min_token_id + 10
- };
- //]
- template <typename Lexer>
- struct word_count_tokens : sp::lex::lexer<Lexer>
- {
- word_count_tokens()
- {
- // define patterns (lexer macros) to be used during token definition
- // below
- this->self.add_pattern
- ("WORD", "[^ \t\n]+")
- ;
- // define tokens and associate them with the lexer
- word = "{WORD}"; // reference the pattern 'WORD' as defined above
- // this lexer will recognize 3 token types: words, newlines, and
- // everything else
- this->self.add
- (word) // no token id is needed here
- ('\n') // characters are usable as tokens as well
- (".", IDANY) // string literals will not be escaped by the library
- ;
- }
- // the token 'word' exposes the matched string as its parser attribute
- sp::lex::token_def<std::string> word;
- };
- ///////////////////////////////////////////////////////////////////////////////
- // Grammar definition
- ///////////////////////////////////////////////////////////////////////////////
- template <typename Iterator>
- struct word_count_grammar : sp::qi::grammar<Iterator>
- {
- template <typename TokenDef>
- word_count_grammar(TokenDef const& tok)
- : word_count_grammar::base_type(start)
- , c(0), w(0), l(0)
- {
- using boost::phoenix::ref;
- using boost::phoenix::size;
- start = *( tok.word [++ref(w), ref(c) += size(boost::spirit::_1)]
- | sp::lit('\n') [++ref(c), ++ref(l)]
- | sp::qi::token(IDANY) [++ref(c), boost::bind(&word_count_grammar::idany_sem_action, this)]
- )
- ;
- }
- // Next step: Add arguments as described in 'Lexer semantic Actions'
- void idany_sem_action()
- {
- std::cout << "found idany!" << std::endl;
- }
- std::size_t c, w, l;
- sp::qi::rule<Iterator> start;
- };
- ///////////////////////////////////////////////////////////////////////////////
- int main(int argc, char* argv[])
- {
- /*< Define the token type to be used: `std::string` is available as the
- type of the token attribute
- >*/ typedef sp::lex::lexertl::token<
- char const*, boost::mpl::vector<std::string>
- > token_type;
- /*< Define the lexer type to use implementing the state machine
- >*/ typedef sp::lex::lexertl::lexer<token_type> lexer_type;
- /*< Define the iterator type exposed by the lexer type
- >*/ typedef word_count_tokens<lexer_type>::iterator_type iterator_type;
- // now we use the types defined above to create the lexer and grammar
- // object instances needed to invoke the parsing process
- word_count_tokens<lexer_type> word_count; // Our lexer
- word_count_grammar<iterator_type> g (word_count); // Our parser
- // read in the file int memory
- std::string str ("one two three\nfour");
- char const* first = str.c_str();
- char const* last = &first[str.size()];
- /*< Parsing is done based on the token stream, not the character
- stream read from the input. The function `tokenize_and_parse()` wraps
- the passed iterator range `[first, last)` by the lexical analyzer and
- uses its exposed iterators to parse the token stream.
- >*/ bool r = sp::lex::tokenize_and_parse(first, last, word_count, g);
- if (r) {
- std::cout << "lines: " << g.l << ", words: " << g.w
- << ", characters: " << g.c << "\n";
- }
- else {
- std::string rest(first, last);
- std::cerr << "Parsing failed\n" << "stopped at: \""
- << rest << "\"\n";
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement