//  Copyright (c) 2001-2010 Hartmut Kaiser
//  but butchered by me

// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE

#include <boost/config/warning_disable.hpp>
//[wcp_includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>   // for phoenix::bind in the
                                                   // semantic action below
//]

#include <iostream>
#include <string>

//[wcp_namespaces
namespace sp = boost::spirit;
using namespace boost::spirit::ascii;
//]
///////////////////////////////////////////////////////////////////////////////
//  Token definition: we use the lexertl based lexer engine as the underlying
//                    lexer type.
///////////////////////////////////////////////////////////////////////////////
//[wcp_token_ids
enum tokenids
{
    IDANY = sp::lex::min_token_id + 10
};
//]
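
// lex::min_token_id is the first id available for user-defined tokens; ids
// below it are reserved by the library. The "+ 10" offset is arbitrary and
// simply leaves room for other explicitly numbered tokens.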

template <typename Lexer>
struct word_count_tokens : sp::lex::lexer<Lexer>
{
    word_count_tokens()
    {
        // define patterns (lexer macros) to be used during token definition
        // below
        this->self.add_pattern
            ("WORD", "[^ \t\n]+")
        ;

        // define tokens and associate them with the lexer
        word = "{WORD}";    // reference the pattern 'WORD' as defined above

        // this lexer will recognize 3 token types: words, newlines, and
        // everything else
        this->self.add
            (word)          // no token id is needed here
            ('\n')          // characters are usable as tokens as well
            (".", IDANY)    // the library does not escape string literals, so
                            // "." stays a regex wildcard matching any character
        ;
    }

    // the token 'word' exposes the matched string as its parser attribute
    sp::lex::token_def<std::string> word;
};
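
// A minimal, hedged sketch (not wired into main below) of driving this lexer
// on its own, without any grammar, via sp::lex::tokenize(). The callback is
// invoked once per recognized token and returns true to keep tokenizing; the
// functor name print_token_id is our own, not part of the library.
struct print_token_id
{
    typedef bool result_type;

    template <typename Token>
    bool operator()(Token const& t) const
    {
        std::cout << "token id: " << t.id() << std::endl;
        return true;    // returning false would abort tokenization early
    }
};
// usage would look roughly like:
//   word_count_tokens<lexer_type> lexer;
//   char const* f = input.c_str();
//   bool ok = sp::lex::tokenize(f, f + input.size(), lexer, print_token_id());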

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct word_count_grammar : sp::qi::grammar<Iterator>
{
    template <typename TokenDef>
    word_count_grammar(TokenDef const& tok)
      : word_count_grammar::base_type(start)
      , c(0), w(0), l(0)
    {
        using boost::phoenix::ref;
        using boost::phoenix::size;

        // phoenix::bind (not boost::bind) is needed here: inside a Phoenix
        // comma expression a plain boost::bind object would be treated as a
        // value and never actually invoked
        start =  *(   tok.word              [++ref(w), ref(c) += size(boost::spirit::_1)]
                  |   sp::lit('\n')         [++ref(c), ++ref(l)]
                  |   sp::qi::token(IDANY)  [++ref(c), boost::phoenix::bind(&word_count_grammar::idany_sem_action, this)]
                  )
              ;
    }

    // Next step: add arguments as described in 'Lexer Semantic Actions'
    void idany_sem_action()
    {
        std::cout << "found idany!" << std::endl;
    }
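
    // A hedged sketch of the "next step" mentioned above: qi::token() exposes
    // the matched range of the underlying character stream as its attribute,
    // so (under that assumption) an overload like the following could be
    // bound with boost::phoenix::bind(..., this, sp::qi::_1) instead:
    //
    //   void idany_sem_action(boost::iterator_range<char const*> const& r)
    //   {
    //       std::cout << "found idany: \""
    //                 << std::string(r.begin(), r.end()) << "\"" << std::endl;
    //   }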

    std::size_t c, w, l;
    sp::qi::rule<Iterator> start;
};

///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
/*<  Define the token type to be used: `std::string` is available as the
     type of the token attribute
>*/  typedef sp::lex::lexertl::token<
        char const*, boost::mpl::vector<std::string>
    > token_type;

/*<  Define the lexer type to use implementing the state machine
>*/  typedef sp::lex::lexertl::lexer<token_type> lexer_type;

/*<  Define the iterator type exposed by the lexer type
>*/  typedef word_count_tokens<lexer_type>::iterator_type iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    word_count_tokens<lexer_type> word_count;          // our lexer
    word_count_grammar<iterator_type> g (word_count);  // our parser

    // the original example read a file into memory; here a fixed string
    // stands in for the input
    std::string str ("one two three\nfour");
    char const* first = str.c_str();
    char const* last = &first[str.size()];

/*<  Parsing is done based on the token stream, not the character
     stream read from the input. The function `tokenize_and_parse()` wraps
     the passed iterator range `[first, last)` with the lexical analyzer and
     uses its exposed iterators to parse the token stream.
>*/  bool r = sp::lex::tokenize_and_parse(first, last, word_count, g);
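
    // tokenize_and_parse() takes 'first' by reference and advances it as
    // input is consumed, which is what makes the "stopped at" diagnostic
    // below meaningful on failure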

    if (r) {
        std::cout << "lines: " << g.l << ", words: " << g.w
                  << ", characters: " << g.c << "\n";
    }
    else {
        std::string rest(first, last);
        std::cerr << "Parsing failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
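
// For the hard-coded input "one two three\nfour", and with phoenix::bind in
// place so the IDANY action actually fires (once per space), the run should
// print roughly:
//
//   found idany!
//   found idany!
//   lines: 1, words: 4, characters: 18
//
// only '\n' bumps the line counter, so the final line without a trailing
// newline is not counted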