// Untitled

#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <iostream>
#include <list>
#include <vector>
#include <map>
#include <regex>
#include <assert.h>
#include <fstream>
#include <set>

#include <readline/readline.h>
#include <readline/history.h>
#include <boost/regex.hpp>
using namespace std;

class Tokenizer {
public:

    Tokenizer() {}


struct TokenExpr {
    boost::regex TOKEN_STRING {"\".*?\""};
    boost::regex TOKEN_PARAM {"\\(.*?\\)"};
    boost::regex TOKEN_BRACKETS {"\\{.*?\\}"};
    boost::regex TOKEN_FILE {"\\w*\\.\\w*"};
    boost::regex TOKEN_WORD {"[^\\W\\s]\\w+"};
    boost::regex TOKEN_INT {"\\b\\d*?\\.*?\\d?\\b"};
    boost::regex TOKEN_DASH {"-\\b\\w+\\s"};
    boost::regex TOKEN_EQUAL {"="};
    boost::regex TOKEN_QUOTE {"\""};
    boost::regex TOKEN_COMMA {","};
    boost::regex TOKEN_OP {"(\\+|-|\\*|\\/|%)"};
    boost::regex TOKEN_PIPE {"\\|"};
    boost::regex TOKEN_AMPERSAND {"\\&"};
    boost::regex TOKEN_PAREN_LEFT {"\\("};
    boost::regex TOKEN_PAREN_RIGHT {"\\)"};
    } te;

    struct TokenType {
        string TOKEN;
    } tt;

    TokenType TOKEN_STRING = {"STRING"},
            TOKEN_PARAM = {"PARAM"},
            TOKEN_BRACKETS = {"BRACKETS"},
            TOKEN_FILE = {"FILE"},
            TOKEN_WORD = {"WORD"},
            TOKEN_INT = {"INT"},
            TOKEN_DASH = {"DASH"},
            TOKEN_EQUAL = {"EQUAL"},
            TOKEN_QUOTE = {"QUOTE"},
            TOKEN_COMMA = {"COMMA"},
            TOKEN_OP = {"OP"},
            TOKEN_PIPE = {"PIPE"},
            TOKEN_AMPERSAND = {"AMPERSAND"},
            TOKEN_PAREN_LEFT = {"PAREN_LEFT"},
            TOKEN_PAREN_RIGHT = {"PAREN_RIGHT"};


    struct TokenMap {
        string str;
        int begin_pos;
        int end_pos;
        int index;
        TokenType tt;
    };


    TokenMap tmap;


    TokenType get_token_type(string token) {
        string::const_iterator start, end;
        start = token.begin();
        end = token.end();
        boost::match_results<string::const_iterator> what;
        while (regex_search(start, end, what, te.TOKEN_STRING)) {
            cout << "TOKEN_STRING" << endl;
            start = what[0].second;
            return TOKEN_STRING;
        }
        while (regex_search(start, end, what, te.TOKEN_PARAM)) {
            cout << "TOKEN_PARAM" << endl;
            start = what[0].second;
            return TOKEN_PARAM;
        }
        while (regex_search(start, end, what, te.TOKEN_BRACKETS)) {
            cout << "TOKEN_BRACKETS" << endl;
            start = what[0].second;
            return TOKEN_BRACKETS;
        }
        while (regex_search(start, end, what, te.TOKEN_FILE)) {
            cout << "TOKEN_FILE" << endl;
            start = what[0].second;
            return TOKEN_FILE;
        }
        while (regex_search(start, end, what, te.TOKEN_WORD)) {
            cout << "TOKEN_WORD" << endl;
            start = what[0].second;
            return TOKEN_WORD;
        }
        while (regex_search(start, end, what, te.TOKEN_INT)) {
            cout << "TOKEN_INT" << endl;
            start = what[0].second;
            return TOKEN_INT;
        }
        while (regex_search(start, end, what, te.TOKEN_DASH)) {
            cout << "TOKEN_DASH" << endl;
            start = what[0].second;
            return TOKEN_DASH;
        }
        while (regex_search(start, end, what, te.TOKEN_EQUAL)) {
            cout << "TOKEN_EQUAL" << endl;
            start = what[0].second;
            return TOKEN_EQUAL;
        }
        while (regex_search(start, end, what, te.TOKEN_QUOTE)) {
            cout << "TOKEN_QUOTE" << endl;
            start = what[0].second;
            return TOKEN_QUOTE;
        }
        while (regex_search(start, end, what, te.TOKEN_COMMA)) {
            cout << "TOKEN_COMMA" << endl;
            start = what[0].second;
            return TOKEN_COMMA;
        }
        while (regex_search(start, end, what, te.TOKEN_OP)) {
            cout << "TOKEN_OP" << endl;
            start = what[0].second;
            return TOKEN_OP;
        }
        while (regex_search(start, end, what, te.TOKEN_PIPE)) {
            cout << "TOKEN_PIPE" << endl;
            start = what[0].second;
            return TOKEN_PIPE;
        }
        while (regex_search(start, end, what, te.TOKEN_AMPERSAND)) {
            cout << "TOKEN_AMPERSAND" << endl;
            start = what[0].second;
            return TOKEN_AMPERSAND;
        }
        while (regex_search(start, end, what, te.TOKEN_PAREN_LEFT)) {
            cout << "TOKEN_PAREN_LEFT" << endl;
            start = what[0].second;
            return TOKEN_PAREN_LEFT;
        }
        while (regex_search(start, end, what, te.TOKEN_PAREN_RIGHT)) {
            cout << "TOKEN_PAREN_RIGHT" << endl;
            start = what[0].second;
            return TOKEN_PAREN_RIGHT;
        }
    }

    void tokenize(char* s) {

        str_ = string(s);
        vector<string> vec;
        boost::regex re {("\\w+|\\W")};
        boost::sregex_token_iterator i(str_.begin(), str_.end(), re);
        boost::sregex_token_iterator j;
        string t;
        string str = "";
        TokenType last_type;

        bool QUOTED = false;
        bool PARENS = false;
        bool BRACKETS = false;
        unsigned count = 0;
        int first_pos = 0;
        while (i != j) {
            if (*i != " " && *i != "\"" && *i != "(" && *i != ")" && *i != "{"
                    && *i != "}" && QUOTED == false && PARENS == false && BRACKETS == false) {
                vec.push_back(*i);
                t = *i;
                tmap.str = *i;
                tmap.begin_pos = first_pos;
                tmap.end_pos = first_pos + t.size();
                tmap.index++;
                tmap.tt = get_token_type(t);
                last_type = tmap.tt;
                i++;
                count++;
            } else if ((*i != "\"" && QUOTED == true)
                        || *i != ")" && PARENS == true
                        || *i != "}" && BRACKETS == true) {
                str += *i;
                i++;
            } else if ((*i == "\"" && QUOTED == true)
                        || *i == ")" && PARENS == true
                        || *i == "}" && BRACKETS == true) {
                str += *i;
                vec.push_back(str);
                tmap.str = str;
                tmap.begin_pos = first_pos;
                tmap.end_pos = first_pos + str.size();
                tmap.index++;
                tmap.tt = get_token_type(str);
                last_type = tmap.tt;
                i++;
                count++;
                QUOTED = false;
                PARENS = false;
                BRACKETS = false;
                str = "";
            } else if (*i == "\"" && QUOTED == false) {
                str += *i;
                QUOTED = true;
                i++;
            } else if (*i == "(" && PARENS == false) {
                str += *i;
                PARENS = true;
                i++;
            } else if (*i == "{" && BRACKETS == false) {
                str += *i;
                BRACKETS = true;
                i++;
            } else {
                i++;
            }
        }

        for (int n=0;n<vec.size();n++) {
            cout << vec[n] << endl;
        }
        cout << "There were " << count << " tokens found." << endl;
        prompt_token_list = vec;
    }


private:
    string str_;
    unsigned count;
    vector<string> prompt_token_list;
};


int main()
{
    char* buf;
    Tokenizer tk;
    while ((buf = readline("Shell>> ")) != nullptr) {
        if (strlen(buf) > 0) {
            add_history(buf);
            tk.tokenize(buf);
        }
    }
}