Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <stdexcept>
- #include <string>
- #include <utility>
- #include <memory>
- #include <vector>
- #include <queue>
- #include <cassert>
- #define USER
- /*************************************************************************************************/
- class Error : public std::runtime_error
- {
- public:
- using std::runtime_error::runtime_error;
- };
- class SyntaxError : public Error
- {
- public:
- constexpr const static std::size_t nPos = std::numeric_limits<std::size_t>::max();
- SyntaxError(const std::string& msg, std::size_t tokPosInInput = nPos) :
- Error(BuildMsg(msg, tokPosInInput)) {}
- private:
- std::string BuildMsg(const std::string& msg, std::size_t pos);
- };
- /*************************************************************************************************/
- class Expr
- {
- public:
- constexpr static const std::size_t notMatched = std::numeric_limits<std::size_t>::max();
- virtual ~Expr() = default;
- //Tries to match passed string with regular expression
- //Returns length of matched part or 'notMatched'
- std::size_t Match(const std::string& str, std::size_t begin, std::size_t maxL);
- private:
- //Implements the matching
- virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t end) = 0;
- };
- using ExprPtr = std::unique_ptr<Expr>;
- class Or : public Expr
- {
- public:
- Or(ExprPtr&& left, ExprPtr&& right) : left(std::move(left)), right(std::move(right)) {}
- private:
- virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override final;
- ExprPtr left, right;
- };
- class Seq : public Expr
- {
- public:
- Seq(ExprPtr&& head, ExprPtr&& tail) : head(std::move(head)), tail(std::move(tail)) {}
- private:
- virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override;
- ExprPtr head, tail;
- };
- class Star : public Expr
- {
- public:
- Star(ExprPtr&& expr) : expr(std::move(expr)) {}
- private:
- virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override final;
- ExprPtr expr;
- };
- class Parens : public Expr
- {
- public:
- struct Match
- {
- std::size_t b, len;
- };
- Parens(ExprPtr&& expr) : expr(std::move(expr)) {}
- const Match& GetMatchedPart() const { return matched; }
- private:
- virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override final;
- Match matched;
- ExprPtr expr;
- };
- class GroupItem
- {
- public:
- virtual ~GroupItem() = default;
- virtual bool Match(char c) = 0;
- };
- class Group : public Expr
- {
- public:
- using Items = std::vector<std::unique_ptr<GroupItem>>;
- Group(Group::Items && items, bool invert) :items(std::move(items)), inv(invert) {}
- private:
- virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override final;
- Items items;
- bool inv;
- };
- class Range : public GroupItem
- {
- public:
- Range(char b, char e) : b(b), e(e) { assert(b <= e); }
- virtual bool Match(char c)override final { return b <= c && c <= e; }
- private:
- char b, e;
- };
- class Letter : public Expr, public GroupItem
- {
- public:
- Letter(char c) : c(c) {}
- virtual bool Match(char ch) override final { return c == ch; }
- private:
- virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override final;
- char c;
- };
- class Dot : public Expr
- {
- private:
- virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override final;
- };
- /*************************************************************************************************/
- class RegexMatch
- {
- public:
- using Matches = std::vector<std::string>;
- RegexMatch(bool matched, Matches matches) : matched(matched), matches(std::move(matches)) {}
- bool matched;
- Matches matches;
- };
- struct Token
- {
- enum tokType
- {
- letter, group, paren, hyphen,
- star, caret, dot, or
- } type;
- char c;//value(even for non-letter for simplicity)
- std::size_t pos;//Position in input(for syntax errors)
- Token(tokType type, char c, std::size_t pos) :type(type), c(c), pos(pos) { }
- };
- using Tokens = std::queue<Token>;
- class Regex
- {
- public:
- //Builds regular expression
- Regex(const std::string& str) : expr(BuildExpr(str)) {}
- //Tries to match passed string
- RegexMatch Match(const std::string& str);
- private:
- Tokens Tokenize(const std::string& str);
- Token::tokType GetTokType(char c);
- ExprPtr BuildExpr(const std::string& str);
- //TODO Rename
- //Empty or parenthesis/bracket
- bool HasNext(const Tokens& tokens);
- ExprPtr ParseExpr(Tokens& tokens);
- ExprPtr ParseOrExpr(Tokens& tokens);
- ExprPtr ParseSeqExpr(Tokens& tokens);
- ExprPtr ParseStarExpr(Tokens& tokens);
- ExprPtr ParseSimpleExpr(Tokens& tokens);
- ExprPtr ParseParenExpr(Tokens& tokens);
- ExprPtr ParseGroupExpr(Tokens& tokens);
- //ORDER-DEPENDENT
- std::vector<Parens*> parens;
- ExprPtr expr;
- };
- /*************************************************************************************************/
- class Grep
- {
- public:
- static int Run(int argc, char** argv);
- static void PrintMatch(const RegexMatch& match);
- };
- /**********************************IMPLEMENTATION*************************************************/
- std::string SyntaxError::BuildMsg(const std::string& msg, std::size_t pos)
- {
- if (pos == nPos)
- return "Syntax Error: " + msg;
- else
- return "Syntax Error (" + std::to_string(pos) + "): " + msg;
- }
- std::size_t Expr::Match(const std::string& str, std::size_t begin, std::size_t maxL)
- {
- if (maxL < 0)return notMatched;
- return MatchStr(str, begin, maxL);
- }
- std::size_t Or::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
- {
- //Match either one or second, return better match
- std::size_t leftL = left->Match(str, begin, maxL);
- std::size_t rightL = right->Match(str, begin, maxL);
- if (leftL == notMatched)
- return rightL;
- else if (rightL == notMatched)
- return leftL;
- else
- return std::max(leftL, rightL);
- }
- std::size_t Seq::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
- {
- //Try all combinations
- bool matched = false;
- std::size_t bestMatchL = 0;
- for (std::size_t i = 0; i <= maxL; ++i)
- {
- std::size_t hMatchL = head->Match(str, begin, maxL - i);
- if (hMatchL == notMatched)
- return notMatched;
- std::size_t tMatchL = tail->Match(str, begin + hMatchL, maxL - hMatchL);
- if (tMatchL == notMatched)
- continue;
- if (hMatchL + tMatchL == maxL)//The best we can get
- return maxL;
- else
- {
- matched = true;
- bestMatchL = std::max(bestMatchL, hMatchL + tMatchL);
- }
- }
- return matched ? bestMatchL : notMatched;
- }
- std::size_t Star::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
- {
- std::size_t matchL = 0;
- //Greedy match
- while (maxL - matchL > 0)
- {
- std::size_t tmpMatchL = expr->Match(str, begin + matchL, maxL - matchL);
- if (tmpMatchL == notMatched || tmpMatchL == 0)
- break;
- else
- matchL += tmpMatchL;
- }
- return matchL;
- }
- std::size_t Parens::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
- {
- auto matchL = expr->Match(str, begin, maxL);
- matched.b = begin;
- matched.len = matchL == notMatched ? 0 : matchL;
- return matchL;
- }
- std::size_t Group::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
- {
- char c = str[begin];
- for (auto& item : items)
- if (item->Match(c) ^ inv)
- return 1;
- return notMatched;
- }
- std::size_t Letter::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
- {
- return str[begin] == c ? 1 : notMatched;
- }
- std::size_t Dot::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
- {
- return maxL == 0 ? notMatched : 1;
- }
- RegexMatch Regex::Match(const std::string& str)
- {
- assert(str.length() != Expr::notMatched);
- bool matched = expr->Match(str, 0, str.length()) == str.length();
- RegexMatch::Matches matches;
- std::transform(parens.rbegin(), parens.rend(), std::back_inserter(matches), [&str](auto &&p)
- { auto m = p->GetMatchedPart(); return str.substr(m.b, m.len); });
- return RegexMatch(matched, std::move(matches));
- }
- Tokens Regex::Tokenize(const std::string& str)
- {
- Tokens tokens;
- bool hasAParen = str.find('(') != std::string::npos;
- if (!hasAParen)
- tokens.emplace(Token::paren, '(', SyntaxError::nPos);//Won't generate an error
- for (std::size_t i = 0; i < str.length(); ++i)
- {
- Token::tokType type;
- if (str[i] == '\\')//Escaped char
- if (++i == str.length())
- throw SyntaxError("Unescaped '\\'", i - 1);
- else
- if (GetTokType(str[i]) == Token::letter && str[i] != '\\')
- throw SyntaxError(std::string("Unknown escaped character '") + str[i] +
- "'. Only special characters may be escaped.", i);
- else
- type = Token::letter;
- else
- type = GetTokType(str[i]);
- tokens.emplace(type, str[i], i);
- }
- if (!hasAParen)
- tokens.emplace(Token::paren, ')', SyntaxError::nPos);//Won't generate an error
- return tokens;
- }
- Token::tokType Regex::GetTokType(char c)
- {
- switch (c)
- {
- case '|':
- return Token:: or ;
- case '.':
- return Token::dot;
- case '^':
- return Token::caret;
- case '*':
- return Token::star;
- case '-':
- return Token::hyphen;
- case '(':
- case ')':
- return Token::paren;
- case '[':
- case ']':
- return Token::group;
- default:
- return Token::letter;
- }
- }
- ExprPtr Regex::BuildExpr(const std::string& str)
- {
- auto tokens = Tokenize(str);
- auto e = ParseExpr(tokens);
- if (!tokens.empty())
- throw SyntaxError("Unknown syntax", tokens.front().pos);
- else
- return e;
- }
- bool Regex::HasNext(const Tokens& tokens)
- {
- if (tokens.empty())
- return false;
- const auto& token = tokens.front();
- if (token.type == Token::group && token.c == ']')
- return false;
- if (token.type == Token::paren && token.c == ')')
- return false;
- return true;
- }
- ExprPtr Regex::ParseExpr(Tokens& tokens)
- {
- return ParseOrExpr(tokens);
- }
- ExprPtr Regex::ParseOrExpr(Tokens& tokens)
- {
- auto seqExpr = ParseSeqExpr(tokens);
- if (HasNext(tokens))
- {
- const auto& token = tokens.front();
- if (token.type == Token:: or )
- {
- tokens.pop();
- return std::make_unique<Or>(std::move(seqExpr), ParseOrExpr(tokens));
- }
- }
- return seqExpr;
- }
- ExprPtr Regex::ParseSeqExpr(Tokens& tokens)
- {
- auto starExpr = ParseStarExpr(tokens);
- //Or has lower precedence
- if (HasNext(tokens) && tokens.front().type != Token:: or )
- return std::make_unique<Seq>(std::move(starExpr), ParseSeqExpr(tokens));
- else
- return starExpr;
- }
- ExprPtr Regex::ParseStarExpr(Tokens& tokens)
- {
- auto simpleExpr = ParseSimpleExpr(tokens);
- if (HasNext(tokens))
- {
- const auto& token = tokens.front();
- if (token.type == Token::star)
- {
- tokens.pop();
- return std::make_unique<Star>(std::move(simpleExpr));
- }
- }
- return simpleExpr;
- }
- ExprPtr Regex::ParseSimpleExpr(Tokens& tokens)
- {
- if (tokens.empty())
- throw SyntaxError("Unexpected end of the expression.");
- else
- {
- Token token = tokens.front();
- tokens.pop();
- switch (token.type)
- {
- case Token::letter:
- return std::make_unique<Letter>(token.c);
- case Token::dot:
- return std::make_unique<Dot>();
- case Token::paren:
- if (token.c != '(')
- {
- assert(token.c == ')');
- throw SyntaxError("Unmatched/Unescaped right parenthesis.", token.pos);
- }
- else
- return ParseParenExpr(tokens);
- case Token::group:
- if (token.c != '[')
- {
- assert(token.c == ']');
- throw SyntaxError("Unmatched/Unescaped right bracket.", token.pos);
- }
- else
- return ParseGroupExpr(tokens);
- default:
- throw SyntaxError(std::string("Unescaped character '") + token.c + '\'', token.pos);
- }
- }
- }
- ExprPtr Regex::ParseParenExpr(Tokens& tokens)
- {
- auto e = ParseExpr(tokens);
- if (HasNext(tokens))
- throw SyntaxError("Missing a right parenthesis.", tokens.front().pos);
- else if (!tokens.empty())
- {
- assert(tokens.front().type == Token::paren && tokens.front().c == ')');
- tokens.pop();
- }
- auto paren = std::make_unique<Parens>(std::move(e));
- //Remember it
- parens.push_back(paren.get());
- return paren;
- }
- ExprPtr Regex::ParseGroupExpr(Tokens& tokens)
- {
- if (tokens.empty())
- throw SyntaxError("Unexpected end of the expression, expected ']'");
- bool invert = tokens.front().type == Token::caret;
- if (invert)tokens.pop();
- Group::Items items;
- while (HasNext(tokens))
- {
- Token token = tokens.front();
- tokens.pop();
- switch (token.type)
- {
- case Token::letter:
- if (tokens.size() >= 2 && tokens.front().type == Token::hyphen)
- {
- tokens.pop();//Hyphen
- Token endToken = tokens.front();
- tokens.pop();
- if (endToken.type != Token::letter)
- throw SyntaxError("Expected a letter for the range", endToken.pos + 2);
- else if (token.c > endToken.c)
- throw SyntaxError("End letter must have a higher or equal ASCII value than the start letter", endToken.pos);
- else
- items.push_back(std::make_unique<Range>(token.c, endToken.c));
- }
- else
- items.push_back(std::make_unique<Letter>(token.c));
- break;
- default:
- throw SyntaxError(std::string("Unescaped special character '") + token.c + '\'', token.pos);
- break;
- }
- }
- if (!tokens.empty() && tokens.front().c == ']')
- {
- tokens.pop();
- return std::make_unique<Group>(std::move(items), invert);
- }
- else if (tokens.empty())
- throw SyntaxError("Unexpected end of the expression, expected ']'");
- else
- throw SyntaxError("Missing ']'", tokens.front().pos);
- }
- int Grep::Run(int argc, char** argv)
- {
- #ifdef USER
- std::string line;
- do
- {
- try
- {
- std::cout << "Enter regex please:\n";
- std::getline(std::cin, line);
- Regex regex(line);
- std::cout << "OK. Enter inputs:\n";
- while (!std::cin.bad())
- {
- std::getline(std::cin, line);
- if (line == "q")break;
- PrintMatch(regex.Match(line));
- }
- }
- catch (const Error& e)
- {
- std::cout << e.what() << '\n';
- }
- catch (const std::exception& e)
- {
- std::cout << "UNKNOWN ERROR: " << e.what();
- }
- } while (!line.empty());
- return 0;
- #else
- try
- {
- if (argc != 2)//program's name + regexp
- throw Error("Program expects only one argument - a regular expression.");
- else
- {
- Regex regex(argv[1]);
- std::string line;
- while (!std::cin.bad())
- {
- std::getline(std::cin, line);
- std::cout << regex.Match(line);
- }
- return 0;
- }
- std::string line;
- }
- catch (const Error& e)
- {
- std::cout << e.what() << '\n';
- return 1;
- }
- catch (const std::exception& e)
- {
- std::cout << "UNKNOWN ERROR: " << e.what();
- return 1;
- }
- #endif
- }
- void Grep::PrintMatch(const RegexMatch& match)
- {
- #ifdef USER
- std::cout << std::boolalpha << match.matched << '\n';
- #endif
- if (match.matched)
- for (const auto& m : match.matches)
- std::cout << m << '\n';
- else
- std::cout << '\n';
- }
- /**********************************MAIN***********************************************************/
- int main(int argc, char** argv)
- {
- Grep::Run(argc, argv);
- //TODO remove pause, return from grep;
- system("PAUSE");
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement