Advertisement
Guest User

Untitled

a guest
Dec 12th, 2017
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.90 KB | None | 0 0
  1. #include <iostream>
  2. #include <stdexcept>
  3. #include <string>
  4. #include <utility>
  5. #include <memory>
  6. #include <vector>
  7. #include <queue>
  8. #include <cassert>
  9.  
  10. #define USER
  11.  
  12. /*************************************************************************************************/
  13.  
  14. class Error : public std::runtime_error
  15. {
  16. public:
  17. using std::runtime_error::runtime_error;
  18. };
  19.  
  20. class SyntaxError : public Error
  21. {
  22. public:
  23. constexpr const static std::size_t nPos = std::numeric_limits<std::size_t>::max();
  24. SyntaxError(const std::string& msg, std::size_t tokPosInInput = nPos) :
  25. Error(BuildMsg(msg, tokPosInInput)) {}
  26. private:
  27. std::string BuildMsg(const std::string& msg, std::size_t pos);
  28. };
  29.  
  30. /*************************************************************************************************/
  31.  
  32. class Expr
  33. {
  34. public:
  35. constexpr static const std::size_t notMatched = std::numeric_limits<std::size_t>::max();
  36. virtual ~Expr() = default;
  37. //Tries to match passed string with regular expression
  38. //Returns length of matched part or 'notMatched'
  39. std::size_t Match(const std::string& str, std::size_t begin, std::size_t maxL);
  40. private:
  41. //Implements the matching
  42. virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t end) = 0;
  43. };
  44. using ExprPtr = std::unique_ptr<Expr>;
  45.  
  46. class Or : public Expr
  47. {
  48. public:
  49. Or(ExprPtr&& left, ExprPtr&& right) : left(std::move(left)), right(std::move(right)) {}
  50. private:
  51. virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override final;
  52. ExprPtr left, right;
  53. };
  54.  
  55. class Seq : public Expr
  56. {
  57. public:
  58. Seq(ExprPtr&& head, ExprPtr&& tail) : head(std::move(head)), tail(std::move(tail)) {}
  59. private:
  60. virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override;
  61. ExprPtr head, tail;
  62. };
  63.  
  64. class Star : public Expr
  65. {
  66. public:
  67. Star(ExprPtr&& expr) : expr(std::move(expr)) {}
  68. private:
  69. virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override final;
  70. ExprPtr expr;
  71. };
  72.  
  73. class Parens : public Expr
  74. {
  75. public:
  76. struct Match
  77. {
  78. std::size_t b, len;
  79. };
  80. Parens(ExprPtr&& expr) : expr(std::move(expr)) {}
  81. const Match& GetMatchedPart() const { return matched; }
  82. private:
  83. virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override final;
  84. Match matched;
  85. ExprPtr expr;
  86. };
  87.  
  88. class GroupItem
  89. {
  90. public:
  91. virtual ~GroupItem() = default;
  92. virtual bool Match(char c) = 0;
  93. };
  94.  
  95. class Group : public Expr
  96. {
  97. public:
  98. using Items = std::vector<std::unique_ptr<GroupItem>>;
  99. Group(Group::Items && items, bool invert) :items(std::move(items)), inv(invert) {}
  100. private:
  101. virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override final;
  102. Items items;
  103. bool inv;
  104. };
  105.  
  106. class Range : public GroupItem
  107. {
  108. public:
  109. Range(char b, char e) : b(b), e(e) { assert(b <= e); }
  110. virtual bool Match(char c)override final { return b <= c && c <= e; }
  111. private:
  112. char b, e;
  113. };
  114.  
  115. class Letter : public Expr, public GroupItem
  116. {
  117. public:
  118. Letter(char c) : c(c) {}
  119. virtual bool Match(char ch) override final { return c == ch; }
  120. private:
  121. virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override final;
  122. char c;
  123. };
  124.  
  125. class Dot : public Expr
  126. {
  127. private:
  128. virtual std::size_t MatchStr(const std::string& str, std::size_t begin, std::size_t maxL) override final;
  129. };
  130.  
  131. /*************************************************************************************************/
  132.  
  133. class RegexMatch
  134. {
  135. public:
  136. using Matches = std::vector<std::string>;
  137. RegexMatch(bool matched, Matches matches) : matched(matched), matches(std::move(matches)) {}
  138. bool matched;
  139. Matches matches;
  140. };
  141. struct Token
  142. {
  143. enum tokType
  144. {
  145. letter, group, paren, hyphen,
  146. star, caret, dot, or
  147. } type;
  148. char c;//value(even for non-letter for simplicity)
  149. std::size_t pos;//Position in input(for syntax errors)
  150. Token(tokType type, char c, std::size_t pos) :type(type), c(c), pos(pos) { }
  151. };
  152. using Tokens = std::queue<Token>;
  153. class Regex
  154. {
  155. public:
  156. //Builds regular expression
  157. Regex(const std::string& str) : expr(BuildExpr(str)) {}
  158. //Tries to match passed string
  159. RegexMatch Match(const std::string& str);
  160. private:
  161.  
  162.  
  163. Tokens Tokenize(const std::string& str);
  164. Token::tokType GetTokType(char c);
  165. ExprPtr BuildExpr(const std::string& str);
  166. //TODO Rename
  167. //Empty or parenthesis/bracket
  168. bool HasNext(const Tokens& tokens);
  169. ExprPtr ParseExpr(Tokens& tokens);
  170. ExprPtr ParseOrExpr(Tokens& tokens);
  171. ExprPtr ParseSeqExpr(Tokens& tokens);
  172. ExprPtr ParseStarExpr(Tokens& tokens);
  173. ExprPtr ParseSimpleExpr(Tokens& tokens);
  174. ExprPtr ParseParenExpr(Tokens& tokens);
  175. ExprPtr ParseGroupExpr(Tokens& tokens);
  176. //ORDER-DEPENDENT
  177. std::vector<Parens*> parens;
  178. ExprPtr expr;
  179. };
  180.  
  181. /*************************************************************************************************/
  182.  
  183. class Grep
  184. {
  185. public:
  186. static int Run(int argc, char** argv);
  187. static void PrintMatch(const RegexMatch& match);
  188. };
  189.  
  190.  
  191. /**********************************IMPLEMENTATION*************************************************/
  192.  
  193. std::string SyntaxError::BuildMsg(const std::string& msg, std::size_t pos)
  194. {
  195. if (pos == nPos)
  196. return "Syntax Error: " + msg;
  197. else
  198. return "Syntax Error (" + std::to_string(pos) + "): " + msg;
  199. }
  200.  
  201. std::size_t Expr::Match(const std::string& str, std::size_t begin, std::size_t maxL)
  202. {
  203. if (maxL < 0)return notMatched;
  204. return MatchStr(str, begin, maxL);
  205. }
  206.  
  207. std::size_t Or::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
  208. {
  209. //Match either one or second, return better match
  210. std::size_t leftL = left->Match(str, begin, maxL);
  211. std::size_t rightL = right->Match(str, begin, maxL);
  212.  
  213. if (leftL == notMatched)
  214. return rightL;
  215. else if (rightL == notMatched)
  216. return leftL;
  217. else
  218. return std::max(leftL, rightL);
  219. }
  220.  
  221. std::size_t Seq::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
  222. {
  223. //Try all combinations
  224. bool matched = false;
  225. std::size_t bestMatchL = 0;
  226. for (std::size_t i = 0; i <= maxL; ++i)
  227. {
  228. std::size_t hMatchL = head->Match(str, begin, maxL - i);
  229. if (hMatchL == notMatched)
  230. return notMatched;
  231. std::size_t tMatchL = tail->Match(str, begin + hMatchL, maxL - hMatchL);
  232. if (tMatchL == notMatched)
  233. continue;
  234.  
  235. if (hMatchL + tMatchL == maxL)//The best we can get
  236. return maxL;
  237. else
  238. {
  239. matched = true;
  240. bestMatchL = std::max(bestMatchL, hMatchL + tMatchL);
  241. }
  242. }
  243. return matched ? bestMatchL : notMatched;
  244. }
  245.  
  246. std::size_t Star::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
  247. {
  248. std::size_t matchL = 0;
  249. //Greedy match
  250. while (maxL - matchL > 0)
  251. {
  252. std::size_t tmpMatchL = expr->Match(str, begin + matchL, maxL - matchL);
  253. if (tmpMatchL == notMatched || tmpMatchL == 0)
  254. break;
  255. else
  256. matchL += tmpMatchL;
  257. }
  258. return matchL;
  259. }
  260.  
  261. std::size_t Parens::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
  262. {
  263. auto matchL = expr->Match(str, begin, maxL);
  264. matched.b = begin;
  265. matched.len = matchL == notMatched ? 0 : matchL;
  266. return matchL;
  267. }
  268.  
  269. std::size_t Group::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
  270. {
  271. char c = str[begin];
  272. for (auto& item : items)
  273. if (item->Match(c) ^ inv)
  274. return 1;
  275. return notMatched;
  276. }
  277.  
  278. std::size_t Letter::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
  279. {
  280. return str[begin] == c ? 1 : notMatched;
  281. }
  282.  
  283. std::size_t Dot::MatchStr(const std::string& str, std::size_t begin, std::size_t maxL)
  284. {
  285. return maxL == 0 ? notMatched : 1;
  286. }
  287.  
  288. RegexMatch Regex::Match(const std::string& str)
  289. {
  290. assert(str.length() != Expr::notMatched);
  291. bool matched = expr->Match(str, 0, str.length()) == str.length();
  292. RegexMatch::Matches matches;
  293. std::transform(parens.rbegin(), parens.rend(), std::back_inserter(matches), [&str](auto &&p)
  294. { auto m = p->GetMatchedPart(); return str.substr(m.b, m.len); });
  295.  
  296. return RegexMatch(matched, std::move(matches));
  297. }
  298.  
  299. Tokens Regex::Tokenize(const std::string& str)
  300. {
  301. Tokens tokens;
  302. bool hasAParen = str.find('(') != std::string::npos;
  303. if (!hasAParen)
  304. tokens.emplace(Token::paren, '(', SyntaxError::nPos);//Won't generate an error
  305. for (std::size_t i = 0; i < str.length(); ++i)
  306. {
  307. Token::tokType type;
  308. if (str[i] == '\\')//Escaped char
  309. if (++i == str.length())
  310. throw SyntaxError("Unescaped '\\'", i - 1);
  311. else
  312. if (GetTokType(str[i]) == Token::letter && str[i] != '\\')
  313. throw SyntaxError(std::string("Unknown escaped character '") + str[i] +
  314. "'. Only special characters may be escaped.", i);
  315. else
  316. type = Token::letter;
  317. else
  318. type = GetTokType(str[i]);
  319. tokens.emplace(type, str[i], i);
  320. }
  321. if (!hasAParen)
  322. tokens.emplace(Token::paren, ')', SyntaxError::nPos);//Won't generate an error
  323. return tokens;
  324. }
  325.  
  326. Token::tokType Regex::GetTokType(char c)
  327. {
  328. switch (c)
  329. {
  330. case '|':
  331. return Token:: or ;
  332. case '.':
  333. return Token::dot;
  334. case '^':
  335. return Token::caret;
  336. case '*':
  337. return Token::star;
  338. case '-':
  339. return Token::hyphen;
  340. case '(':
  341. case ')':
  342. return Token::paren;
  343. case '[':
  344. case ']':
  345. return Token::group;
  346. default:
  347. return Token::letter;
  348. }
  349. }
  350.  
  351. ExprPtr Regex::BuildExpr(const std::string& str)
  352. {
  353. auto tokens = Tokenize(str);
  354.  
  355. auto e = ParseExpr(tokens);
  356. if (!tokens.empty())
  357. throw SyntaxError("Unknown syntax", tokens.front().pos);
  358. else
  359. return e;
  360. }
  361.  
  362. bool Regex::HasNext(const Tokens& tokens)
  363. {
  364. if (tokens.empty())
  365. return false;
  366.  
  367. const auto& token = tokens.front();
  368. if (token.type == Token::group && token.c == ']')
  369. return false;
  370. if (token.type == Token::paren && token.c == ')')
  371. return false;
  372.  
  373. return true;
  374. }
  375.  
  376. ExprPtr Regex::ParseExpr(Tokens& tokens)
  377. {
  378. return ParseOrExpr(tokens);
  379. }
  380.  
  381. ExprPtr Regex::ParseOrExpr(Tokens& tokens)
  382. {
  383. auto seqExpr = ParseSeqExpr(tokens);
  384. if (HasNext(tokens))
  385. {
  386. const auto& token = tokens.front();
  387. if (token.type == Token:: or )
  388. {
  389. tokens.pop();
  390. return std::make_unique<Or>(std::move(seqExpr), ParseOrExpr(tokens));
  391. }
  392. }
  393. return seqExpr;
  394. }
  395.  
  396. ExprPtr Regex::ParseSeqExpr(Tokens& tokens)
  397. {
  398. auto starExpr = ParseStarExpr(tokens);
  399. //Or has lower precedence
  400. if (HasNext(tokens) && tokens.front().type != Token:: or )
  401. return std::make_unique<Seq>(std::move(starExpr), ParseSeqExpr(tokens));
  402. else
  403. return starExpr;
  404. }
  405.  
  406. ExprPtr Regex::ParseStarExpr(Tokens& tokens)
  407. {
  408. auto simpleExpr = ParseSimpleExpr(tokens);
  409. if (HasNext(tokens))
  410. {
  411. const auto& token = tokens.front();
  412. if (token.type == Token::star)
  413. {
  414. tokens.pop();
  415. return std::make_unique<Star>(std::move(simpleExpr));
  416. }
  417. }
  418. return simpleExpr;
  419. }
  420.  
  421. ExprPtr Regex::ParseSimpleExpr(Tokens& tokens)
  422. {
  423. if (tokens.empty())
  424. throw SyntaxError("Unexpected end of the expression.");
  425. else
  426. {
  427. Token token = tokens.front();
  428. tokens.pop();
  429. switch (token.type)
  430. {
  431. case Token::letter:
  432. return std::make_unique<Letter>(token.c);
  433. case Token::dot:
  434. return std::make_unique<Dot>();
  435. case Token::paren:
  436. if (token.c != '(')
  437. {
  438. assert(token.c == ')');
  439. throw SyntaxError("Unmatched/Unescaped right parenthesis.", token.pos);
  440. }
  441. else
  442. return ParseParenExpr(tokens);
  443. case Token::group:
  444. if (token.c != '[')
  445. {
  446. assert(token.c == ']');
  447. throw SyntaxError("Unmatched/Unescaped right bracket.", token.pos);
  448. }
  449. else
  450. return ParseGroupExpr(tokens);
  451. default:
  452. throw SyntaxError(std::string("Unescaped character '") + token.c + '\'', token.pos);
  453. }
  454. }
  455. }
  456. ExprPtr Regex::ParseParenExpr(Tokens& tokens)
  457. {
  458. auto e = ParseExpr(tokens);
  459. if (HasNext(tokens))
  460. throw SyntaxError("Missing a right parenthesis.", tokens.front().pos);
  461. else if (!tokens.empty())
  462. {
  463. assert(tokens.front().type == Token::paren && tokens.front().c == ')');
  464. tokens.pop();
  465. }
  466. auto paren = std::make_unique<Parens>(std::move(e));
  467. //Remember it
  468. parens.push_back(paren.get());
  469. return paren;
  470. }
  471.  
  472. ExprPtr Regex::ParseGroupExpr(Tokens& tokens)
  473. {
  474. if (tokens.empty())
  475. throw SyntaxError("Unexpected end of the expression, expected ']'");
  476.  
  477. bool invert = tokens.front().type == Token::caret;
  478. if (invert)tokens.pop();
  479. Group::Items items;
  480. while (HasNext(tokens))
  481. {
  482. Token token = tokens.front();
  483. tokens.pop();
  484. switch (token.type)
  485. {
  486. case Token::letter:
  487. if (tokens.size() >= 2 && tokens.front().type == Token::hyphen)
  488. {
  489. tokens.pop();//Hyphen
  490. Token endToken = tokens.front();
  491. tokens.pop();
  492. if (endToken.type != Token::letter)
  493. throw SyntaxError("Expected a letter for the range", endToken.pos + 2);
  494. else if (token.c > endToken.c)
  495. throw SyntaxError("End letter must have a higher or equal ASCII value than the start letter", endToken.pos);
  496. else
  497. items.push_back(std::make_unique<Range>(token.c, endToken.c));
  498. }
  499. else
  500. items.push_back(std::make_unique<Letter>(token.c));
  501. break;
  502. default:
  503. throw SyntaxError(std::string("Unescaped special character '") + token.c + '\'', token.pos);
  504. break;
  505. }
  506. }
  507. if (!tokens.empty() && tokens.front().c == ']')
  508. {
  509. tokens.pop();
  510. return std::make_unique<Group>(std::move(items), invert);
  511. }
  512. else if (tokens.empty())
  513. throw SyntaxError("Unexpected end of the expression, expected ']'");
  514. else
  515. throw SyntaxError("Missing ']'", tokens.front().pos);
  516. }
  517.  
  518. int Grep::Run(int argc, char** argv)
  519. {
  520. #ifdef USER
  521. std::string line;
  522. do
  523. {
  524. try
  525. {
  526. std::cout << "Enter regex please:\n";
  527. std::getline(std::cin, line);
  528. Regex regex(line);
  529. std::cout << "OK. Enter inputs:\n";
  530. while (!std::cin.bad())
  531. {
  532. std::getline(std::cin, line);
  533. if (line == "q")break;
  534. PrintMatch(regex.Match(line));
  535. }
  536. }
  537. catch (const Error& e)
  538. {
  539. std::cout << e.what() << '\n';
  540. }
  541. catch (const std::exception& e)
  542. {
  543. std::cout << "UNKNOWN ERROR: " << e.what();
  544. }
  545. } while (!line.empty());
  546. return 0;
  547. #else
  548. try
  549. {
  550. if (argc != 2)//program's name + regexp
  551. throw Error("Program expects only one argument - a regular expression.");
  552. else
  553. {
  554. Regex regex(argv[1]);
  555. std::string line;
  556. while (!std::cin.bad())
  557. {
  558. std::getline(std::cin, line);
  559. std::cout << regex.Match(line);
  560. }
  561. return 0;
  562. }
  563. std::string line;
  564. }
  565. catch (const Error& e)
  566. {
  567. std::cout << e.what() << '\n';
  568. return 1;
  569. }
  570. catch (const std::exception& e)
  571. {
  572. std::cout << "UNKNOWN ERROR: " << e.what();
  573. return 1;
  574. }
  575. #endif
  576. }
  577. void Grep::PrintMatch(const RegexMatch& match)
  578. {
  579. #ifdef USER
  580. std::cout << std::boolalpha << match.matched << '\n';
  581. #endif
  582. if (match.matched)
  583. for (const auto& m : match.matches)
  584. std::cout << m << '\n';
  585. else
  586. std::cout << '\n';
  587. }
  588. /**********************************MAIN***********************************************************/
  589.  
  590.  
  591. int main(int argc, char** argv)
  592. {
  593. Grep::Run(argc, argv);
  594. //TODO remove pause, return from grep;
  595. system("PAUSE");
  596. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement