SHARE
TWEET

Untitled

a guest Jun 19th, 2019 55 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #ifndef CTLEX_HPP
  2. #define CTLEX_HPP
  3.  
  #include <algorithm>
  #include <array>
  #include <cstddef>
  #include <iterator>
  #include <optional>
  #include <stdexcept>
  #include <string>
  #include <string_view>
  #include <tuple>
  #include <utility>

  #include <ctre.hpp>
  8.  
  9. namespace ctlex
  10. {
  11.  
  /// Exception type of the lexer; a std::runtime_error that can also be built from a string view.
  class lexer_error : public std::runtime_error
  {
  public:
      // Makes every std::runtime_error constructor available on lexer_error.
      using std::runtime_error::runtime_error;

      /// Copies the characters of `sv` into the exception message.
      lexer_error(const std::string_view & sv)
          : std::runtime_error(std::string(sv))
      {
      }
  };
  19.  
  20. template<std::size_t SizeId, std::size_t SizeRegex>
  21. struct token
  22. {
  23.     ctll::fixed_string<SizeId> id;
  24.     ctll::fixed_string<SizeRegex> regex;
  25.  
  26.     bool ignore = false;
  27.  
  28.     constexpr std::u32string_view id_view() const {return {id.begin(), id.size()};}
  29.  
  30.  
  31.     constexpr token(const token & tk) noexcept : id(tk.id), regex(tk.regex), ignore(tk.ignore) {}
  32.  
  33.     constexpr token& operator=(const token & tk) noexcept
  34.     {
  35.         id    = tk.id;
  36.         regex = tk.regex;
  37.         ignore= tk.ignore;
  38.         return *this;
  39.     }
  40.  
  41.     template<typename IdChar, typename RegexChar>
  42.     constexpr token(const IdChar (&id)[SizeId], const RegexChar (&regex)[SizeRegex]) noexcept : regex(regex), id(id)
  43.     {
  44.     }
  45.  
  46.     template<typename IdChar, typename RegexChar>
  47.     constexpr token(const IdChar (&id)[SizeId], const RegexChar (&regex)[SizeRegex], decltype(std::ignore)) noexcept : regex(regex), id(id), ignore(true)
  48.     {
  49.     }
  50.  
  51.     template<std::size_t RhsSizeId, std::size_t RhsSizeRegex>
  52.     constexpr bool operator<(const token<RhsSizeId, RhsSizeRegex> & rhs) {return this->id < rhs.id;}
  53.  
  54.     template<std::size_t RhsSizeId, std::size_t RhsSizeRegex>
  55.     constexpr bool operator==(const token<RhsSizeId, RhsSizeRegex> & rhs) {return this->id == rhs.id;}
  56.  
  57.     template<std::size_t RhsSizeId, std::size_t RhsSizeRegex>
  58.     constexpr bool operator>(const token<RhsSizeId, RhsSizeRegex> & rhs) {return this->id  > id;}
  59. };
  60.  
  /// Empty tag type. Nothing else visible in this header refers to it.
  struct invalid_token_tag
  {
  };
  62.  
  63. template<typename char_type>
  64. struct token_result
  65. {
  66.     std::u32string_view id;
  67.     std::basic_string_view<char_type> value;
  68.  
  69.  
  70.  
  71.     template<ctll::fixed_string Id> constexpr bool is()                                    const {return id == std::u32string_view(Id.begin(), Id.size());}
  72.     template<std::size_t Size>      constexpr bool is(const ctll::fixed_string<Size> & id) const {return this->id ==  std::u32string_view(id.begin(), id.size());}
  73.  
  74.     constexpr token_result() noexcept {};
  75.     constexpr token_result(std::u32string_view id, std::basic_string_view<char_type> value) noexcept : id(id), value(value)
  76.     {
  77.     }
  78.  
  79.     template<std::size_t RhsSizeId, std::size_t RhsSizeRegex>
  80.     constexpr bool operator<(const token<RhsSizeId, RhsSizeRegex> & rhs) {return this->value.begin() < rhs.value.begin();}
  81.  
  82.     template<std::size_t RhsSizeId, std::size_t RhsSizeRegex>
  83.     constexpr bool operator==(const token<RhsSizeId, RhsSizeRegex> & rhs) {return this->value.begin() == rhs.value.begin();}
  84.  
  85.     template<std::size_t RhsSizeId, std::size_t RhsSizeRegex>
  86.     constexpr bool operator>(const token<RhsSizeId, RhsSizeRegex> & rhs) {return this->value.begin()  > value.begin();}
  87. };
  88.  
  89. template<token ... Tokens>
  90. constexpr inline auto build_regex()
  91. {
  92.     //the way we build it: (regex1)|(regex2)|(regex3) -> meaning SumSize + SizeOfElem * 2 + (SizeOfElem - 1) + '$'
  93.     constexpr auto SumSize = ( 0 + ... + Tokens.regex.size());
  94.     constexpr auto SizeOfElem = sizeof...(Tokens);
  95.  
  96.     char32_t res[SumSize + (SizeOfElem * 4) - 1] = {};
  97.  
  98.     constexpr std::array<std::u32string_view, SizeOfElem> input = {  std::u32string_view(Tokens.regex.begin(), Tokens.regex.size())... };
  99.  
  100.     auto idx = 0u;
  101.     res[idx++] = U'^';
  102.     res[idx++] = U'(';
  103.  
  104.     for (auto & in : input)
  105.     {
  106.         //copy should be constexpr, isn't yet though...sooo, second loop aye
  107.         //std::copy(std::begin(in), std::end(in), &res[idx]);
  108.         for (auto idx_ = 0u; idx_ < in.size(); idx_ ++)
  109.             res[idx++] = in[idx_];
  110.  
  111.         if (&in == (input.end() - 1))
  112.             break;
  113.  
  114.         res[idx++] = U')';
  115.         res[idx++] = U'|';
  116.         res[idx++] = U'^';
  117.         res[idx++] = U'(';
  118.     }
  119.  
  120.     res[idx] = U')';
  121.  
  122.     return ctll::fixed_string<SumSize + (SizeOfElem * 4) - 1>(res);
  123. }
  124.  
  125. template<auto... Tokens, typename Iterator, typename... Captures>
  126. constexpr auto get_resulting_token(const ctre::regex_results<Iterator, Captures...> & res)
  127.   -> std::optional<std::pair<token_result<typename std::iterator_traits<Iterator>::value_type>, bool>>
  128. {
  129.     static_assert(sizeof...(Tokens) == sizeof...(Captures));
  130.     using char_type = typename std::iterator_traits<Iterator>::value_type;
  131.     constexpr auto transform_cap = [](auto && val) constexpr -> std::optional<std::basic_string_view<char_type>>
  132.             {
  133.                 if (val)
  134.                     return val.to_view();
  135.                 else
  136.                     return std::nullopt;
  137.             };
  138.  
  139.     std::array<std::optional<std::basic_string_view<char_type>>, sizeof...(Captures)> caps = {transform_cap(res.template get<Captures::template storage<Iterator>::get_id()>())...};
  140.     using char_type = typename std::iterator_traits<Iterator>::value_type;
  141.  
  142.     std::array<std::pair<std::u32string_view, bool>, sizeof...(Tokens)> toks = {std::make_pair(Tokens.id_view(), Tokens.ignore)...};
  143.     auto idx = 0u;
  144.     for (; idx < sizeof...(Captures); idx++)
  145.         if (caps[idx])
  146.             break;
  147.  
  148.     if (idx == sizeof...(Captures))
  149.         return std::nullopt;
  150.  
  151.     auto val = *caps[idx];
  152.  
  153.     return std::make_pair(token_result<char_type>(toks[idx].first, val), toks[idx].second);
  154. }
  155.  
  156. template<auto Searcher, auto... Tokens, typename Iterator>
  157. constexpr auto get_next_token(Iterator itr, Iterator end)
  158. {
  159.     using char_type = typename std::iterator_traits<Iterator>::value_type;
  160.     auto match = Searcher(itr, end);
  161.     auto res = get_resulting_token<Tokens...>(match);
  162.     return res;
  163. }
  164.  
  165. template<typename Iterator, auto Searcher, auto... Tokens>
  166. struct token_iterator
  167. {
  168.     constexpr token_iterator(Iterator begin, Iterator end) : _itr(begin), _end(end)
  169.     {
  170.         _seek_next();
  171.     }
  172.  
  173.     constexpr token_iterator(const token_iterator &) = default;
  174.     constexpr token_iterator &operator=(const token_iterator &) = default;
  175.  
  176.  
  177.     using char_type = typename std::iterator_traits<Iterator>::value_type;
  178.     using value_type = token_result<char_type>;
  179.     using reference = value_type &;
  180.     using pointer   = value_type *;
  181.     using difference_type = std::size_t;
  182.     using iterator_category = std::forward_iterator_tag;
  183.  
  184.     constexpr reference operator*()       {return *_current;}
  185.     constexpr reference operator*() const {return *_current;}
  186.     constexpr auto operator->()       {return _current;}
  187.     constexpr auto operator->() const {return _current;}
  188.  
  189.     constexpr operator bool() const {return _current.has_value();}
  190.     constexpr bool valid()    const {return _current.has_value();}
  191.  
  192.     constexpr auto current_position() {return _itr;}
  193.     constexpr auto end_position() {return _end;}
  194.  
  195.     constexpr token_iterator& operator++()
  196.     {
  197.         _itr += _current ? _current->value.size() : 1u;
  198.         _seek_next();
  199.         return *this;
  200.  
  201.     }
  202.     constexpr token_iterator operator++(int)
  203.     {
  204.         const auto res = *this;
  205.         _itr += _current ? _current->value.size() : 1u;
  206.         _seek_next();
  207.         return res;
  208.     }
  209.  
  210.     constexpr bool eoi() const {return _itr == _end;}
  211.  
  212.     constexpr bool operator< (const token_iterator& rhs) {return this->_itr <  rhs._itr;}
  213.     constexpr bool operator==(const token_iterator& rhs) {return this->_itr == rhs._itr;}
  214.     constexpr bool operator> (const token_iterator& rhs) {return this->_itr >  rhs._itr;}
  215.     constexpr bool operator!=(const token_iterator& rhs) {return this->_itr != rhs._itr;}
  216. private:
  217.     constexpr void _seek_next()
  218.     {
  219.         while (_itr != _end)
  220.         {
  221.             const auto next_token = get_next_token<Searcher, Tokens...>(_itr, _end);
  222.             if (!next_token) //invalid token
  223.             {
  224.                 _current = value_type({},{});
  225.                 break; //not found
  226.             }
  227.  
  228.  
  229.             if (!next_token->second) //token valid, but not ignored
  230.             {
  231.                 _current = std::optional(next_token->first);
  232.                 return;
  233.             }
  234.             else
  235.                 _itr += next_token->first.value.size();
  236.         }
  237.     }
  238.  
  239.     std::optional<value_type> _current;
  240.     Iterator _itr;
  241.     Iterator _end;
  242. };
  243.  
  244. template<typename Iterator, auto... Tokens>
  245. struct token_range
  246. {
  247.     using char_type = typename std::iterator_traits<Iterator>::value_type;
  248.     using value_type = token_result<char_type>;
  249.     using reference = value_type &;
  250.  
  251.     constexpr static auto searcher = ctre::search<build_regex<Tokens...>()>;
  252.     using iterator = token_iterator<Iterator, searcher, Tokens...>;
  253.  
  254.     constexpr token_range(token_range&) = default;
  255.  
  256.     constexpr token_range(Iterator begin, Iterator end) : _begin(begin), _end(end) {}
  257.  
  258.     constexpr iterator begin() const {return iterator(_begin, _end);}
  259.     constexpr iterator   end() const {return iterator(  _end, _end);}
  260.  
  261.     constexpr bool operator==(const token_range& rhs) const {return (_begin == rhs._begin) && (_end == rhs._end);}
  262.     constexpr bool operator!=(const token_range& rhs) const {return (_begin != rhs._begin) || (_end != rhs._end);}
  263.  
  264. private:
  265.     Iterator _begin;
  266.     Iterator _end;
  267.  
  268. };
  269.  
  270. template<token ... Tokens>
  271. constexpr auto tokenize(const std::string_view &sv)
  272. {
  273.     using iterator = std::string_view::const_iterator ;
  274.     using range = token_range<iterator, Tokens...>;
  275.     return range(sv.begin(), sv.end());
  276. }
  277.  
  278. }
  279.  
  280. ///SOME TESTS, only here because gist..
  281.  
  282. constexpr auto sv = std::string_view("foo", 4);
  283.  
  284. constexpr auto tk1 = ctlex::token("foo", "fo+");
  285. constexpr auto tk2 = ctlex::token("bar", "bar");
  286. constexpr auto tk3 = ctlex::token("ws", "[a-zA-Z0-9_]+", std::ignore);
  287.  
  288. constexpr auto tk_ = tk1;
  289. constexpr ctlex::token tk_cp2 = tk1;
  290. constexpr ctlex::token tk_cp3(tk1);
  291.  
  292.  
  293. using tr = ctlex::token_range<char* , tk1, tk2, tk3>;
  294.  
  295. constexpr auto rx = ctlex::build_regex<tk1, tk2, tk3>();
  296.  
  297. static_assert(std::u32string_view(rx.begin(), rx.size()) == U"^(fo+)|^(bar)|^([a-zA-Z0-9_]+)");
  298.  
  299. constexpr auto match = ctre::search<"^(fo+)|^(bar)|^(\\s+)">("foo bar");
  300.  
  301. static_assert(match. get<1>());
  302. static_assert(match. get<1>().to_view() == "foo");
  303. static_assert(!match.get<2>());
  304. static_assert(!match.get<3>());
  305.  
  306. constexpr auto match1 = ctre::search<"^(fo+)|^(bar)|^(\\s+)">(" bar");
  307.  
  308. static_assert(!match1.get<1>());
  309. static_assert(!match1.get<2>());
  310. static_assert( match1.get<3>());
  311. static_assert( match1.get<3>().to_view() == " ");
  312.  
  313.  
  314. constexpr auto match2 = ctre::search<"^(fo+)|^(bar)|^(\\s+)">("bar");
  315.  
  316. static_assert(!match2. get<1>());
  317. static_assert( match2.get<2>());
  318. static_assert( match2.get<2>().to_view() == "bar");
  319. static_assert(!match2.get<3>());
  320.  
  321. constexpr auto seq = ctlex::tokenize<ctlex::token("foo", "fo+"),
  322.                                      ctlex::token("bar", "bar"),
  323.                                      ctlex::token("ws", "\\s+", std::ignore)>("fooo bar");
  324.  
  325.  
  326. static_assert(seq.begin());
  327. constexpr auto start = *seq.begin();
  328. static_assert(start.is<"foo">());
  329. static_assert(start.value == "fooo");
  330.  
  331. constexpr auto next = std::next(seq.begin());
  332. static_assert(next);
  333. static_assert(next->is<"bar">());
  334. static_assert(next->value == "bar");
  335.  
  336. constexpr auto eend = std::next(seq.begin(), 2);
  337. static_assert(seq.end() == eend);
  338.  
  339. #endif
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top