Advertisement
Guest User

Untitled

a guest
Jun 19th, 2019
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.01 KB | None | 0 0
  1. #ifndef CTLEX_HPP
  2. #define CTLEX_HPP
  3.  
#include <algorithm>
#include <array>
#include <cstddef>
#include <iterator>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>

#include <ctre.hpp>
  8.  
  9. namespace ctlex
  10. {
  11.  
  12. class lexer_error : public std::runtime_error
  13. {
  14. public:
  15. using std::runtime_error::runtime_error;
  16.  
  17. lexer_error(const std::string_view & sv) : lexer_error(std::string(sv.begin(), sv.end())) {}
  18. };
  19.  
  20. template<std::size_t SizeId, std::size_t SizeRegex>
  21. struct token
  22. {
  23. ctll::fixed_string<SizeId> id;
  24. ctll::fixed_string<SizeRegex> regex;
  25.  
  26. bool ignore = false;
  27.  
  28. constexpr std::u32string_view id_view() const {return {id.begin(), id.size()};}
  29.  
  30.  
  31. constexpr token(const token & tk) noexcept : id(tk.id), regex(tk.regex), ignore(tk.ignore) {}
  32.  
  33. constexpr token& operator=(const token & tk) noexcept
  34. {
  35. id = tk.id;
  36. regex = tk.regex;
  37. ignore= tk.ignore;
  38. return *this;
  39. }
  40.  
  41. template<typename IdChar, typename RegexChar>
  42. constexpr token(const IdChar (&id)[SizeId], const RegexChar (&regex)[SizeRegex]) noexcept : regex(regex), id(id)
  43. {
  44. }
  45.  
  46. template<typename IdChar, typename RegexChar>
  47. constexpr token(const IdChar (&id)[SizeId], const RegexChar (&regex)[SizeRegex], decltype(std::ignore)) noexcept : regex(regex), id(id), ignore(true)
  48. {
  49. }
  50.  
  51. template<std::size_t RhsSizeId, std::size_t RhsSizeRegex>
  52. constexpr bool operator<(const token<RhsSizeId, RhsSizeRegex> & rhs) {return this->id < rhs.id;}
  53.  
  54. template<std::size_t RhsSizeId, std::size_t RhsSizeRegex>
  55. constexpr bool operator==(const token<RhsSizeId, RhsSizeRegex> & rhs) {return this->id == rhs.id;}
  56.  
  57. template<std::size_t RhsSizeId, std::size_t RhsSizeRegex>
  58. constexpr bool operator>(const token<RhsSizeId, RhsSizeRegex> & rhs) {return this->id > id;}
  59. };
  60.  
  61. struct invalid_token_tag{};
  62.  
  63. template<typename char_type>
  64. struct token_result
  65. {
  66. std::u32string_view id;
  67. std::basic_string_view<char_type> value;
  68.  
  69.  
  70.  
  71. template<ctll::fixed_string Id> constexpr bool is() const {return id == std::u32string_view(Id.begin(), Id.size());}
  72. template<std::size_t Size> constexpr bool is(const ctll::fixed_string<Size> & id) const {return this->id == std::u32string_view(id.begin(), id.size());}
  73.  
  74. constexpr token_result() noexcept {};
  75. constexpr token_result(std::u32string_view id, std::basic_string_view<char_type> value) noexcept : id(id), value(value)
  76. {
  77. }
  78.  
  79. template<std::size_t RhsSizeId, std::size_t RhsSizeRegex>
  80. constexpr bool operator<(const token<RhsSizeId, RhsSizeRegex> & rhs) {return this->value.begin() < rhs.value.begin();}
  81.  
  82. template<std::size_t RhsSizeId, std::size_t RhsSizeRegex>
  83. constexpr bool operator==(const token<RhsSizeId, RhsSizeRegex> & rhs) {return this->value.begin() == rhs.value.begin();}
  84.  
  85. template<std::size_t RhsSizeId, std::size_t RhsSizeRegex>
  86. constexpr bool operator>(const token<RhsSizeId, RhsSizeRegex> & rhs) {return this->value.begin() > value.begin();}
  87. };
  88.  
  89. template<token ... Tokens>
  90. constexpr inline auto build_regex()
  91. {
  92. //the way we build it: (regex1)|(regex2)|(regex3) -> meaning SumSize + SizeOfElem * 2 + (SizeOfElem - 1) + '$'
  93. constexpr auto SumSize = ( 0 + ... + Tokens.regex.size());
  94. constexpr auto SizeOfElem = sizeof...(Tokens);
  95.  
  96. char32_t res[SumSize + (SizeOfElem * 4) - 1] = {};
  97.  
  98. constexpr std::array<std::u32string_view, SizeOfElem> input = { std::u32string_view(Tokens.regex.begin(), Tokens.regex.size())... };
  99.  
  100. auto idx = 0u;
  101. res[idx++] = U'^';
  102. res[idx++] = U'(';
  103.  
  104. for (auto & in : input)
  105. {
  106. //copy should be constexpr, isn't yet though...sooo, second loop aye
  107. //std::copy(std::begin(in), std::end(in), &res[idx]);
  108. for (auto idx_ = 0u; idx_ < in.size(); idx_ ++)
  109. res[idx++] = in[idx_];
  110.  
  111. if (&in == (input.end() - 1))
  112. break;
  113.  
  114. res[idx++] = U')';
  115. res[idx++] = U'|';
  116. res[idx++] = U'^';
  117. res[idx++] = U'(';
  118. }
  119.  
  120. res[idx] = U')';
  121.  
  122. return ctll::fixed_string<SumSize + (SizeOfElem * 4) - 1>(res);
  123. }
  124.  
  125. template<auto... Tokens, typename Iterator, typename... Captures>
  126. constexpr auto get_resulting_token(const ctre::regex_results<Iterator, Captures...> & res)
  127. -> std::optional<std::pair<token_result<typename std::iterator_traits<Iterator>::value_type>, bool>>
  128. {
  129. static_assert(sizeof...(Tokens) == sizeof...(Captures));
  130. using char_type = typename std::iterator_traits<Iterator>::value_type;
  131. constexpr auto transform_cap = [](auto && val) constexpr -> std::optional<std::basic_string_view<char_type>>
  132. {
  133. if (val)
  134. return val.to_view();
  135. else
  136. return std::nullopt;
  137. };
  138.  
  139. std::array<std::optional<std::basic_string_view<char_type>>, sizeof...(Captures)> caps = {transform_cap(res.template get<Captures::template storage<Iterator>::get_id()>())...};
  140. using char_type = typename std::iterator_traits<Iterator>::value_type;
  141.  
  142. std::array<std::pair<std::u32string_view, bool>, sizeof...(Tokens)> toks = {std::make_pair(Tokens.id_view(), Tokens.ignore)...};
  143. auto idx = 0u;
  144. for (; idx < sizeof...(Captures); idx++)
  145. if (caps[idx])
  146. break;
  147.  
  148. if (idx == sizeof...(Captures))
  149. return std::nullopt;
  150.  
  151. auto val = *caps[idx];
  152.  
  153. return std::make_pair(token_result<char_type>(toks[idx].first, val), toks[idx].second);
  154. }
  155.  
  156. template<auto Searcher, auto... Tokens, typename Iterator>
  157. constexpr auto get_next_token(Iterator itr, Iterator end)
  158. {
  159. using char_type = typename std::iterator_traits<Iterator>::value_type;
  160. auto match = Searcher(itr, end);
  161. auto res = get_resulting_token<Tokens...>(match);
  162. return res;
  163. }
  164.  
  165. template<typename Iterator, auto Searcher, auto... Tokens>
  166. struct token_iterator
  167. {
  168. constexpr token_iterator(Iterator begin, Iterator end) : _itr(begin), _end(end)
  169. {
  170. _seek_next();
  171. }
  172.  
  173. constexpr token_iterator(const token_iterator &) = default;
  174. constexpr token_iterator &operator=(const token_iterator &) = default;
  175.  
  176.  
  177. using char_type = typename std::iterator_traits<Iterator>::value_type;
  178. using value_type = token_result<char_type>;
  179. using reference = value_type &;
  180. using pointer = value_type *;
  181. using difference_type = std::size_t;
  182. using iterator_category = std::forward_iterator_tag;
  183.  
  184. constexpr reference operator*() {return *_current;}
  185. constexpr reference operator*() const {return *_current;}
  186. constexpr auto operator->() {return _current;}
  187. constexpr auto operator->() const {return _current;}
  188.  
  189. constexpr operator bool() const {return _current.has_value();}
  190. constexpr bool valid() const {return _current.has_value();}
  191.  
  192. constexpr auto current_position() {return _itr;}
  193. constexpr auto end_position() {return _end;}
  194.  
  195. constexpr token_iterator& operator++()
  196. {
  197. _itr += _current ? _current->value.size() : 1u;
  198. _seek_next();
  199. return *this;
  200.  
  201. }
  202. constexpr token_iterator operator++(int)
  203. {
  204. const auto res = *this;
  205. _itr += _current ? _current->value.size() : 1u;
  206. _seek_next();
  207. return res;
  208. }
  209.  
  210. constexpr bool eoi() const {return _itr == _end;}
  211.  
  212. constexpr bool operator< (const token_iterator& rhs) {return this->_itr < rhs._itr;}
  213. constexpr bool operator==(const token_iterator& rhs) {return this->_itr == rhs._itr;}
  214. constexpr bool operator> (const token_iterator& rhs) {return this->_itr > rhs._itr;}
  215. constexpr bool operator!=(const token_iterator& rhs) {return this->_itr != rhs._itr;}
  216. private:
  217. constexpr void _seek_next()
  218. {
  219. while (_itr != _end)
  220. {
  221. const auto next_token = get_next_token<Searcher, Tokens...>(_itr, _end);
  222. if (!next_token) //invalid token
  223. {
  224. _current = value_type({},{});
  225. break; //not found
  226. }
  227.  
  228.  
  229. if (!next_token->second) //token valid, but not ignored
  230. {
  231. _current = std::optional(next_token->first);
  232. return;
  233. }
  234. else
  235. _itr += next_token->first.value.size();
  236. }
  237. }
  238.  
  239. std::optional<value_type> _current;
  240. Iterator _itr;
  241. Iterator _end;
  242. };
  243.  
  244. template<typename Iterator, auto... Tokens>
  245. struct token_range
  246. {
  247. using char_type = typename std::iterator_traits<Iterator>::value_type;
  248. using value_type = token_result<char_type>;
  249. using reference = value_type &;
  250.  
  251. constexpr static auto searcher = ctre::search<build_regex<Tokens...>()>;
  252. using iterator = token_iterator<Iterator, searcher, Tokens...>;
  253.  
  254. constexpr token_range(token_range&) = default;
  255.  
  256. constexpr token_range(Iterator begin, Iterator end) : _begin(begin), _end(end) {}
  257.  
  258. constexpr iterator begin() const {return iterator(_begin, _end);}
  259. constexpr iterator end() const {return iterator( _end, _end);}
  260.  
  261. constexpr bool operator==(const token_range& rhs) const {return (_begin == rhs._begin) && (_end == rhs._end);}
  262. constexpr bool operator!=(const token_range& rhs) const {return (_begin != rhs._begin) || (_end != rhs._end);}
  263.  
  264. private:
  265. Iterator _begin;
  266. Iterator _end;
  267.  
  268. };
  269.  
  270. template<token ... Tokens>
  271. constexpr auto tokenize(const std::string_view &sv)
  272. {
  273. using iterator = std::string_view::const_iterator ;
  274. using range = token_range<iterator, Tokens...>;
  275. return range(sv.begin(), sv.end());
  276. }
  277.  
  278. }
  279.  
  280. ///SOME TESTS, only here because gist..
  281.  
  282. constexpr auto sv = std::string_view("foo", 4);
  283.  
  284. constexpr auto tk1 = ctlex::token("foo", "fo+");
  285. constexpr auto tk2 = ctlex::token("bar", "bar");
  286. constexpr auto tk3 = ctlex::token("ws", "[a-zA-Z0-9_]+", std::ignore);
  287.  
  288. constexpr auto tk_ = tk1;
  289. constexpr ctlex::token tk_cp2 = tk1;
  290. constexpr ctlex::token tk_cp3(tk1);
  291.  
  292.  
  293. using tr = ctlex::token_range<char* , tk1, tk2, tk3>;
  294.  
  295. constexpr auto rx = ctlex::build_regex<tk1, tk2, tk3>();
  296.  
  297. static_assert(std::u32string_view(rx.begin(), rx.size()) == U"^(fo+)|^(bar)|^([a-zA-Z0-9_]+)");
  298.  
  299. constexpr auto match = ctre::search<"^(fo+)|^(bar)|^(\\s+)">("foo bar");
  300.  
  301. static_assert(match. get<1>());
  302. static_assert(match. get<1>().to_view() == "foo");
  303. static_assert(!match.get<2>());
  304. static_assert(!match.get<3>());
  305.  
  306. constexpr auto match1 = ctre::search<"^(fo+)|^(bar)|^(\\s+)">(" bar");
  307.  
  308. static_assert(!match1.get<1>());
  309. static_assert(!match1.get<2>());
  310. static_assert( match1.get<3>());
  311. static_assert( match1.get<3>().to_view() == " ");
  312.  
  313.  
  314. constexpr auto match2 = ctre::search<"^(fo+)|^(bar)|^(\\s+)">("bar");
  315.  
  316. static_assert(!match2. get<1>());
  317. static_assert( match2.get<2>());
  318. static_assert( match2.get<2>().to_view() == "bar");
  319. static_assert(!match2.get<3>());
  320.  
  321. constexpr auto seq = ctlex::tokenize<ctlex::token("foo", "fo+"),
  322. ctlex::token("bar", "bar"),
  323. ctlex::token("ws", "\\s+", std::ignore)>("fooo bar");
  324.  
  325.  
  326. static_assert(seq.begin());
  327. constexpr auto start = *seq.begin();
  328. static_assert(start.is<"foo">());
  329. static_assert(start.value == "fooo");
  330.  
  331. constexpr auto next = std::next(seq.begin());
  332. static_assert(next);
  333. static_assert(next->is<"bar">());
  334. static_assert(next->value == "bar");
  335.  
  336. constexpr auto eend = std::next(seq.begin(), 2);
  337. static_assert(seq.end() == eend);
  338.  
  339. #endif
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement