Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Vector<Token> lex(Source& src) {
- LexerIterator it = LexerIterator::create(src);
- Vector<Token> tokens = Vector<Token>::create(2056);
- while(it.valid()) {
- while(it.is_whitespace()) { it.advance(); }
- if(it.current() == '#') {
- while(!it.is_newline() && it.valid()) { it.advance(); }
- continue;
- }
- if(it.is_alphabetical()) {
- KeywordKind kwkind = KeywordKind::none;
- it.mark();
- it.advance();
- while(it.is_alphanumeric()) { it.advance(); }
- for(size_t i = 0; i < VOID_ARRAY_LEN(KEYWORDS); ++ i) {
- ReservedToken const& kw = KEYWORDS[i];
- if(it.mark_distance() == kw.str_len && memcmp(it.mark_it, kw.str, kw.str_len) == 0) {
- kwkind = KeywordKind(i);
- break;
- }
- }
- if(kwkind == KeywordKind::none) { tokens.add(Token::create(it.mark_it, it.it)); }
- else { tokens.add(Token::create(kwkind, it.mark_it, it.it)); }
- }
- if(it.current() == '0' && (it.next() == 'x' || it.next() == 'X')) {
- bool invalid_char = false;
- it.mark();
- it.advance(2);
- while(it.is_alphanumeric()) {
- if(!it.is_hexadecimal()) { invalid_char = true; }
- it.advance();
- }
- unsigned long long val = 0;
- if(distance(it.mark_it + 2, it.it) == 0) {
- src.error(it.mark_it, it.it, "Expected hexadecimal numbers");
- }
- if(invalid_char) { src.error(it.mark_it, it.it, "Invalid hexadecimal character"); }
- if(!hex_str_to_u64(it.mark_it, distance(it.mark_it, it.it), val)) {
- src.error(it.mark_it, it.it, "Invalid hexadecimal literal");
- }
- tokens.add(Token::create(val, it.mark_it, it.it));
- }
- if(it.current() == '0' && (it.next() == 'b' || it.next() == 'B')) {
- bool invalid_char = false;
- it.mark();
- it.advance(2);
- while(it.is_alphanumeric()) {
- if(!it.is_binary()) { invalid_char = true; }
- it.advance();
- }
- unsigned long long val = 0;
- if(distance(it.mark_it + 2, it.it) == 0) {
- src.error(it.mark_it, it.it, "Expected binary numbers");
- }
- if(invalid_char) { src.error(it.mark_it, it.it, "Invalid binary character"); }
- if(!bin_str_to_u64(it.mark_it + 2, distance(it.mark_it, it.it) - 2, val)) {
- src.error(it.mark_it, it.it, "Invalid binary literal");
- }
- tokens.add(Token::create(val, it.mark_it, it.it));
- }
- if(it.is_numeric() || (it.current() == '.' && it.is_next_numeric())) {
- int exponent_cnt = 0;
- bool expect_num = false;
- bool is_point = false;
- it.mark();
- while(it.is_alphanumeric()) {
- if(it.current() == 'e' || it.current() == 'E') {
- is_point = true;
- ++ exponent_cnt;
- it.advance();
- if(it.current() == '-' || it.current() == '+') { it.advance(); }
- if(!it.is_numeric()) { expect_num = true; }
- continue;
- }
- it.advance();
- }
- if(it.current() == '.' && it.is_next_numeric()) {
- is_point = true;
- it.advance();
- }
- while(it.is_alphanumeric()) {
- if(it.current() == 'e' || it.current() == 'E') {
- is_point = true;
- ++ exponent_cnt;
- it.advance();
- if(it.current() == '-' || it.current() == '+') { it.advance(); }
- if(!it.is_numeric()) { expect_num = true; }
- continue;
- }
- it.advance();
- }
- if(exponent_cnt > 1) { src.error(it.mark_it, it.it, "Too many exponentials in floating point literal"); }
- if(expect_num) { src.error(it.mark_it, it.it, "Expected number after exponential"); }
- unsigned long long int_val = 0;
- double point_val = 0;
- if(is_point) {
- if(!str_to_f64(it.mark_it, it.mark_distance(), point_val)) {
- src.error(it.mark_it, it.it, "Invalid floating point literal");
- }
- tokens.add(Token::create(point_val, it.mark_it, it.it));
- } else {
- if(!str_to_u64(it.mark_it, it.mark_distance(), int_val)) {
- src.error(it.mark_it, it.it, "Invalid integer literal");
- }
- tokens.add(Token::create(int_val, it.mark_it, it.it));
- }
- }
- for(size_t i = 0; i < VOID_ARRAY_LEN(OPERATORS); ++ i) {
- ReservedToken const& op = OPERATORS[i];
- if(memcmp(it.it, op.str, op.str_len) == 0) {
- it.mark();
- it.advance(op.str_len);
- tokens.add(Token::create(OperatorKind(i), it.mark_it, it.it));
- break;
- }
- }
- if(it.current() == '"') {
- String str = String::create();
- bool closed = false;
- it.mark();
- it.advance();
- while(it.valid()) {
- if(it.current() == '"') {
- it.advance();
- closed = true;
- break;
- }
- if(it.current() == '\\') {
- char const* start = it.it;
- it.advance();
- switch(it.current()) {
- case 'a':
- case 'A':
- str.add('\a');
- it.advance();
- break;
- case 'b':
- case 'B':
- str.add('\b');
- it.advance();
- break;
- case 'f':
- case 'F':
- str.add('\f');
- it.advance();
- break;
- case 'n':
- case 'N':
- str.add('\n');
- it.advance();
- break;
- case 'r':
- case 'R':
- str.add('\r');
- it.advance();
- break;
- case 't':
- case 'T':
- str.add('\t');
- it.advance();
- break;
- case 'v':
- case 'V':
- str.add('\v');
- it.advance();
- break;
- case '\\':
- str.add('\\');
- it.advance();
- break;
- case '\'':
- str.add('\'');
- it.advance();
- break;
- case '\"':
- str.add('\"');
- it.advance();
- break;
- case '\?':
- str.add('\?');
- it.advance();
- break;
- case '0':
- str.add('\0');
- it.advance();
- break;
- case 'U':
- case 'u':
- {
- size_t len = 0;
- bool invalid_char = false;
- it.advance();
- for(size_t i = 0; i < 8; ++ i) {
- if(it.current() == '"') { break; }
- if(it.current() == '\\') { it.advance(); break; }
- if(!it.is_hexadecimal()) { invalid_char = true; }
- it.advance();
- ++ len;
- }
- unsigned long long val = 0;
- if(len == 0) { src.error(start, it.it, "Expected hexadecimal numbers"); }
- if(invalid_char) { src.error(start, it.it, "Invalid character in hexadecimal literal"); }
- if(!hex_str_to_u64(start + 2, len, val)) {
- src.error(start, it.it, "Invalid unicode escape sequence");
- }
- char c_cache[5] {};
- to_utf8_str(val, c_cache);
- str.add(c_cache);
- }
- break;
- default:
- it.advance();
- src.error(start, it.it, "Unknown escape sequence");
- break;
- }
- continue;
- }
- str.add(it.it, it.char_size());
- it.advance();
- }
- if(!closed) { src.error(it.mark_it, it.it, "Expected closing `\"`"); }
- tokens.add(Token::create(str, it.mark_it, it.it));
- }
- if(it.current() == '\'') {
- String str = String::create();
- bool closed = false;
- it.mark();
- it.advance();
- while(it.valid()) {
- if(it.current() == '\'') {
- it.advance();
- closed = true;
- break;
- }
- if(it.current() == '\\') {
- char const* start = it.it;
- it.advance();
- switch(it.current()) {
- case 'a':
- case 'A':
- str.add('\a');
- it.advance();
- break;
- case 'b':
- case 'B':
- str.add('\b');
- it.advance();
- break;
- case 'f':
- case 'F':
- str.add('\f');
- it.advance();
- break;
- case 'n':
- case 'N':
- str.add('\n');
- it.advance();
- break;
- case 'r':
- case 'R':
- str.add('\r');
- it.advance();
- break;
- case 't':
- case 'T':
- str.add('\t');
- it.advance();
- break;
- case 'v':
- case 'V':
- str.add('\v');
- it.advance();
- break;
- case '\\':
- str.add('\\');
- it.advance();
- break;
- case '\'':
- str.add('\'');
- it.advance();
- break;
- case '\"':
- str.add('\"');
- it.advance();
- break;
- case '\?':
- str.add('\?');
- it.advance();
- break;
- case '0':
- str.add('\0');
- it.advance();
- break;
- case 'U':
- case 'u':
- {
- size_t len = 0;
- bool invalid_char = false;
- it.advance();
- for(size_t i = 0; i < 8; ++ i) {
- if(it.current() == '\'') { break; }
- if(it.current() == '\\') { it.advance(); break; }
- if(!it.is_hexadecimal()) { invalid_char = true; }
- it.advance();
- ++ len;
- }
- unsigned long long val = 0;
- if(len == 0) { src.error(start, it.it, "Expected hexadecimal numbers"); }
- if(invalid_char) { src.error(start, it.it, "Invalid character in hexadecimal literal"); }
- if(!hex_str_to_u64(start + 2, len, val)) {
- src.error(start, it.it, "Invalid unicode escape sequence");
- }
- char c_cache[5] {};
- to_utf8_str(val, c_cache);
- str.add(c_cache);
- }
- break;
- default:
- it.advance();
- src.error(start, it.it, "Unknown escape sequence");
- break;
- }
- continue;
- }
- str.add(it.it, it.char_size());
- it.advance();
- }
- if(str.len == 0) { src.error(it.mark_it, it.it, "Expected character"); }
- if(char_size(str.data) < str.len) {
- src.error(it.mark_it, it.it, "Too many characters in character literal");
- }
- if(!closed) { src.error(it.mark_it, it.it, "Expected closing `'`"); }
- tokens.add(Token::create(to_utf32_char(str.data), it.mark_it, it.it));
- }
- }
- return tokens;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement