Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.Text;
- using System.Text.RegularExpressions;
- namespace MyLexer
- {
/// <summary>
/// Describes one kind of token: the name stamped onto produced tokens, the
/// regular expression that recognizes it, and a flag telling the lexer
/// whether matches should be consumed without being emitted.
/// </summary>
public class TokenDefinition
{
    /// <summary>Name copied onto every token created from this definition.</summary>
    public string Name { get; set; }

    /// <summary>Pattern the lexer runs against the input to recognize this token kind.</summary>
    public Regex Regex { get; set; }

    /// <summary>When true, the lexer consumes the matched text but does not emit a token for it.</summary>
    public bool Ignore { get; set; }

    /// <summary>Creates a definition from its name, pattern and ignore flag.</summary>
    /// <param name="name">Value for <see cref="Name"/>.</param>
    /// <param name="regex">Value for <see cref="Regex"/>.</param>
    /// <param name="ignore">Value for <see cref="Ignore"/>; defaults to false.</param>
    public TokenDefinition(string name, Regex regex, bool ignore = false)
    {
        this.Name = name;
        this.Regex = regex;
        this.Ignore = ignore;
    }

    /// <summary>
    /// Builds a token carrying this definition's name, the given source
    /// position and the text that was matched.
    /// </summary>
    /// <param name="line">Line stored in the token's position.</param>
    /// <param name="column">Column stored in the token's position.</param>
    /// <param name="index">Character index stored in the token's position.</param>
    /// <param name="matchedText">Text stored as the token's matched text.</param>
    /// <returns>A newly allocated <see cref="Token"/>.</returns>
    public Token CreateToken(int line, int column, int index, string matchedText)
    {
        var position = new TokenPosition(line, column, index);

        var token = new Token();
        token.Name = this.Name;
        token.Position = position;
        token.MatchedText = matchedText;
        return token;
    }
}
/// <summary>
/// A single lexeme produced by the lexer: which definition recognized it,
/// where it was found, and the exact text that was matched.
/// </summary>
public class Token
{
    /// <summary>Name of the definition that produced this token.</summary>
    public string Name { get; set; }

    /// <summary>Location of the token's first character in the input.</summary>
    public TokenPosition Position { get; set; }

    /// <summary>The text that was matched for this token.</summary>
    public string MatchedText { get; set; }
}
/// <summary>
/// A location in the lexed text, expressed both as a line/column pair and
/// as an absolute character index.
/// </summary>
public class TokenPosition
{
    /// <summary>Line number of the location.</summary>
    public int Line { get; set; }

    /// <summary>Column number of the location within its line.</summary>
    public int Column { get; set; }

    /// <summary>Absolute character offset of the location in the input.</summary>
    public int Index { get; set; }

    /// <summary>Creates a position from its three coordinates.</summary>
    /// <param name="line">Value for <see cref="Line"/>.</param>
    /// <param name="column">Value for <see cref="Column"/>.</param>
    /// <param name="index">Value for <see cref="Index"/>.</param>
    public TokenPosition(int line, int column, int index)
    {
        this.Line = line;
        this.Column = column;
        this.Index = index;
    }

    /// <summary>Formats the position as <c>(Index: Line,Column)</c>.</summary>
    /// <returns>For example <c>(42: 3,7)</c>.</returns>
    public override string ToString()
    {
        return string.Format("({0}: {1},{2})", Index, Line, Column);
    }
}
/// <summary>
/// Regex-driven tokenizer. Token kinds are registered with
/// <see cref="AddDefinition"/>; <see cref="GetTokens"/> then walks an input
/// string from left to right and yields one <see cref="Token"/> per match.
/// </summary>
public class Lexer
{
    /// <summary>
    /// Matches a single line terminator (CRLF, lone CR or lone LF). Despite
    /// the "Eof" in its name it detects ends of lines, not end of file; the
    /// name is kept so existing references keep compiling.
    /// </summary>
    public static Regex s_EofRegex = new Regex(@"\r\n|\r|\n", RegexOptions.Compiled);

    /// <summary>Registered definitions, in the order they were added.</summary>
    public List<TokenDefinition> _tokenDefinitions = new List<TokenDefinition>();

    /// <summary>Appends a definition to the list consulted by <see cref="GetTokens"/>.</summary>
    /// <param name="tokenDefinition">The definition to register.</param>
    /// <exception cref="ArgumentNullException"><paramref name="tokenDefinition"/> is null.</exception>
    public void AddDefinition(TokenDefinition tokenDefinition)
    {
        if (tokenDefinition == null)
        {
            throw new ArgumentNullException(nameof(tokenDefinition));
        }

        _tokenDefinitions.Add(tokenDefinition);
    }

    /// <summary>
    /// Splits <paramref name="text"/> into tokens. The sequence is produced
    /// lazily: scanning happens as the result is enumerated.
    /// </summary>
    /// <remarks>
    /// At every position each registered definition is tried and only
    /// non-empty matches that start exactly at that position are accepted.
    /// When several definitions match, the one registered last wins.
    /// Definitions flagged <see cref="TokenDefinition.Ignore"/> consume their
    /// text without producing a token. If nothing matches, a diagnostic is
    /// written to the console and the offending character is skipped.
    /// A pattern that is not anchored to the start position may search
    /// through the rest of the input before being rejected; anchoring
    /// patterns with <c>\G</c> avoids that extra work.
    /// </remarks>
    /// <param name="text">The input to tokenize.</param>
    /// <returns>The tokens in source order; line and column start at 1, index at 0.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public IEnumerable<Token> GetTokens(string text)
    {
        // Validate here rather than in the iterator so the error is raised
        // at the call site instead of on the first MoveNext().
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        return Tokenize(text);
    }

    /// <summary>Iterator that performs the scan for <see cref="GetTokens"/>.</summary>
    private IEnumerable<Token> Tokenize(string text)
    {
        int line = 1, column = 1, currentIndex = 0;

        while (currentIndex < text.Length)
        {
            TokenDefinition matchedTokenDefinition = null;
            string matchedText = null;

            // Deliberately no early exit: a later definition overrides an
            // earlier one that matched at the same position.
            foreach (var tokenDefinition in _tokenDefinitions)
            {
                var tokenMatch = tokenDefinition.Regex.Match(text, currentIndex);

                // Zero-length matches are rejected: consuming nothing would
                // leave the cursor in place and the scan would never end.
                if (tokenMatch.Success && tokenMatch.Index == currentIndex && tokenMatch.Length > 0)
                {
                    matchedText = tokenMatch.Value;
                    matchedTokenDefinition = tokenDefinition;
                }
            }

            if (matchedTokenDefinition == null)
            {
                Console.WriteLine($"Failed to match any definition at '{text[currentIndex]}' in {new TokenPosition(line, column, currentIndex)}.");

                // Skip the unrecognized character so the scan always makes
                // progress; it goes through the same position bookkeeping
                // below as regular matched text.
                matchedText = text.Substring(currentIndex, 1);
            }
            else if (!matchedTokenDefinition.Ignore)
            {
                yield return matchedTokenDefinition.CreateToken(line, column, currentIndex, matchedText);
            }

            var eofMatch = s_EofRegex.Match(matchedText);
            if (eofMatch.Success)
            {
                // One line per terminator found; after the loop, column is
                // one past the number of characters following the last
                // terminator in the consumed text.
                while (eofMatch.Success)
                {
                    line += 1;
                    column = matchedText.Length - (eofMatch.Index + eofMatch.Length) + 1;
                    eofMatch = eofMatch.NextMatch();
                }
            }
            else
            {
                column += matchedText.Length;
            }

            currentIndex += matchedText.Length;
        }
    }
}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement