Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/// <summary>
/// A single lexical token: the matched text, the name of the token type that
/// produced it, and the position at which the text starts in the input.
/// </summary>
public class Token
{
    /// <summary>The text of the token.</summary>
    public string Value { get; private set; }

    /// <summary>The name of the token type.</summary>
    public string Type { get; private set; }

    /// <summary>The start position of the token.</summary>
    public int Index { get; private set; }

    /// <summary>Creates a token from its type name, its text and its start position.</summary>
    /// <param name="type">Name of the token type.</param>
    /// <param name="token">Text of the token; exposed as <see cref="Value"/>.</param>
    /// <param name="index">Start position of the token.</param>
    public Token(string type, string token, int index)
    {
        this.Type = type;
        this.Value = token;
        this.Index = index;
    }
}
// Usage example: configure a tokenizer and split an input string into tokens.
// Definitions added first have priority over definitions added later.
Tokenizer tokenizer = Tokenizer.Empty
    .WithToken(Parser.SYMBOL, "(ABC|CDE)")
    .WithToken(Parser.SYMBOL_WITH_NUMBERS, "(A|B|C|D|E|F)")
    .WithToken(Parser.RANGE, "-")
    .WithToken(Parser.SEPARATOR, ",")
    .WithToken(Parser.NUMBER, "[0-9]+");
Token[] tokens = tokenizer.Tokenize("ABC B1,4-5");
// results in (the space at index 3 matches no definition and produces no token):
// Token (Value="ABC", Index=0, Type="SYMBOL")
// Token (Value="B", Index=4, Type="SYMBOL_WITH_NUMBERS")
// Token (Value="1", Index=5, Type="NUMBER")
// Token (Value=",", Index=6, Type="SEPARATOR")
// Token (Value="4", Index=7, Type="NUMBER")
// Token (Value="-", Index=8, Type="RANGE")
// Token (Value="5", Index=9, Type="NUMBER")
/// <summary>
/// Splits an input string into <see cref="Token"/>s using an ordered list of
/// regex-based token definitions.
/// </summary>
/// <remarks>
/// Definitions are applied in the order they were added. Once a definition has
/// claimed a run of characters, a later definition cannot produce a token that
/// overlaps it. Characters that no definition matches are skipped silently.
/// </remarks>
public class Tokenizer
{
    /// <summary>Pairs a token type name with the compiled regex that recognizes it.</summary>
    private sealed class TokenDefinition
    {
        private readonly Regex myRegex;

        public TokenDefinition(string type, string regex)
        {
            myRegex = new Regex(regex, RegexOptions.IgnoreCase | RegexOptions.Compiled);
            Type = type;
        }

        public string Type { get; private set; }

        public MatchCollection Matches(string input)
        {
            return myRegex.Matches(input);
        }
    }

    private readonly List<TokenDefinition> myTokenDefinitions = new List<TokenDefinition>();

    /// <summary>
    /// Registers one definition per pattern under the given type name. Mutates
    /// this instance and returns it so calls can be chained.
    /// </summary>
    /// <param name="type">Type name stamped on every token the patterns produce.</param>
    /// <param name="regexes">Regular expressions (matched case-insensitively) recognizing the token.</param>
    /// <returns>This tokenizer.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="regexes"/> or one of its elements is null.</exception>
    public Tokenizer WithToken(string type, params string[] regexes)
    {
        if (regexes == null)
        {
            throw new ArgumentNullException("regexes");
        }

        foreach (var regex in regexes)
        {
            myTokenDefinitions.Add(new TokenDefinition(type, regex));
        }

        return this;
    }

    /// <summary>
    /// Tokenizes <paramref name="input"/>; a null input is treated as the empty string.
    /// </summary>
    /// <param name="input">Text to tokenize.</param>
    /// <returns>The recognized tokens ordered by their start index.</returns>
    public Token[] Tokenize(string input)
    {
        if (input == null)
        {
            input = string.Empty;
        }

        // One flag per input character: true once some token has claimed it.
        var occupied = new bool[input.Length];
        return CollectTokens(input, occupied);
    }

    private Token[] CollectTokens(string input, bool[] occupied)
    {
        var tokens = new List<Token>();

        // Definitions must be exhausted one after another, in registration order,
        // so that earlier definitions claim their characters first.
        foreach (var tokenDefinition in myTokenDefinitions)
        {
            tokens.AddRange(TokenizeInternal(input, occupied, tokenDefinition));
        }

        return tokens.OrderBy(t => t.Index).ToArray();
    }

    private static IEnumerable<Token> TokenizeInternal(string input, bool[] occupied, TokenDefinition tokenDefinition)
    {
        foreach (Match match in tokenDefinition.Matches(input))
        {
            // A pattern that can match the empty string (e.g. "a*") yields
            // zero-length matches; they cover no characters and would otherwise
            // surface as tokens with an empty value.
            if (match.Length == 0)
            {
                continue;
            }

            int end = match.Index + match.Length;
            if (IsAnyOccupied(occupied, match.Index, end))
            {
                continue;
            }

            for (int i = match.Index; i < end; i++)
            {
                occupied[i] = true;
            }

            yield return new Token(tokenDefinition.Type, match.Value, match.Index);
        }
    }

    /// <summary>Returns true when any position in [start, end) is already claimed.</summary>
    private static bool IsAnyOccupied(bool[] occupied, int start, int end)
    {
        for (int i = start; i < end; i++)
        {
            if (occupied[i])
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Gets a new tokenizer without any token definitions. Every access creates
    /// a separate instance.
    /// </summary>
    public static Tokenizer Empty
    {
        get { return new Tokenizer(); }
    }
}
/// <summary>
/// One parsed entry: a symbol together with the numbers attached to it.
/// </summary>
public class ParserResult
{
    /// <summary>Creates a result; the numbers are stored in ascending order.</summary>
    /// <param name="symbol">The symbol text.</param>
    /// <param name="numbers">Numbers belonging to the symbol, in any order; may be omitted.</param>
    public ParserResult(string symbol, params int[] numbers)
    {
        int[] ascending = (from n in numbers
                           orderby n
                           select n).ToArray();

        this.Numbers = ascending;
        this.Symbol = symbol;
    }

    /// <summary>The symbol text.</summary>
    public string Symbol { get; private set; }

    /// <summary>The numbers of the symbol, sorted ascending.</summary>
    public int[] Numbers { get; private set; }
}
/// <summary>
/// Parses strings such as <c>"ABC B1,4-5"</c> into <see cref="ParserResult"/>s.
/// </summary>
/// <remarks>
/// A SYMBOL token ("ABC", "CDE") becomes a result without numbers. A
/// SYMBOL_WITH_NUMBERS token ("A".."F") collects the NUMBER tokens that follow
/// it, where <c>a-b</c> expands to every integer between a and b inclusive, in
/// either direction. For the example above the results are "ABC" with no
/// numbers and "B" with 1, 4, 5. Numbers that are not preceded by a
/// SYMBOL_WITH_NUMBERS token are dropped.
/// </remarks>
public class Parser
{
    public const string SYMBOL_WITH_NUMBERS = "SYMBOL_WITH_NUMBERS";
    public const string SYMBOL = "SYMBOL";
    public const string SEPARATOR = "SEPARATOR";
    public const string RANGE = "RANGE";
    public const string NUMBER = "NUMBER";

    private readonly Tokenizer myTokenizer;

    public Parser()
    {
        // note: first added token definitions have a higher prio (will be processed first).
        myTokenizer = Tokenizer.Empty
            .WithToken(SYMBOL, "(ABC|CDE)")
            .WithToken(SYMBOL_WITH_NUMBERS, "(A|B|C|D|E|F)")
            .WithToken(RANGE, "-")
            .WithToken(SEPARATOR, ",")
            .WithToken(NUMBER, "[0-9]+");
    }

    /// <summary>
    /// Tokenizes <paramref name="input"/> and lazily yields one result per symbol.
    /// </summary>
    /// <param name="input">Text to parse.</param>
    /// <returns>The parsed results in input order; evaluation is deferred until enumeration.</returns>
    /// <exception cref="OverflowException">A number in the input does not fit into an <see cref="int"/>.</exception>
    public IEnumerable<ParserResult> Parse(string input)
    {
        var tokens = myTokenizer.Tokenize(input);

        // FromStack returns null for number groups that have no owning symbol.
        foreach (var result in ParseInternal(tokens).Where(r => r != null))
        {
            yield return result;
        }
    }

    private IEnumerable<ParserResult> ParseInternal(Token[] tokens)
    {
        // Holds the SYMBOL_WITH_NUMBERS token currently being collected (at the
        // bottom) followed by its NUMBER / RANGE tokens.
        var stack = new Stack<Token>();

        foreach (Token current in tokens)
        {
            switch (current.Type)
            {
                case NUMBER:
                case RANGE:
                    stack.Push(current);
                    break;

                case SYMBOL_WITH_NUMBERS:
                    // A new symbol closes the group collected so far.
                    if (stack.Count > 0)
                    {
                        yield return FromStack(stack);
                    }

                    stack.Push(current);
                    break;

                case SEPARATOR:
                    // nothing to do
                    break;

                case SYMBOL:
                    if (stack.Count > 0)
                    {
                        yield return FromStack(stack);
                    }

                    yield return new ParserResult(current.Value);
                    break;

                default:
                    throw new InvalidOperationException("Invalid type: '" + current.Type + "'");
            }
        }

        if (stack.Count > 0)
        {
            yield return FromStack(stack);
        }
    }

    /// <summary>
    /// Empties the stack and builds the result for the symbol found at its bottom.
    /// </summary>
    /// <returns>The result, or null when the stack held no SYMBOL_WITH_NUMBERS token.</returns>
    private static ParserResult FromStack(Stack<Token> stack)
    {
        // ParserResult sorts the numbers, so an unordered set suffices and
        // removes duplicates in constant time per value.
        var numbers = new HashSet<int>();
        int? previousNumber = null;
        bool rangePending = false;

        // Tokens are popped in reverse input order: for "4-5" the 5 comes first,
        // then the RANGE marker, then the 4.
        while (stack.Count > 0)
        {
            Token token = stack.Pop();
            switch (token.Type)
            {
                case NUMBER:
                    int number = int.Parse(token.Value);
                    int otherEnd = rangePending && previousNumber.HasValue
                        ? previousNumber.Value
                        : number;
                    AddInclusiveRange(numbers, otherEnd, number);
                    previousNumber = number;
                    rangePending = false;
                    break;

                case SYMBOL_WITH_NUMBERS:
                    return new ParserResult(token.Value, numbers.ToArray());

                case RANGE:
                    rangePending = true;
                    break;
            }
        }

        return null;
    }

    /// <summary>
    /// Adds every integer between the two bounds, both included, regardless of
    /// which bound is larger.
    /// </summary>
    private static void AddInclusiveRange(HashSet<int> numbers, int first, int second)
    {
        int low = Math.Min(first, second);
        int high = Math.Max(first, second);

        // A long counter keeps the loop terminating even when high is int.MaxValue.
        for (long value = low; value <= high; value++)
        {
            numbers.Add((int)value);
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement