Advertisement
Guest User

Untitled

a guest
May 30th, 2016
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.33 KB | None | 0 0
  /// <summary>
  /// An immutable lexical token: a piece of matched text together with the
  /// name of the token type that matched it and the index it was found at.
  /// </summary>
  public class Token
  {
      private readonly string myValue;
      private readonly string myType;
      private readonly int myIndex;

      /// <summary>Creates a token.</summary>
      /// <param name="type">Name of the token type (stored in <see cref="Type"/>).</param>
      /// <param name="token">The matched text (stored in <see cref="Value"/>).</param>
      /// <param name="index">Index associated with the token (stored in <see cref="Index"/>).</param>
      public Token(string type, string token, int index)
      {
          myType = type;
          myValue = token;
          myIndex = index;
      }

      /// <summary>The text of the token.</summary>
      public string Value
      {
          get { return myValue; }
      }

      /// <summary>The name of the token type.</summary>
      public string Type
      {
          get { return myType; }
      }

      /// <summary>The index passed to the constructor.</summary>
      public int Index
      {
          get { return myIndex; }
      }
  }
  13.  
  // Usage example: build a tokenizer from (type, regex) pairs and tokenize a string.
  // The type names are the constants declared on Parser. Definitions added first
  // are processed first, so "ABC" is claimed by SYMBOL before the single-letter
  // SYMBOL_WITH_NUMBERS pattern gets a chance to match its characters.
  Tokenizer tokenizer = Tokenizer.Empty
      .WithToken(Parser.SYMBOL, "(ABC|CDE)")
      .WithToken(Parser.SYMBOL_WITH_NUMBERS, "(A|B|C|D|E|F)")
      .WithToken(Parser.RANGE, "-")
      .WithToken(Parser.SEPARATOR, ",")
      .WithToken(Parser.NUMBER, "[0-9]+");

  Token[] tokens = tokenizer.Tokenize("ABC B1,4-5");
  // results in (the space at index 3 matches no definition and produces no token):
  // Token (Value="ABC", Index=0, TYPE="SYMBOL")
  // Token (Value="B", Index=4, TYPE="SYMBOL_WITH_NUMBERS")
  // Token (Value="1", Index=5, TYPE="NUMBER")
  // Token (Value=",", Index=6, TYPE="SEPARATOR")
  // Token (Value="4", Index=7, TYPE="NUMBER")
  // Token (Value="-", Index=8, TYPE="RANGE")
  // Token (Value="5", Index=9, TYPE="NUMBER")
  30.  
  /// <summary>
  /// Regex-based tokenizer. Token definitions are tried in the order they were
  /// added; once a definition has claimed a stretch of the input, later
  /// definitions cannot produce a token overlapping that stretch.
  /// </summary>
  public class Tokenizer
  {
      /// <summary>Associates a token type name with the regex that recognises it.</summary>
      private class TokenDefinition
      {
          private readonly Regex myRegex;

          public TokenDefinition(string type, string regex)
          {
              Type = type;
              myRegex = new Regex(regex, RegexOptions.IgnoreCase | RegexOptions.Compiled);
          }

          public string Type { get; set; }

          /// <summary>Returns all matches of this definition's regex in <paramref name="input"/>.</summary>
          public MatchCollection Matches(string input)
          {
              return myRegex.Matches(input);
          }
      }

      private readonly List<TokenDefinition> myTokenDefinitions = new List<TokenDefinition>();

      /// <summary>A fresh tokenizer without any token definitions (new instance on every access).</summary>
      public static Tokenizer Empty
      {
          get { return new Tokenizer(); }
      }

      /// <summary>
      /// Registers one definition per regex under the given type name and
      /// returns this instance so calls can be chained.
      /// </summary>
      /// <param name="type">Type name assigned to tokens matched by the regexes.</param>
      /// <param name="regexes">Patterns (compiled case-insensitively) that recognise the token.</param>
      public Tokenizer WithToken(string type, params string[] regexes)
      {
          foreach (string pattern in regexes)
          {
              myTokenDefinitions.Add(new TokenDefinition(type, pattern));
          }

          return this;
      }

      /// <summary>
      /// Tokenizes <paramref name="input"/> and returns the tokens ordered by index.
      /// A null input is treated as the empty string. Characters not matched by
      /// any definition produce no token.
      /// </summary>
      public Token[] Tokenize(string input)
      {
          string text = input ?? string.Empty;
          return CollectTokens(text, new bool[text.Length]);
      }

      // Runs every definition over the input in registration order (so earlier
      // definitions claim characters first), then orders the tokens by index.
      private Token[] CollectTokens(string input, bool[] occupied)
      {
          return myTokenDefinitions
              .SelectMany(definition => TokenizeInternal(input, occupied, definition))
              .OrderBy(token => token.Index)
              .ToArray();
      }

      // Yields a token for each match whose characters are all still unclaimed,
      // marking those characters as claimed before the token is handed out.
      private static IEnumerable<Token> TokenizeInternal(string input, bool[] occupied, TokenDefinition tokenDefinition)
      {
          foreach (Match candidate in tokenDefinition.Matches(input))
          {
              if (!candidate.Success || !IsFree(occupied, candidate.Index, candidate.Length))
              {
                  continue;
              }

              Claim(occupied, candidate.Index, candidate.Length);

              yield return new Token(tokenDefinition.Type, candidate.Value, candidate.Index);
          }
      }

      // True when none of the 'length' slots starting at 'start' is claimed yet.
      private static bool IsFree(bool[] occupied, int start, int length)
      {
          for (int i = start; i < start + length; i++)
          {
              if (occupied[i])
              {
                  return false;
              }
          }

          return true;
      }

      // Marks the 'length' slots starting at 'start' as claimed.
      private static void Claim(bool[] occupied, int start, int length)
      {
          for (int i = start; i < start + length; i++)
          {
              occupied[i] = true;
          }
      }
  }
  100.  
  /// <summary>
  /// One parsed entry: a symbol together with its numbers in ascending order.
  /// </summary>
  public class ParserResult
  {
      /// <summary>The symbol text.</summary>
      public string Symbol { get; private set; }

      /// <summary>The numbers given to the constructor, sorted ascending.</summary>
      public int[] Numbers { get; private set; }

      /// <summary>Creates a result; the numbers are copied and sorted, the caller's array is left untouched.</summary>
      /// <param name="symbol">The symbol text.</param>
      /// <param name="numbers">Numbers belonging to the symbol, in any order (may be empty).</param>
      public ParserResult(string symbol, params int[] numbers)
      {
          Symbol = symbol;

          // Sort a private copy so the argument array keeps its original order.
          int[] ascending = numbers.ToArray();
          Array.Sort(ascending);
          Numbers = ascending;
      }
  }
  112.  
  /// <summary>
  /// Parses strings such as "ABC B1,4-5" into <see cref="ParserResult"/>s.
  /// A SYMBOL token ("ABC", "CDE") becomes a result without numbers; a
  /// SYMBOL_WITH_NUMBERS token ("A".."F") becomes a result carrying the numbers
  /// that follow it, where "a-b" denotes the inclusive range between a and b.
  /// </summary>
  public class Parser
  {
      public const string SYMBOL_WITH_NUMBERS = "SYMBOL_WITH_NUMBERS";
      public const string SYMBOL = "SYMBOL";
      public const string SEPARATOR = "SEPARATOR";
      public const string RANGE = "RANGE";
      public const string NUMBER = "NUMBER";

      private readonly Tokenizer myTokenizer;

      /// <summary>Creates a parser with the built-in token definitions.</summary>
      public Parser()
      {
          // note: first added token definitions have a higher prio (will be processed first).
          myTokenizer = Tokenizer.Empty
              .WithToken(SYMBOL, "(ABC|CDE)")
              .WithToken(SYMBOL_WITH_NUMBERS, "(A|B|C|D|E|F)")
              .WithToken(RANGE, "-")
              .WithToken(SEPARATOR, ",")
              .WithToken(NUMBER, "[0-9]+");
      }

      /// <summary>
      /// Tokenizes <paramref name="input"/> and lazily yields one result per symbol.
      /// </summary>
      /// <param name="input">The text to parse.</param>
      /// <returns>The parsed results in input order; never contains null.</returns>
      public IEnumerable<ParserResult> Parse(string input)
      {
          var tokens = myTokenizer.Tokenize(input);

          // FromStack returns null for number/range tokens that are not preceded
          // by a SYMBOL_WITH_NUMBERS token; such groups are dropped here.
          foreach (var result in ParseInternal(tokens).Where(r => r != null))
          {
              yield return result;
          }
      }

      // Walks the tokens in order, collecting each SYMBOL_WITH_NUMBERS token and the
      // number/range tokens after it on a stack, and flushing the stack into a result
      // whenever the next symbol starts or the input ends.
      private IEnumerable<ParserResult> ParseInternal(Token[] tokens)
      {
          var stack = new Stack<Token>();

          for (int i = 0; i < tokens.Length; i++)
          {
              Token current = tokens[i];

              switch (current.Type)
              {
                  case NUMBER:
                  case RANGE:
                      stack.Push(current);
                      break;
                  case SYMBOL_WITH_NUMBERS:
                      // a new symbol closes the group collected so far
                      if (stack.Count > 0)
                      {
                          yield return FromStack(stack);
                      }
                      stack.Push(current);
                      break;
                  case SEPARATOR:
                      // nothing to do
                      break;
                  case SYMBOL:
                      if (stack.Count > 0)
                      {
                          yield return FromStack(stack);
                      }
                      yield return new ParserResult(current.Value);
                      break;
                  default:
                      throw new InvalidOperationException("Invalid type: '" + current.Type + "'");
              }
          }

          if (stack.Count > 0)
          {
              yield return FromStack(stack);
          }
      }

      // Empties the stack (tokens come off in reverse input order) and builds the
      // result once the owning SYMBOL_WITH_NUMBERS token is reached. Returns null
      // when the stack holds no such token.
      private ParserResult FromStack(Stack<Token> stack)
      {
          var numbers = new List<int>();
          bool addRange = false;
          while (stack.Count > 0)
          {
              var token = stack.Pop();
              switch (token.Type)
              {
                  case NUMBER:
                      AddNumber(numbers, int.Parse(token.Value), ref addRange);
                      break;
                  case SYMBOL_WITH_NUMBERS:
                      return new ParserResult(token.Value, numbers.ToArray());
                  case RANGE:
                      // the next number popped is the other end of a range
                      addRange = true;
                      break;
              }
          }

          return null;
      }

      // Adds numberToAdd to the list. When addRange is set and a number was added
      // before, every value between the most recently added number and numberToAdd
      // (both ends inclusive) is added instead. Duplicates are skipped and addRange
      // is reset afterwards.
      private void AddNumber(List<int> numbers, int numberToAdd, ref bool addRange)
      {
          var last = addRange && numbers.Any() ? numbers.Last() : numberToAdd;
          var from = Math.Min(last, numberToAdd);

          // Inclusive span: a single number gives count 1, and a range keeps its
          // upper end even when numberToAdd is the larger value (e.g. "5-1").
          var count = Math.Abs(last - numberToAdd) + 1;

          foreach (var rangedNumberToAdd in Enumerable.Range(from, count))
          {
              if (!numbers.Contains(rangedNumberToAdd))
              {
                  numbers.Add(rangedNumberToAdd);
              }
          }

          addRange = false;
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement