Advertisement
osipyonok

Untitled

Jan 16th, 2017
188
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.42 KB | None | 0 0
  /// <summary>
  /// Kinds of person-name token the tokenizer distinguishes.
  /// The declaration order is significant: when two patterns match the same
  /// number of characters, the tokenizer orders candidates by this enum value.
  /// </summary>
  enum Info
  {
      /// <summary>A name written out in full (matched by the pattern registered for this kind).</summary>
      FullName = 0,

      /// <summary>A name abbreviated with initials (matched by the pattern registered for this kind).</summary>
      ShortName = 1
  }
  /// <summary>
  /// One lexeme produced by the tokenizer: what kind of name was recognized,
  /// the text that was matched, and where the match sits.
  /// </summary>
  class Token
  {
      /// <summary>Kind of name that the matching pattern stands for.</summary>
      public Info Type;

      /// <summary>The exact text captured by the matching pattern.</summary>
      public string Value;

      /// <summary>Start position of the matched text, as assigned by the tokenizer.</summary>
      public int Index;

      /// <summary>Number of characters in the matched text.</summary>
      public int Length;
  }
  /// <summary>
  /// Maps every token kind to the pattern that recognizes it.
  /// Empty until <see cref="Init"/> has been called.
  /// </summary>
  static Dictionary<Info, Regex> regexes = new Dictionary<Info, Regex>();

  /// <summary>
  /// Registers the recognition pattern for each <see cref="Info"/> kind in
  /// <see cref="regexes"/>. Calling it again simply overwrites the same entries.
  /// </summary>
  static void Init(){
      // The patterns are verbatim strings, so a single backslash is already a
      // regex escape: "\s" is whitespace and "\." is a literal dot. Doubling the
      // backslash here would make the pattern look for a literal '\' character.

      // Capitalized word followed by two initials, e.g. "Ivanov I.I."
      regexes[Info.ShortName] = new Regex(@"^[A-Z][a-z]+\s[A-Z]\.[A-Z]\.", RegexOptions.Compiled);

      // Three capitalized words, e.g. "Ivanov Ivan Ivanovich"
      regexes[Info.FullName] = new Regex(@"^[A-Z][a-z]+\s[A-Z][a-z]+\s[A-Z][a-z]+", RegexOptions.Compiled);
  }
  /// <summary>
  /// Scans <paramref name="text"/> from left to right and lazily yields a
  /// <see cref="Token"/> for every name recognized by the patterns in
  /// <c>regexes</c>.
  /// </summary>
  /// <param name="text">
  /// The text to scan (the contents of the file that was read). A null or
  /// empty value produces no tokens.
  /// </param>
  /// <returns>
  /// The recognized tokens in order of appearance. <c>Token.Index</c> is the
  /// offset of the match within <paramref name="text"/>. The sequence is
  /// produced on demand, so nothing is scanned until it is enumerated.
  /// </returns>
  /// <remarks>
  /// The return type is <c>IEnumerable&lt;Token&gt;</c> because a method that
  /// uses <c>yield return</c> cannot be declared <c>void</c>.
  /// At each position the longest match wins; ties are broken by the order of
  /// the <c>Info</c> values. Words that no pattern recognizes are skipped.
  /// </remarks>
  IEnumerable<Token> Tokenize(string text){
      if (string.IsNullOrEmpty(text)){
          yield break;
      }

      // The file contents have already been read into text.
      string remainingText = text.TrimStart();

      // Offset of remainingText[0] inside the original text, so that reported
      // indices refer to the input rather than to the shrinking remainder.
      int offset = text.Length - remainingText.Length;

      while (remainingText.Length > 0){
          // Every pattern is anchored with '^', so each one is tried at the
          // current position only. FirstOrDefault yields null when nothing
          // matched (or when no patterns have been registered yet).
          var bestMatch =
              (from pair in regexes
               let tokenType = pair.Key
               let match = pair.Value.Match(remainingText)
               where match.Success && match.Length > 0
               orderby match.Length descending, tokenType
               select new { tokenType, value = match.Value, matchLength = match.Length, index = match.Index }).FirstOrDefault();

          int consumed;
          if (bestMatch == null){
              // Nothing recognizable starts here: skip the current word.
              // remainingText never starts with whitespace, so at least one
              // character is consumed and the loop always makes progress.
              consumed = 0;
              while (consumed < remainingText.Length && !char.IsWhiteSpace(remainingText[consumed])){
                  consumed++;
              }
          }
          else{
              yield return new Token(){
                  Type = bestMatch.tokenType,
                  Index = offset + bestMatch.index,
                  Value = bestMatch.value,
                  Length = bestMatch.matchLength
              };
              consumed = bestMatch.matchLength;
          }

          // Drop what was consumed plus any whitespace that follows it,
          // keeping the absolute offset in step.
          string rest = remainingText.Substring(consumed);
          string trimmed = rest.TrimStart();
          offset += consumed + (rest.Length - trimmed.Length);
          remainingText = trimmed;
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement