Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/// <summary>
/// Kinds of name token the tokenizer can produce.
/// </summary>
/// <remarks>
/// Declaration order is significant: when two patterns match the same number of
/// characters, Tokenize prefers the member with the lower underlying value.
/// </remarks>
enum Info {
    /// <summary>Three capitalised words separated by whitespace.</summary>
    FullName,

    /// <summary>One capitalised word followed by two dotted capital initials.</summary>
    ShortName
}
/// <summary>
/// A single piece of text recognised by the tokenizer.
/// </summary>
class Token {
    /// <summary>Which pattern produced this token.</summary>
    public Info Type { get; set; }

    /// <summary>The matched text.</summary>
    public string Value { get; set; }

    /// <summary>Position of the match as reported by the tokenizer.</summary>
    public int Index { get; set; }

    /// <summary>Number of characters in the match.</summary>
    public int Length { get; set; }
}
/// <summary>
/// Pattern used to recognise each token kind. Filled by <see cref="Init"/>.
/// </summary>
static readonly Dictionary<Info, Regex> regexes = new Dictionary<Info, Regex>();

/// <summary>
/// Registers the compiled pattern for every <see cref="Info"/> kind.
/// Safe to call more than once: existing entries are overwritten.
/// </summary>
static void Init() {
    // Verbatim strings pass backslashes through untouched, so a single backslash is
    // what the regex engine needs: "\s" is whitespace and "\." is a literal dot.
    // Both patterns are anchored with ^ so they only match at the start of the input.
    regexes[Info.ShortName] = new Regex(@"^[A-Z]{1}[a-z]+\s[A-Z]{1}\.[A-Z]{1}\.", RegexOptions.Compiled);
    regexes[Info.FullName] = new Regex(@"^[A-Z]{1}[a-z]+\s[A-Z]{1}[a-z]+\s[A-Z]{1}[a-z]+", RegexOptions.Compiled);
}
/// <summary>
/// Splits <paramref name="text"/> into name tokens using the patterns stored in <c>regexes</c>.
/// </summary>
/// <param name="text">Text to scan. <c>null</c> or empty input produces no tokens.</param>
/// <returns>
/// The recognised tokens in the order they occur. The sequence is produced lazily,
/// so nothing is scanned until the caller enumerates it.
/// </returns>
/// <remarks>
/// At every position the longest match wins; matches of equal length are resolved in
/// favour of the lower <see cref="Info"/> value. A word that no pattern recognises is
/// skipped so that scanning always makes progress. <c>Token.Index</c> is the offset of
/// the match within the original <paramref name="text"/>.
/// </remarks>
IEnumerable<Token> Tokenize(string text) {
    if (string.IsNullOrEmpty(text)) {
        yield break;
    }

    int position = 0;
    while (true) {
        // Skip the whitespace separating tokens.
        while (position < text.Length && char.IsWhiteSpace(text[position])) {
            position++;
        }
        if (position >= text.Length) {
            yield break;
        }

        // The patterns are anchored with ^, so they have to be applied to the unread tail.
        string remainingText = text.Substring(position);

        Match bestMatch = null;
        Info bestType = default(Info);
        foreach (var pair in regexes) {
            Match candidate = pair.Value.Match(remainingText);
            if (!candidate.Success || candidate.Length == 0) {
                continue;
            }

            bool isLonger = bestMatch == null || candidate.Length > bestMatch.Length;
            bool winsTie = bestMatch != null
                && candidate.Length == bestMatch.Length
                && pair.Key < bestType;
            if (isLonger || winsTie) {
                bestMatch = candidate;
                bestType = pair.Key;
            }
        }

        if (bestMatch == null) {
            // Nothing matched here: step over the current word instead of retrying the
            // same position forever. position is on a non-whitespace character, so this
            // always advances by at least one.
            while (position < text.Length && !char.IsWhiteSpace(text[position])) {
                position++;
            }
            continue;
        }

        yield return new Token() {
            Type = bestType,
            Index = position + bestMatch.Index,
            Value = bestMatch.Value,
            Length = bestMatch.Length
        };

        position += bestMatch.Index + bestMatch.Length;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement