Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/// <summary>
/// Regex-based lexer. The text of all supplied <c>RawText</c> parts is concatenated
/// into one string, tokenized, and every token position is mapped back to the
/// <c>Start</c> pointer of the part it falls into plus a character offset.
/// </summary>
class Lexer
{
    /// <summary>
    /// One lexical rule: the regex group name, the pattern, and the token type it
    /// produces (<c>null</c> means the match is consumed but no token is emitted).
    /// </summary>
    sealed class TokenRule
    {
        public readonly string Name;
        public readonly string Pattern;
        public readonly TokenType? Type;

        public TokenRule(string name, string pattern, TokenType? type)
        {
            Name = name;
            Pattern = pattern;
            Type = type;
        }
    }

    // Rules are tried in array order, so the order is significant:
    // "comment" must precede "punct" (which contains '/'), and "num" must precede
    // "punct" (which contains '+' and '-'). An array is used instead of a Dictionary
    // because Dictionary enumeration order is not guaranteed.
    // NOTE(review): because "num" precedes "punct", a sign directly followed by digits
    // is always lexed as part of the number (e.g. "a-1" gives ident "a", number "-1").
    static readonly TokenRule[] Rules =
    {
        new TokenRule("ident", @"\p{L}\w*", TokenType.Ident),
        // The optional sign applies to both the "1", "1.", "1.5" and the ".5" forms.
        new TokenRule("num", @"[+-]?(?:\d+(?:\.\d*)?|\.\d+)", TokenType.Number),
        // [\s\S] lets a block comment span several lines ('.' never matches '\n').
        new TokenRule("comment", @"/\*[\s\S]*?\*/|//.*$", TokenType.Comment),
        new TokenRule("punct", @":=|<=|>=|==|[;+\-\*/\(\){}:<>=]", TokenType.Punct),
        new TokenRule("string", "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"", TokenType.String),
        new TokenRule("space", @"\s+", null)
    };

    // Built once and shared by all instances; must be declared after Rules because
    // static field initializers run in textual order.
    static readonly Regex TokenRegex = BuildTokenRegex();

    static readonly HashSet<string> Keywords = new HashSet<string>()
    {
        "for",
        "while",
        "int",
        "string",
        "bool"
    };

    // Start index of each part inside totalText (ascending; equal values appear
    // when a part is empty) and the Start pointer of that same part.
    readonly List<int> partIndices = new List<int>();
    readonly List<TextPointer> pointers = new List<TextPointer>();
    readonly string totalText;

    /// <summary>
    /// Tokenizes the concatenated text of <paramref name="texts"/> on a thread-pool thread.
    /// </summary>
    /// <param name="texts">Text parts in document order; enumerated synchronously before the task starts.</param>
    /// <returns>A task producing the tokens in the order they occur in the text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="texts"/> is null.</exception>
    public static Task<List<QualifiedToken>> Parse(IEnumerable<RawText> texts)
    {
        if (texts == null)
        {
            throw new ArgumentNullException("texts");
        }

        var lexer = new Lexer(texts);
        return Task.Run(() => lexer.Parse().ToList());
    }

    /// <summary>
    /// Concatenates the parts and records, for each one, where it starts in the
    /// combined string and which pointer it started at.
    /// </summary>
    Lexer(IEnumerable<RawText> texts)
    {
        StringBuilder sb = new StringBuilder();
        foreach (var text in texts)
        {
            partIndices.Add(sb.Length);
            sb.Append(text.Text);
            pointers.Add(text.Start);
        }
        totalText = sb.ToString();
    }

    /// <summary>
    /// Combines all rules into a single alternation of named groups, anchored with
    /// <c>\G</c> so that a match can only begin exactly at the requested start index.
    /// </summary>
    static Regex BuildTokenRegex()
    {
        var alternatives = Rules.Select(rule => string.Format("(?<{0}>{1})", rule.Name, rule.Pattern));
        var pattern = @"\G(?:" + string.Join("|", alternatives) + ")";
        // Multiline is still required so that '$' in the line-comment rule matches at end of line.
        return new Regex(pattern, RegexOptions.Compiled | RegexOptions.Multiline);
    }

    /// <summary>
    /// Maps an index in the combined text to the start pointer of the part that
    /// contains it and the character offset from that part's start.
    /// </summary>
    /// <param name="position">Index into the combined text, from 0 up to and including its length.</param>
    Tuple<TextPointer, int> GetBasePointerAndOffset(int position)
    {
        var partNo = partIndices.BinarySearch(position);
        if (partNo < 0)
        {
            // Not an exact part start: ~partNo is the first part starting after
            // position, so the part just before it contains the position.
            partNo = ~partNo - 1;
        }
        else
        {
            // Empty parts share their start index with the following part and
            // BinarySearch may return any of the equal entries; always resolve to
            // the last part that starts at this position.
            while (partNo + 1 < partIndices.Count && partIndices[partNo + 1] == position)
            {
                partNo++;
            }
        }

        var partStart = partIndices[partNo];
        var delta = position - partStart;
        return Tuple.Create(pointers[partNo], delta);
    }

    /// <summary>
    /// Lazily tokenizes the combined text. Whitespace is consumed without emitting a
    /// token, identifiers found in the keyword set are reported as keywords, and a
    /// character that no rule recognizes is skipped so that lexing continues with
    /// correct positions for everything after it.
    /// </summary>
    IEnumerable<QualifiedToken> Parse()
    {
        int currPos = 0;
        while (currPos < totalText.Length)
        {
            // Match in place at currPos instead of re-slicing the remaining text.
            var match = TokenRegex.Match(totalText, currPos);
            var rule = match.Success ? FindRule(match) : null;
            if (rule == null || match.Length == 0)
            {
                // Unrecognized input (for example '@' or an unterminated string quote).
                currPos++;
                continue;
            }

            var length = match.Length;
            TokenType? tokenType = rule.Type;
            if (tokenType == TokenType.Ident && CheckKeyword(match.Value))
            {
                tokenType = TokenType.Keyword;
            }

            if (tokenType != null)
            {
                var start = GetBasePointerAndOffset(currPos);
                var end = GetBasePointerAndOffset(currPos + length);
                yield return new QualifiedToken()
                {
                    Type = tokenType.Value,
                    StartPosition = start.Item1,
                    StartOffset = start.Item2,
                    EndPosition = end.Item1,
                    EndOffset = end.Item2
                };
            }

            currPos += length;
        }
    }

    /// <summary>Returns the rule whose named group took part in the match, or null if none did.</summary>
    static TokenRule FindRule(Match match)
    {
        foreach (var rule in Rules)
        {
            if (match.Groups[rule.Name].Success)
            {
                return rule;
            }
        }
        return null;
    }

    /// <summary>Tells whether <paramref name="text"/> is one of the reserved words (case-sensitive).</summary>
    bool CheckKeyword(string text)
    {
        return Keywords.Contains(text);
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement