Advertisement
Patasuss

Tokenizers

Mar 26th, 2019
187
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.33 KB | None | 0 0
# Sample definition text fed to `tokenizeDefs` at the bottom of the script.
# Each line has the form `name: ...`.
var strDefs = """generic: /
block: [cs]
binop: lhs rhs
infix: lhs op rhs
"""
  6.  
  7. type
  8. TokenKind = enum
  9. tkIdent
  10. tkColon
  11. tkLSquare
  12. tkRSquare
  13. tkSlash
  14.  
  15. Token = object
  16. case kind: TokenKind
  17. of tkIdent:
  18. ident: string
  19. of tkColon, tkLSquare, tkRSquare, tkSlash:
  20. discard
  21.  
  22. TokenizerResult = tuple[succ: bool, index: int]
  23.  
  24. proc discardWhitespace(str: string, index: int) : TokenizerResult =
  25. var index = index
  26. while index < str.len():
  27. let c = str[index]
  28. if c notin {' ', '\t'}:
  29. return (true, index)
  30. index.inc(1)
  31. return (true, index+1)
  32.  
  33. proc readIdentToken(str: string, index: int, token: var Token) : TokenizerResult =
  34. var
  35. index = index
  36. ident = ""
  37. while index < str.len():
  38. let c = str[index]
  39. if c in {'a'..'z'}:
  40. ident.add(c)
  41. else:
  42. break
  43. if ident.len()==0:
  44. return (false, -1)
  45. token = Token(kind: tkIdent, ident: ident)
  46. return (true, index+1)
  47.  
  48. proc readSymbolToken(str: string, index: int, token: var Token) : TokenizerResult =
  49. let c = str[index]
  50. case c
  51. of '[':
  52. token = Token(kind: tkLSquare)
  53. return (true, index+1)
  54. of ']':
  55. token = Token(kind: tkRSquare)
  56. return (true, index+1)
  57. of ':':
  58. token = Token(kind: tkColon)
  59. return (true, index+1)
  60. of '/':
  61. token = Token(kind: tkSlash)
  62. return (true, index+1)
  63. else:
  64. return (false, -1)
  65.  
  66. proc tokenizeDefs(str: string) : seq[Token] =
  67. result = @[]
  68. var
  69. index = 0
  70. token : Token = Token(kind: tkSlash)
  71. while index < str.len():
  72. index = discardWhitespace(str, index).index
  73. var tokResult = readIdentToken(str, index, token)
  74. if tokResult.succ:
  75. index = tokResult.index
  76. result.add(token)
  77. echo "Read an ident"
  78. continue
  79. tokResult = readSymbolToken(str, index, token)
  80. if tokResult.succ:
  81. index = tokResult.index
  82. result.add(token)
  83. echo "Read a symbol"
  84. continue
  85. break
  86. return result
  87.  
  88.  
# Script entry point: tokenize the sample definitions and print the token list.
echo tokenizeDefs(strDefs)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement