Advertisement
Guest User

Untitled

a guest
Feb 12th, 2016
54
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.08 KB | None | 0 0
  1. package stackoverflow;
  2.  
  3. import java.util.ArrayList;
  4. import java.util.List;
  5. import java.util.regex.Matcher;
  6. import java.util.regex.Pattern;
  7.  
  8. public class Tokenizer {
  9.  
  10. private static class Pat {
  11.  
  12. final Pattern pattern;
  13. final int type;
  14.  
  15. public Pat(Pattern pattern, int type) {
  16. this.pattern = pattern;
  17. this.type = type;
  18. }
  19. }
  20.  
  21. private final List<Pat> patterns = new ArrayList<>();
  22. public static final int EOS = -1;
  23. public static final int ANY = -2;
  24. public static final int UNKNOWN = -3;
  25.  
  26. private Pat skip = new Pat(Pattern.compile("^\\s*"), 0);
  27. private static final Pat any = new Pat(Pattern.compile("."), ANY);
  28.  
  29. public String string;
  30. public int index;
  31.  
  32. private Pattern compile(String s) {
  33. if (!s.startsWith("^"))
  34. s = "^" + s;
  35. return Pattern.compile(s);
  36. }
  37.  
  38. public Tokenizer tokenPattern(Pattern p, int type) {
  39. if (type < 0)
  40. throw new IllegalArgumentException("type(" + type + ") must be >= 0");
  41. patterns.add(new Pat(p, type));
  42. return this;
  43. }
  44.  
  45. public Tokenizer tokenPattern(String s, int type) {
  46. return tokenPattern(compile(s), type);
  47. }
  48.  
  49. public Tokenizer tokenConstant(String s, int type) {
  50. return tokenPattern(Pattern.quote(s), type);
  51. }
  52.  
  53. private Tokenizer skipPattern(Pattern p) {
  54. skip = new Pat(p, 0);
  55. return this;
  56. }
  57.  
  58. public Tokenizer skipPattern(String s) {
  59. return skipPattern(compile(s));
  60. }
  61.  
  62. public Tokenizer source(String string) {
  63. this.string = string;
  64. this.index = 0;
  65. return this;
  66. }
  67.  
  68. private int start;
  69. public int start() { return start; }
  70. private int end;
  71. public int end() { return end; }
  72. private int type;
  73. public int type() { return type; }
  74. private String value;
  75. public String value() { return value; }
  76.  
  77. public boolean hasNext() {
  78. return index < string.length();
  79. }
  80.  
  81. private int set(int type, int start, int end, String value) {
  82. this.type = type;
  83. this.start = start;
  84. this.end = end;
  85. this.value = value;
  86. return type;
  87. }
  88.  
  89. private int next(Pat pat) {
  90. if (index >= string.length())
  91. return set(EOS, string.length(), string.length(), "EOS");
  92. Matcher m = pat.pattern.matcher(string.substring(index));
  93. if (!m.find())
  94. return set(UNKNOWN, index, index, "");
  95. int type = pat.type;
  96. if (type == ANY)
  97. type = m.group().charAt(0);
  98. set(type, m.start(), m.end(), m.group());
  99. index += m.end();
  100. return type;
  101. }
  102.  
  103. public Tokenizer next() {
  104. next(skip);
  105. if (type == EOS)
  106. return this;
  107. for (Pat p : patterns) {
  108. next(p);
  109. if (type != UNKNOWN)
  110. return this;
  111. }
  112. next(any);
  113. return this;
  114. }
  115.  
  116. @Override
  117. public String toString() {
  118. return String.format("Tokenizer(type=%d value=%s string=%s index=%d)", type, value, string, index);
  119. }
  120. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement