Advertisement
Guest User

Untitled

a guest
Dec 10th, 2016
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.27 KB | None | 0 0
  1. package system.prog3;
  2. import java.io.BufferedReader;
  3. import java.io.FileReader;
  4. import java.io.IOException;
  5. import java.util.ArrayList;
  6. import java.util.regex.Matcher;
  7. import java.util.regex.Pattern;
  8.  
  /**
   * Minimal regex-driven lexer for C-like source text.
   *
   * <p>{@link #main(String[])} reads the file named by {@link #file}, splits it into
   * tokens with {@link #lex(String)} and prints one token per line.
   */
  public class laba3 {
      /** Path of the source file that {@link #main(String[])} tokenizes. */
      static final String file = "D:\\input.txt";

      /**
       * Combined pattern: one named group per {@link TokenType}, joined with {@code |} in
       * declaration order. Compiled once because it never changes.
       */
      private static final Pattern TOKEN_PATTERN = buildTokenPattern();

      /**
       * Reads {@link #file}, tokenizes its content and prints every token to standard output.
       *
       * @param args ignored
       * @throws IOException if the input file cannot be opened or read
       */
      public static void main(String[] args) throws IOException {
          StringBuilder input = new StringBuilder();
          // try-with-resources guarantees the reader is closed even when reading fails.
          try (BufferedReader br = new BufferedReader(new FileReader(file))) {
              String line;
              while ((line = br.readLine()) != null) {
                  input.append(line).append('\n');
              }
          }
          for (Token token : lex(input.toString())) {
              System.out.println(token);
          }
      }

      /**
       * Token categories together with the regular expression that recognizes each one.
       *
       * <p>Declaration order is significant: the patterns are joined into a single alternation
       * in this order, so an earlier constant wins when several could match at the same position.
       */
      public static enum TokenType {
          /** Double-quoted string literal on one line; a backslash escapes the next character. */
          STRING("\"(?:\\\\.|[^\"\\\\\\r\\n])*\""),
          /** Single-quoted character literal on one line; a backslash escapes the next character. */
          CHAR("'(?:\\\\.|[^'\\\\\\r\\n])*'"),
          /** C block comment, possibly spanning several lines. */
          COMMENT("/\\*[\\s\\S]*?\\*/"),
          /** Optionally negative integer or decimal number with any number of integer digits. */
          NUMBER("-?(?:[0-9]+(?:\\.[0-9]+)?|\\.[0-9]+)"),
          /** Reserved word that is not part of a type name. */
          KEYWORD("(?<!\\w)(?:auto|break|case|const|continue|default|do|else|extern|for|goto|if|register|return|sizeof|static|switch|typedef|void|volatile|while)(?!\\w)"),
          /** Preprocessor directive: runs to the end of the line, the next '#', or the start of a block comment. */
          DIRECTIVE("#(?:(?!/\\*)[^\\n#])+"),
          /** Arithmetic, logical, bitwise, shift, comparison and assignment operators. */
          OPERATOR("[!<>+\\-/%&|^]=|\\*=|<<=|>>=|={1,2}|\\+{1,2}|-{1,2}|\\*|/|%|<{1,2}|>{1,2}|!|&{1,2}|\\|{1,2}|~|\\^"),
          /** Built-in type name, including multi-word forms; must never match the empty string. */
          TYPE("(?<!\\w)(?:(signed |unsigned )?char|short( int)?|(un)?signed short( int)?|(un)?signed int|long int|long long int|(un)?signed long long( int)?|(un)?signed long( int)?|float|(long )?double|int|(un)?signed|long long|long|enum|struct|union)(?!\\w)"),
          /** Any remaining run of word characters. */
          IDENTIFIER("\\w+"),
          /** One of {@code . , ; :}. */
          PUNCTUATION("[.,;:]"),
          /** A single round, square or curly bracket. */
          BRACKET("[\\[\\](){}]"),
          /** Run of whitespace; recognized but not reported by {@link laba3#lex(String)}. */
          WHITESPACE("\\s+"),
          /** Fallback: a run of non-whitespace characters that no other pattern accepts. */
          UNEXPECTED("\\S+");

          /** Regular expression source for this token type. */
          public final String pattern;

          private TokenType(String pattern) {
              this.pattern = pattern;
          }
      }

      /** A lexeme paired with the category it was recognized as. */
      public static class Token {
          /** Category of this token. */
          public TokenType type;
          /** Exact text matched in the input. */
          public String data;

          /**
           * @param type category of the token
           * @param data matched text
           */
          public Token(TokenType type, String data) {
              this.type = type;
              this.data = data;
          }

          /** Renders the token as {@code (TYPE_NAME text)}. */
          @Override
          public String toString() {
              return String.format("(%s %s)", type.name(), data);
          }
      }

      /**
       * Splits {@code input} into tokens.
       *
       * <p>Whitespace is skipped. Text that no specific pattern recognizes is reported as
       * {@link TokenType#UNEXPECTED} rather than dropped.
       *
       * @param input source text to tokenize; must not be {@code null}
       * @return tokens in the order they appear in {@code input}
       */
      public static ArrayList<Token> lex(String input) {
          ArrayList<Token> tokens = new ArrayList<Token>();
          Matcher matcher = TOKEN_PATTERN.matcher(input);
          while (matcher.find()) {
              // The named groups are the top-level alternatives of the pattern, so exactly
              // one of them is non-null for every match.
              for (TokenType type : TokenType.values()) {
                  String text = matcher.group(type.name());
                  if (text == null) {
                      continue;
                  }
                  if (type != TokenType.WHITESPACE) {
                      tokens.add(new Token(type, text));
                  }
                  break;
              }
          }
          return tokens;
      }

      /** Joins every token pattern into one alternation of named groups, in declaration order. */
      private static Pattern buildTokenPattern() {
          StringBuilder alternation = new StringBuilder();
          for (TokenType type : TokenType.values()) {
              if (alternation.length() > 0) {
                  alternation.append('|');
              }
              alternation.append("(?<").append(type.name()).append('>')
                      .append(type.pattern).append(')');
          }
          return Pattern.compile(alternation.toString());
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement