Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package pr3;
- import java.io.File;
- import java.util.ArrayList;
- import java.util.Scanner;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import java.io.BufferedReader;
- import java.io.FileReader;
- import java.io.IOException;
/**
 * Minimal regular-expression based lexer for Pascal source text.
 *
 * <p>{@link #lex(String)} splits a source string into {@link Token}s classified by
 * {@link TokenType}; {@link #main(String[])} reads a file, lexes it and prints one
 * token per line.
 */
public class Main {
    /**
     * Retained only so existing package-level references keep compiling.
     * {@link #main(String[])} no longer stores its reader here; it uses a local,
     * automatically closed reader instead.
     */
    static BufferedReader c1;

    /**
     * Input file used when no path is given on the command line. The user-profile
     * folder name is Cyrillic and is written with Unicode escapes so that this
     * source compiles regardless of the compiler's source encoding.
     */
    private static final String DEFAULT_INPUT_PATH =
            "C:\\Users\\\u041c\u0430\u043a\u0441\u0438\u043c\\Desktop\\input.txt";

    /**
     * Token categories. Declaration order is significant: the combined pattern tries
     * the categories in this order at every position, so an earlier category wins
     * whenever two of them could match at the same place.
     */
    public enum TokenType {
        /** Single-quoted string literal; a doubled quote inside the literal is an escaped quote. */
        STRING("'[^']*(?:''[^']*)*'"),
        /** Pascal comments: (* ... *), { ... } and // up to the end of the line. */
        COMMENT("\\(\\*[\\s\\S]*?\\*\\)|//[^\\r\\n]*|\\{[^}]*\\}"),
        /** Reserved words, matched case-insensitively and only as whole words. */
        KEYWORD("(?<!\\w)(?i:program|uses|var|begin|end|while|if|then|else|do|repeat|until|of"
                + "|function|procedure|to|downto|for|absolute|and|array|case|const|div|goto|label"
                + "|interface|library|mod|not|or|object|type|with|xor)(?!\\w)"),
        /**
         * Symbolic operators, two-character forms first so that they are not split.
         * The word operators are whole-word only; inside {@link Main#lex(String)} they
         * are reported as KEYWORD because that category is tried first.
         */
        OPERATOR(":=|<>|<=|>=|[-+*/<>=]|(?<!\\w)(?i:div|mod|and|or|xor)(?!\\w)"),
        /** Built-in type names, matched case-insensitively and only as whole words. */
        TYPE("(?<!\\w)(?i:integer|real|char|byte|word|shortint|longint|int64|uint64|double"
                + "|string|boolean|array|record|pointer)(?!\\w)"),
        /** Unsigned integer or decimal literal; a leading minus is lexed as an OPERATOR. */
        NUMBER("[0-9]+(?:\\.[0-9]+)?"),
        /** Any remaining run of word characters. */
        IDENTIFIER("\\w+"),
        /** Round and square brackets. */
        BRACKET("[\\[\\]()]"),
        /** Dot, comma, colon and semicolon. */
        PUNCTUATION("[.,:;]"),
        /** Whitespace; matched so it can be skipped, never emitted as a token. */
        WHITESPACE("\\s+"),
        /** Fallback: a run of non-whitespace characters that no other category accepts. */
        UNEXPECTED("\\S+");

        /** Regular expression (java.util.regex syntax) recognising this category. */
        public final String pattern;

        TokenType(String pattern) {
            this.pattern = pattern;
        }
    }

    /** A lexeme together with the category it was recognised as. */
    public static class Token {
        public TokenType type;
        public String data;

        public Token(TokenType type, String data) {
            this.type = type;
            this.data = data;
        }

        /** Renders the token as {@code (TYPE text)}. */
        @Override
        public String toString() {
            return String.format("(%s %s)", type.name(), data);
        }
    }

    /** Combined pattern with one named group per {@link TokenType}; compiled once. */
    private static final Pattern TOKEN_PATTERN = buildTokenPattern();

    /** Joins every category pattern into a single alternation of named groups. */
    private static Pattern buildTokenPattern() {
        StringBuilder alternatives = new StringBuilder();
        for (TokenType tokenType : TokenType.values()) {
            if (alternatives.length() > 0) {
                alternatives.append('|');
            }
            alternatives.append("(?<").append(tokenType.name()).append('>')
                    .append(tokenType.pattern).append(')');
        }
        return Pattern.compile(alternatives.toString());
    }

    /**
     * Splits {@code input} into tokens.
     *
     * @param input source text to tokenize; {@code null} is treated as empty input
     * @return the tokens in source order, with whitespace omitted; never {@code null}
     */
    public static ArrayList<Token> lex(String input) {
        ArrayList<Token> tokens = new ArrayList<>();
        if (input == null) {
            return tokens;
        }
        Matcher matcher = TOKEN_PATTERN.matcher(input);
        while (matcher.find()) {
            // Exactly one named group participates in each match; find out which one.
            for (TokenType tokenType : TokenType.values()) {
                String text = matcher.group(tokenType.name());
                if (text == null) {
                    continue;
                }
                if (tokenType != TokenType.WHITESPACE) {
                    tokens.add(new Token(tokenType, text));
                }
                break;
            }
        }
        return tokens;
    }

    /**
     * Reads a Pascal source file, lexes it and prints a blank line followed by one
     * token per line.
     *
     * @param args optional; {@code args[0]} is the path of the file to read, otherwise
     *             the built-in default path is used
     */
    public static void main(String[] args) {
        String path = args != null && args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        StringBuilder source = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the terminator; restore it so tokens on adjacent
                // lines stay separated and line comments end where the line ends.
                source.append(line).append('\n');
            }
        } catch (IOException ex) {
            System.err.println("Cannot read " + path + ": " + ex.getMessage());
            return;
        }
        ArrayList<Token> tokens = lex(source.toString());
        System.out.println();
        for (Token token : tokens) {
            System.out.println(token);
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement