Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package system.prog3;
- import java.io.BufferedReader;
- import java.io.FileReader;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
/**
 * Regex-based lexer for a subset of C source code.
 *
 * <p>{@link #main} reads the file named by {@link #file}, splits its text into
 * {@link Token}s with {@link #lex(String)} and prints one token per line.
 */
public class laba3 {
    /** Path of the source file that {@link #main} tokenizes. */
    static final String file = "D:\\input.txt";

    /**
     * All {@link TokenType} patterns joined into one alternation, each wrapped in a
     * named group carrying the constant's name. Compiled once because it never changes.
     */
    private static final Pattern TOKEN_PATTERN = buildTokenPattern();

    /**
     * Reads {@link #file}, tokenizes its content and prints every token to standard output.
     *
     * @param args ignored
     * @throws IOException if the input file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        StringBuilder input = new StringBuilder();
        // try-with-resources guarantees the reader is closed even when reading fails.
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = br.readLine()) != null) {
                // readLine() strips the terminator; restore it so multi-line
                // constructs and directives keep their line boundaries.
                input.append(line).append('\n');
            }
        }
        for (Token token : lex(input.toString())) {
            System.out.println(token);
        }
    }

    /**
     * Token categories together with the regular expression recognising each one.
     *
     * <p>Declaration order matters: the patterns are tried in this order at every
     * input position and the first one that matches wins. No pattern may match the
     * empty string, otherwise the matcher would skip the following character.
     */
    public enum TokenType {
        /** Double-quoted string literal on one line; backslash escapes such as {@code \"} are honoured. */
        STRING("\"(?:\\\\.|[^\"\\\\\\n])*\""),
        /** Single-quoted character literal on one line; backslash escapes such as {@code \'} are honoured. */
        CHAR("'(?:\\\\.|[^'\\\\\\n])*'"),
        /** Block comment, possibly spanning several lines. */
        COMMENT("/\\*[\\s\\S]*?\\*/"),
        /**
         * Integer or decimal literal with an optional leading minus. Because this is
         * tried before {@link #OPERATOR}, {@code x-1} lexes as {@code x} and {@code -1}.
         */
        NUMBER("-?(?:[0-9]+(?:[.][0-9]+)?|[.][0-9]+)"),
        /** Reserved word that is not a type name; must not be part of a longer word. */
        KEYWORD("(?<!\\w)(?:auto|break|case|const|continue|default|do|else|extern|for|goto|if|register|return|sizeof|static|switch|typedef|void|volatile|while)(?!\\w)"),
        /** Preprocessor directive: from {@code #} up to the end of line, the next {@code #} or a block comment. */
        DIRECTIVE("#(?:(?!/\\*)[^\\n#])+"),
        /** Arithmetic, comparison, logical, bitwise and assignment operators. */
        OPERATOR("[!<>+\\-/%&|^]=|\\*=|<<=|>>=|={1,2}|\\+{1,2}|-{1,2}|\\*|/|%|<{1,2}|>{1,2}|!|&{1,2}|\\|{1,2}|~|\\^"),
        /** Built-in type specifier (possibly several words) or one of enum/struct/union. */
        TYPE("(?<!\\w)(?:(signed |unsigned )?char|short( int)?|(un)?signed short( int)?|(un)?signed int|long int|long long int|(un)?signed long long( int)?|(un)?signed long( int)?|float|(long )?double|int|(un)?signed|long long|long|enum|struct|union)(?!\\w)"),
        /** Any remaining run of word characters. */
        IDENTIFIER("\\w+"),
        /** One of {@code . , ; :}. */
        PUNCTUATION("[.,;:]"),
        /** One round, square or curly bracket. */
        BRACKET("[\\[\\](){}]"),
        /** Run of whitespace; recognised but not reported by {@link laba3#lex(String)}. */
        WHITESPACE("\\s+"),
        /** Fallback: any run of non-whitespace that nothing else recognised. */
        UNEXPECTED("\\S+");

        /** Regular expression source for this token type. */
        public final String pattern;

        private TokenType(String pattern) {
            this.pattern = pattern;
        }
    }

    /** A lexeme together with the category it was recognised as. */
    public static class Token {
        /** Category of this token. */
        public TokenType type;
        /** Exact text matched in the input. */
        public String data;

        /**
         * @param type category of the token
         * @param data text matched in the input
         */
        public Token(TokenType type, String data) {
            this.type = type;
            this.data = data;
        }

        /** Renders the token as {@code (TYPE text)}. */
        @Override
        public String toString() {
            return String.format("(%s %s)", type.name(), data);
        }
    }

    /**
     * Splits {@code input} into tokens.
     *
     * @param input source text to tokenize; must not be {@code null}
     * @return the tokens in input order, with whitespace omitted; empty for empty input
     */
    public static ArrayList<Token> lex(String input) {
        ArrayList<Token> tokens = new ArrayList<>();
        Matcher matcher = TOKEN_PATTERN.matcher(input);
        while (matcher.find()) {
            // Exactly one named group takes part in each match; find which one.
            for (TokenType type : TokenType.values()) {
                String text = matcher.group(type.name());
                if (text != null) {
                    if (type != TokenType.WHITESPACE) {
                        tokens.add(new Token(type, text));
                    }
                    break;
                }
            }
        }
        return tokens;
    }

    /** Builds {@code (?<NAME>pattern)|(?<NAME>pattern)|...} over all token types in declaration order. */
    private static Pattern buildTokenPattern() {
        StringBuilder combined = new StringBuilder();
        for (TokenType type : TokenType.values()) {
            if (combined.length() > 0) {
                combined.append('|');
            }
            combined.append("(?<").append(type.name()).append('>')
                    .append(type.pattern).append(')');
        }
        return Pattern.compile(combined.toString());
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement