Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /////////////////////////// Tokens.java
- package LispRegexLab;
/**
 * Lexical categories a piece of source text can be classified as.
 *
 * <p>The declaration order is part of the type's observable behaviour
 * ({@code ordinal()}, {@code values()}), so it is kept exactly as is.
 */
public enum Tokens {
    /** Reserved word. */
    Keyword,

    /** Identifier-like symbol. */
    Atom,

    /** Decimal integer literal. */
    Int,

    /** Hexadecimal integer literal. */
    Hex,

    /** Floating-point literal. */
    Double,

    /** Comment text. */
    Comment,

    /** String literal. */
    String,

    /** Character / quoted-symbol literal. */
    Char,

    /** Directive line. */
    Directive,

    /** Operator. */
    Operator,

    /** Punctuation mark. */
    Punctuation
}
- /////////////////////////// Main.java
- package LispRegexLab;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.util.*;
- import java.util.regex.*;
- public class Main {
- static Map<Tokens , Pattern> regular = new HashMap<Tokens , Pattern>();
- static Map<Tokens , Integer> priority = new HashMap<Tokens , Integer>();
- static void InitRegex(){
- regular.put(Tokens.Atom , Pattern.compile("^(_|[a-z])([a-z]|[0-9]|-)*"));
- regular.put(Tokens.Keyword , Pattern.compile("^(eq|car|cdr|defun|defvar|setq|and|not|or|xor|atom|equal|nil|t|defmacro|cond|if|when|case|loop|do|return|let|list|do|length|setf|format|terpri|dotimes|write-char)"));
- regular.put(Tokens.Int , Pattern.compile("^(0|[1-9]\\d*)"));
- regular.put(Tokens.String, Pattern.compile("^((\"\"([^\\\"\"\\n\\r\\\\]*|\\\\[\\\\0abfnrtuUxv]|\\\\x[\\da-fA-F]{1,4}|\\\\u[\\da-fA-F]{4}|\\\\U[\\da-fA-F]{8})*\"\")|(@\"\"([^\\\"\"]|\"\"\"\")*\"\"))"));
- regular.put(Tokens.Comment, Pattern.compile("^(;+[^\n\r]*)"));
- regular.put(Tokens.Punctuation, Pattern.compile("^[():]"));
- regular.put(Tokens.Hex, Pattern.compile("^0[xX]((0|[1-9a-fA-F][\\da-fA-F]*))"));
- regular.put(Tokens.Operator, Pattern.compile("^\\+ |\\- |\\* |mod |rem |incf |decf |= |> |< |>= |<= |max |min "));
- regular.put(Tokens.Double, Pattern.compile("^(((0|[1-9]\\d*)?\\.\\d+([eE][+-]?\\d+)?[FfDdMm]?)|((0|[1-9]\\d*)([eE][+-]?\\d+)[FfDdMm]?)|((0|[1-9]\\d*)[FfDdMm]))"));
- regular.put(Tokens.Directive, Pattern.compile("^mp:.*"));
- regular.put(Tokens.Char , Pattern.compile("^'[a-z]*"));
- }
- static void InitPriority(){
- priority.put(Tokens.Operator, 1);
- priority.put(Tokens.Punctuation, 2);
- priority.put(Tokens.Double, 3);
- priority.put(Tokens.Int, 4);
- priority.put(Tokens.Hex, 4);
- priority.put(Tokens.Atom, 5);
- priority.put(Tokens.Keyword, 6);
- priority.put(Tokens.String, 7);
- priority.put(Tokens.Char, 7);
- priority.put(Tokens.Directive, 8);
- priority.put(Tokens.Comment, 9);
- }
- static void Tokenize(String text){
- String code = text;
- while(code.length() > 0 && (code.charAt(0) == ' ' || code.charAt(0) == '\n' || code.charAt(0) == '\t'))code = code.substring(1);
- while(code.length() > 0){
- Tokens tokType = Tokens.Comment;
- int len = -1;
- for(Tokens tok : regular.keySet()){
- Pattern trg = regular.get(tok);
- int t = 1 , tmax = 0;
- Matcher tmat = trg.matcher(code.substring(0, t));
- while(t <= code.length()){
- Matcher m = trg.matcher(code.substring(0, t));
- if(m.matches() == false){
- if(t > 20)break;
- ++t;
- continue;
- }
- tmat = m;
- tmax = t;
- ++t;
- if(t == code.length()){
- break;
- }
- }
- if(tmat.matches() == false)continue;
- int tlen = tmax;
- if(tlen > len || (tlen == len && priority.get(tokType) < priority.get(tok))){
- tokType = tok;
- len = tlen;
- }
- }
- if(len <= 0){
- System.out.println("Unknown lexeme " + code);
- return;
- }
- System.out.println(code.substring(0 , len) + " " + tokType.toString());
- code = code.substring(len);
- code = code.trim();
- }
- }
- public static void main(String[] args) {
- InitRegex();
- InitPriority();
- File file = new File("input.txt");
- try {
- FileInputStream fis = new FileInputStream(file);
- byte[] data = new byte[(int) file.length()];
- fis.read(data);
- fis.close();
- fis.close();
- String str = new String(data, "UTF-8");
- Tokenize(str);
- }catch(FileNotFoundException e){
- System.out.print("File not found");
- }catch(IOException e) {
- e.printStackTrace();
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement