Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.*;
/**
 * Categories a lexeme can be tagged with.
 *
 * <p>Constant names and their declaration order are kept exactly as they were,
 * because other code may refer to them by name or rely on {@code values()}.
 * Only some of the constants are given a pattern by the lexer in this file;
 * the rest are declared but not produced by the code visible here.
 */
enum Tokens {
    /** Reserved word. */
    Keyword,
    Atom,
    Int,
    /** Hexadecimal literal. */
    Hex,
    /** Floating-point literal. */
    Double,
    /** Source comment. */
    Comment,
    String,
    Char,
    Directive,
    /** Arithmetic, relational, logical or assignment operator. */
    Operator,
    /** Brackets, separators and similar punctuation. */
    Punctuation,
    /** Identifier known to name a constant. */
    CONST_NAME,
    /** Numeric literal. */
    DIGIT,
    /** Plain identifier. */
    IDN,
    /** Quoted string literal. */
    LITER_STR,
    /** Identifier used as a procedure name. */
    PROC_NAME,
    /** Identifier used as a function name. */
    FUNC_NAME,
    /** Built-in type name. */
    TYPE_NAME
}
- class LexPascal{
- private String path = "";
- private static Map<Tokens , Pattern> regular = new HashMap<Tokens , Pattern>();
- private static Map<Tokens , Integer> priority = new HashMap<Tokens , Integer>();
- public LexPascal(String _path){
- path = _path.trim();
- InitRegex();
- InitPriority();
- }
- private void InitRegex(){
- regular.put(Tokens.IDN , Pattern.compile("^(_|[a-z])([a-z]|[0-9]|_)*"));
- regular.put(Tokens.Keyword , Pattern.compile("^(uses|program|var|const|type|begin|repeat|until|end|if|then|else|while|do|for|of|record|with|procedure|function|case|in|set|array|nil|true|false)"));
- regular.put(Tokens.DIGIT , Pattern.compile("^(0|[1-9]\\d*)"));
- regular.put(Tokens.LITER_STR, Pattern.compile("^'(?:[^']+|(''))*'$"));
- regular.put(Tokens.Comment, Pattern.compile("^(//(.*?)//|\\{(.*?)\\}|\\(\\*(.*?)\\*\\))"));
- regular.put(Tokens.Punctuation, Pattern.compile("^(\\(|\\)|;|,|\\.|:|\\[|\\]|\\^|\\.\\.)"));
- // regular.put(Tokens.Hex, Pattern.compile("^0[xX]((0|[1-9a-fA-F][\\da-fA-F]*))"));
- regular.put(Tokens.Operator, Pattern.compile("^\\+|\\-|\\*|/|:=|<>|=|>|<|>=|<=|!=|div|mod|and|not|~"));
- regular.put(Tokens.Double, Pattern.compile("^(((0|[1-9]\\d*)?\\.\\d+([eE][+-]?\\d+)?[FfDdMm]?)|((0|[1-9]\\d*)([eE][+-]?\\d+)[FfDdMm]?)|((0|[1-9]\\d*)[FfDdMm]))"));
- // regular.put(Tokens.Directive, Pattern.compile("^mp:.*"));
- regular.put(Tokens.TYPE_NAME , Pattern.compile("^(byte|integer|real|char|boolean|string)"));
- // regular.put(Tokens.PROC_NAME , Pattern.compile("^(randomize)"));
- }
- private void InitPriority(){
- priority.put(Tokens.Operator, 10);
- priority.put(Tokens.Punctuation, 2);
- priority.put(Tokens.Double, 3);
- priority.put(Tokens.DIGIT, 4);
- priority.put(Tokens.Hex, 4);
- priority.put(Tokens.IDN, 5);
- priority.put(Tokens.Keyword, 6);
- priority.put(Tokens.LITER_STR, 7);
- priority.put(Tokens.TYPE_NAME, 8);
- priority.put(Tokens.Comment, 9);
- }
- private String Prepare(String text){
- text = text.toLowerCase();
- String code = "";
- boolean ck = false;
- // text = text.replace(";",";;");
- text = text.replace("clrscr;","clrscr();");
- text = text.replace("writeln;","writeln();");
- text = text.replace("write;","write();");
- text = text.replace("readln;","readln();");
- text = text.replace("read;","read();");
- text = text.replace("randomize;","randomize();");
- text = text.replace("function","func");
- text = text.replace("procedure","proc");
- text = text.replace("true","1");
- text = text.replace("false","0");
- for(int i = 0 ; i < text.length() ; ++i){
- char ch = text.charAt(i);
- String small = "йцукенгшщзхъфывапролджэёячсмитьбю";
- String big = "ЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЁЯЧСМИТЬБЮ";
- if(small.indexOf(ch) != -1 && !ck){
- code += 'a';
- ck = true;
- }else if(big.indexOf(ch) != -1 && !ck){
- code += 'A';
- ck = true;
- }else if(small.indexOf(ch) == -1 && big.indexOf(ch) == -1){
- code += ch;
- ck = false;
- }
- }
- String temp = "";
- boolean add = false;
- int st = 0;
- for(int i = 0 ; i < code.length() ; ++i){
- char ch = code.charAt(i);
- if(st == 0 && ch == '\''){
- add = false;
- temp += ch;
- st = 1;
- continue;
- }
- if(st == 1 && ch == '\''){
- st = 0;
- temp += ch;
- continue;
- }
- if(st == 1 && ch == '\\'){
- st = 2;
- continue;
- }
- if(st == 2){
- st = 1;
- continue;
- }
- if(st == 1) {
- if(add == false){
- temp += ch;
- add = true;
- }
- continue;
- }
- temp += ch;
- }
- return temp.toLowerCase();
- }
- public ArrayList<Map<Tokens , String>> Tokenize() throws Exception{
- int wasbegin = 0;
- Set<String> consts = new HashSet<>();
- String last_key = "";
- File file = new File(path);
- String text = "";
- try {
- FileInputStream fis = new FileInputStream(new File(path));
- byte[] data = new byte[(int) file.length()];
- fis.read(data);
- fis.close();
- text = new String(data, "UTF-8");
- }catch(FileNotFoundException e){
- System.out.print(path + " File not found");
- }catch(IOException e) {
- e.printStackTrace();
- }
- ArrayList<Map<Tokens , String>> ans = new ArrayList<>();
- String code = Prepare(text);
- // System.out.println(code);
- while(code.length() > 0 && (code.charAt(0) == ' ' || code.charAt(0) == '\n' || code.charAt(0) == '\t'))code = code.substring(1);
- while(code.length() > 0){
- Tokens tokType = Tokens.Comment;
- int len = -1;
- for(Tokens tok : regular.keySet()){
- Pattern trg = regular.get(tok);
- String tmp = code;
- for(int j = code.length() ; j > 0 ; --j){
- Matcher tmat = trg.matcher(tmp);
- if(tmat.matches()){
- if(j > len || (j == len && priority.get(tok) > priority.get(tokType))){
- len = j;
- tokType = tok;
- break;
- }
- }
- tmp = tmp.substring(0 , tmp.length() - 1);
- }
- Matcher tmat = trg.matcher(code);
- MatchResult res1 = tmat.toMatchResult();
- try{
- System.out.println(res1.end());
- }catch(java.lang.IllegalStateException e){}
- if(tmat.matches()){
- MatchResult res = tmat.toMatchResult();
- if(res.start() == 0){
- if(res.end() > len){
- len = res.end();
- tokType = tok;
- }
- }
- }
- }
- if(len <= 0){
- throw new Exception("Unknown lexeme " + code);
- }
- if(tokType == Tokens.IDN){
- if(ans.size() > 0) {
- for (Tokens tk : ans.get(ans.size() - 1).keySet()) {
- if (tk == Tokens.Keyword && (ans.get(ans.size() - 1).get(tk).equals("procedure")
- || ans.get(ans.size() - 1).get(tk).equals("function"))) {
- tokType = ans.get(ans.size() - 1).get(tk).equals("function") ? Tokens.FUNC_NAME : Tokens.PROC_NAME;
- }
- }
- }
- }
- /* if(tokType == Tokens.Keyword && code.substring(0 , 2).equals("if")){
- ++opif;
- }
- if(opif > 0 && tokType == Tokens.Keyword && code.substring(0 , 4).equals("then")){
- ++opth;
- }*/
- if(wasbegin == 0 && tokType == Tokens.Keyword && code.substring(0 , 5).equals("begin")){
- ++wasbegin;
- }
- if(tokType == Tokens.Double){
- tokType = Tokens.DIGIT;
- }
- if(wasbegin > 0 && tokType == Tokens.Punctuation && code.substring(0 , 1).equals("(")){
- if(ans.size() > 1){
- for (Tokens tk : ans.get(ans.size() - 1).keySet()) {
- if(tk == Tokens.IDN){
- boolean ck = false;
- String str = "";
- str = ans.get(ans.size() - 1).get(tk);
- for(Tokens tk2 : ans.get(ans.size() - 2).keySet()){
- if(tk2 == Tokens.Operator || (tk2 == Tokens.Punctuation && (",(").contains(ans.get(ans.size() - 2).get(tk2)))){
- ck = true;//function
- }
- }
- // System.out.println(ck);
- ans.get(ans.size() - 1).clear();
- if(ck){
- ans.get(ans.size() - 1).put(Tokens.FUNC_NAME , str);
- }else{
- ans.get(ans.size() - 1).put(Tokens.PROC_NAME , str);
- }
- }
- }
- }
- }
- if(tokType == Tokens.Keyword){
- last_key = code.substring(0 , len);
- }
- if(last_key.equals("const") && tokType == Tokens.IDN){
- consts.add(code.substring(0 , len));
- // tokType = Tokens.CONST_NAME;
- }
- if(!last_key.equals("const") && tokType == Tokens.IDN && consts.contains(code.substring(0 , len))){
- tokType = Tokens.CONST_NAME;
- }
- Map<Tokens , String> curans = new HashMap<>();
- curans.put(tokType , code.substring(0 , len));
- // System.out.println(curans);
- // if(tokType != Tokens.Comment)
- int repeat = 1;
- if(wasbegin > 0 && tokType == Tokens.Punctuation && code.substring(0 , len).equals(";")){
- ++repeat;
- }
- for(int kk = 0 ; kk < repeat ; ++kk)
- ans.add(curans);
- // System.out.println(code.substring(0 , len) + " " + tokType.toString());
- code = code.substring(len);
- code = code.trim();
- }
- int sz = ans.size() - 1;
- int st = 0;
- lab:
- while(true){
- for(Tokens tk : ans.get(sz).keySet()){
- if(tk == Tokens.Comment) {
- --sz;
- continue lab;
- }
- }
- for(Tokens tk : ans.get(sz).keySet()) {
- if (st == 0 && tk == Tokens.Punctuation && ans.get(sz).get(tk).equals(".")) {
- st = 1;
- --sz;
- continue lab;
- }
- if (st == 1 && tk == Tokens.Keyword && ans.get(sz).get(tk).equals("end")) {
- st = 2;
- --sz;
- continue lab;
- }
- if (st == 2 && (tk != Tokens.Punctuation || !ans.get(sz).get(tk).equals(";"))) {
- Map<Tokens, String> curans = new HashMap<>();
- curans.put(Tokens.Punctuation, ";");
- ans.add(sz + 1, curans);ans.add(sz + 1, curans);
- }
- return ans;
- }
- }
- // return ans;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement