Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import java.util.ArrayList;
import java.util.List;
/** Marker interface implemented by every token class in this file. */
interface Token {}

// Punctuation and operator tokens. Each class carries no data; the class
// itself identifies the symbol.

/** Represents {@code ;}. */
class T_Semicolon implements Token {}

/** Represents {@code (}. */
class T_LeftBracket implements Token {}

/** Represents {@code )}. */
class T_RightBracket implements Token {}

/** Represents {@code =}. */
class T_EqualDefines implements Token {}

/** Represents {@code ==}. */
class T_Equal implements Token {}

/** Represents {@code <}. */
class T_LessThan implements Token {}

/** Represents {@code >}. */
class T_GreaterThan implements Token {}

/** Represents {@code <=}. */
class T_LessEq implements Token {}

/** Represents {@code >=}. */
class T_GreaterEq implements Token {}

/** Represents {@code ,}. */
class T_Comma implements Token {}

/** Represents <code>{</code>. */
class T_LeftCurlyBracket implements Token {}

/** Represents <code>}</code>. */
class T_RightCurlyBracket implements Token {}

/** Represents {@code :=}. */
class T_Assign implements Token {}

/** Represents {@code +}. */
class T_Plus implements Token {}

/** Represents {@code *}. */
class T_Times implements Token {}

/** Represents {@code -}. */
class T_Minus implements Token {}

/** Represents {@code /}. */
class T_Div implements Token {}
- class T_Identifier implements Token { // represents names like x, i, n, numberOfNodes ...
- public String s;
- public T_Identifier ( String _s ) { s = _s; } }
- class T_Integer implements Token { // represents non-negative numbers like 0, 1, 2, 3, ...
- public int n;
- public T_Integer ( int _n ) { n = _n; } }
- class T_Def implements Token {} // represents def
- class T_Skip implements Token {} // represents skip
- class T_If implements Token {} // represents if
- class T_Then implements Token {} // represents then
- class T_Else implements Token {} // represents else
- class T_While implements Token {} // represents while
- class T_Do implements Token {} // represents do
- class T_Repeat implements Token {} // represents repeat
- class T_Until implements Token {} // represents until
- class T_Break implements Token {} // represents break
- class T_Continue implements Token {} // represents continue
// The next two token classes are auxiliary. They do NOT represent
// language syntax. They may be used in lexer construction, but they
// do not have to be used in the implementation. If you use them, make
// sure they do not appear in the token list that your lexer returns.
// DO NOT REMOVE THEIR CLASS DEFINITIONS FROM YOUR SUBMISSION EVEN IF
// YOU DON'T USE THEM.
- class T_EOF implements Token {}
- class T_Error implements Token {
- public String msg;
- public T_Error ( String _msg ) { msg = _msg; } }
// Note that we are using one class per token here. This is good for
// conceptual clarity, but objects instantiating the classes take up a
// lot of memory in Java. For this reason, an industrial-strength
// lexer would probably take a different approach and represent tokens
// by enums.
/**
 * Checked exception declared by {@code Lexer.lex}; the lexer in this file
 * throws it when a piece of the input does not correspond to any token.
 *
 * @author gs366
 */
class LexicalException extends Exception {
    /** The failure description, exactly as passed to the constructor. */
    public String msg;

    /**
     * @param _msg failure description; stored in {@link #msg} and also
     *             handed to {@link Exception} so that {@link #getMessage()}
     *             and stack traces report it instead of {@code null}
     */
    public LexicalException(String _msg) {
        super(_msg);
        msg = _msg;
    }
}
/**
 * Checked exception declared by {@code Lexer.lex}; the lexer in this file
 * throws it when lexing fails for a reason other than unrecognised input.
 */
class Task2Exception extends Exception {
    /** The failure description, exactly as passed to the constructor. */
    public String msg;

    /**
     * @param _msg failure description; stored in {@link #msg} and also
     *             handed to {@link Exception} so that {@link #getMessage()}
     *             and stack traces report it instead of {@code null}
     */
    public Task2Exception(String _msg) {
        super(_msg);
        msg = _msg;
    }
}
- interface Lexer {
- public List<Token> lex ( String input ) throws LexicalException, Task2Exception; }
- class Task2other {
- public static Lexer create() {
- return new Lexer() {
- @Override
- public List<Token> lex(String input) throws LexicalException, Task2Exception {
- String digits = "[0-9]+"; //regular expression for digits
- String specials = ";()=<>,{}:+*-/"; //special notations
- String identifiers = "[a-z]\\w*"; //regular expression for identifiers
- String temp_token = "";
- List<Token> tokensList = new ArrayList<>(); //list of tokens
- ArrayList<String> token_input = new ArrayList<>(); //list of tokens in input
- ArrayList<Character> input_chars = new ArrayList<>(); //list of characters in the input
- for(char chars: input.toCharArray()) { //returns an Array of chars after converting a String into sequence of characters
- input_chars.add(chars);
- }
- try {
- //this analyses the input and then splits it into tokens
- for(char c: input_chars) {
- if(specials.indexOf(c) != -1) {
- if(c == '=' || c == '<' || c == '>' || c == ':') {
- switch (temp_token) {
- case "":
- temp_token = temp_token + c;
- break;
- case "=":
- case "<":
- case ">":
- case ":":
- temp_token = temp_token + c;
- token_input.add(temp_token);
- temp_token = "";
- break;
- default:
- token_input.add(temp_token);
- token_input.add(Character.toString(c));
- break;
- }
- } else {
- if(temp_token.equals("")) {
- token_input.add(Character.toString(c));
- } else {
- token_input.add(temp_token);
- temp_token = "";
- token_input.add(Character.toString(c));
- }
- }
- } else if(c == ' ') {
- if(!temp_token.equals("")) {
- token_input.add(temp_token);
- temp_token = "";
- }
- } else {
- if(c != ' ') {
- temp_token += c;
- }
- }
- }
- if(!temp_token.equals("")) {
- token_input.add(temp_token);
- }
- } catch(Exception e) {
- throw new Task2Exception("");
- }
- //for each token in the tokenised input create a token object of relevant class for the final list of tokens
- for(String token: token_input) {
- Token t = null;
- if(token.equals(";")) {
- t = new T_Semicolon();
- }
- else if(token.equals("(")) {
- t = new T_LeftBracket();
- }
- else if(token.equals(")")) {
- t = new T_RightBracket();
- }
- else if(token.equals("=")) {
- t = new T_EqualDefines();
- }
- else if(token.equals("==")) {
- t = new T_Equal();
- }
- else if(token.equals("<")) {
- t = new T_LessThan();
- }
- else if(token.equals(">")) {
- t = new T_GreaterThan();
- }
- else if(token.equals("<=")) {
- t = new T_LessEq();
- }
- else if(token.equals(">=")) {
- t = new T_GreaterEq();
- }
- else if(token.equals(",")) {
- t = new T_Comma();
- }
- else if(token.equals("{")) {
- t = new T_LeftCurlyBracket();
- }
- else if(token.equals("}")) {
- t = new T_RightCurlyBracket();
- }
- else if(token.equals(":=")) {
- t = new T_Assign();
- }
- else if(token.equals("+")) {
- t = new T_Plus();
- }
- else if(token.equals("*")) {
- t = new T_Times();
- }
- else if(token.equals("-")) {
- t = new T_Minus();
- }
- else if(token.equals("/")) {
- t = new T_Div();
- }
- else if(token.matches(digits)) {
- t = new T_Integer(Integer.parseInt(token));
- }
- else if(token.matches(identifiers)) {
- t = new T_Identifier(token);
- }
- else if(token.equals("def")) {
- t = new T_Def();
- }
- else if(token.equals("if")) {
- t = new T_If();
- }
- else if(token.equals("then")) {
- t = new T_Then();
- }
- else if(token.equals("else")) {
- t = new T_Else();
- }
- else if(token.equals("skip")) {
- t = new T_Skip();
- }
- else if(token.equals("while")) {
- t = new T_While();
- }
- else if(token.equals("do")) {
- t = new T_Do();
- }
- else if(token.equals("repeat")) {
- t = new T_Repeat();
- }
- else if(token.equals("until")) {
- t = new T_Until();
- }
- else if(token.equals("break")) {
- t = new T_Break();
- }
- else if(token.equals("continue")) {
- t = new T_Continue();
- }
- else {
- throw new LexicalException("");
- }
- tokensList.add(t);
- if(t instanceof T_Identifier || t instanceof T_Integer) {
- System.out.println(t.getClass().getSimpleName() + "(" + token + ")");
- }
- else {
- System.out.println(t.getClass().getSimpleName());
- }
- }
- return tokensList;
- }
- };
- }
- public static void main(String[] args) throws LexicalException, Task2Exception {
- String str = "g{(99;def)+}"; //pass
- String strTwo = "until def if then"; //pass
- String strThree = "g (;"; //pass
- String strFour = "g;{ def if ;";//only accepts the 'g'
- String strFive = "G"; // fails which it should
- String strSix = "gG_"; // pass
- create().lex(strFour);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement