Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // javac scan.java // compile
- // java scan example1.x // run
- import java.io.*; // nice way to import everything
- public class scan // needs to match the file name as in scan.java
- {
- public static void main(String[] args) throws IOException
- {
- token[] identified = new token[999]; // array of identified tokens
- int line = 1; // line number
- int pos = 0; // position on line
- char sym = 0; // symbol as a part of token
- String[] tokens = new String[999]; // array of all tokens
- tokens[0] = "";
- int tokenNum = 0; // number of tokens
- FileReader scanner = new FileReader(args[0]); // Java thing used for input
- while(sym != (char) -1) // scanner.read returns -1 when reaches eof, thus this will read till eof
- {
- token curr = new token();
- pos++;
- sym = (char) scanner.read(); // reads a char, returns it as an int. Thus has to be casted as char
- if(tokenNum > 1 && tokens[tokenNum - 2] == "") // these two blocks are used to delete "empty" tokens caused by spaces, tabs and \n
- {
- tokens[tokenNum - 2] = tokens[tokenNum - 1];
- tokens[tokenNum - 1] = "";
- tokenNum--;
- }
- if(tokenNum > 0 && tokens[tokenNum - 1] == "")
- {
- tokens[tokenNum - 1] = "";
- tokenNum--;
- }
- if(Character.isLetterOrDigit(sym) || sym == '_') { // if token is an Identifier
- tokens[tokenNum] += sym;
- pos++;
- }
- else if (sym == '(' || // if token is an operator
- sym == ')' ||
- sym == '{' ||
- sym == '}' ||
- sym == '+' ||
- sym == '-' ||
- sym == '*' ||
- sym == '/' ||
- sym == '<' ||
- sym == '>' ||
- sym == ';')
- {
- curr.value = tokens[tokenNum]; // takes the current string and store it in curr.value
- curr.pos = pos - tokens[tokenNum].length(); // gets the position of where the current token started
- curr.line = line; // line number
- identified[tokenNum] = curr;
- pos++;
- tokenNum++; // has to be a new token
- tokens[tokenNum] = "";
- tokens[tokenNum] += sym; // stores operator as a token
- curr.value = tokens[tokenNum];
- curr.pos = pos;
- curr.line = line;
- identified[tokenNum] = curr;
- tokenNum++; // all operators are 1 symbol, thus next symbol will be in a different token
- tokens[tokenNum] = "";
- }
- else if (sym == '\n')
- {
- curr.value = tokens[tokenNum];
- curr.pos = pos - tokens[tokenNum].length();
- curr.line = line;
- identified[tokenNum] = curr;
- pos = 0; // reset column position because its a new line
- line++; // increment line count
- tokenNum++;
- tokens[tokenNum] = "";
- } else if (sym == ' ') {
- curr.value = tokens[tokenNum];
- curr.pos = pos - tokens[tokenNum].length();
- curr.line = line;
- identified[tokenNum] = curr;
- pos++; //inc position but not line count
- tokenNum++;
- tokens[tokenNum] = "";
- } else if (sym == '\t') {
- curr.value = tokens[tokenNum];
- curr.pos = pos - tokens[tokenNum].length();
- curr.line = line;
- identified[tokenNum] = curr;
- pos += 2; //increment position count by 2
- tokenNum++;
- tokens[tokenNum] = "";
- }
- } // while
- for(int i = 0; i <= tokenNum; i++) // prints all tokens
- {
- System.out.println(tokens[i]);
- System.out.println(identified[i].value);
- }
- } // main
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement