Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
//list of keywords
//does not include OPERATORS or CONSTANTS or fake keywords TO/STEP
//the tokenizer uses a keyword's index in this list as its token type,
//so the order here must match the T_ keyword constants below
var KEYWORDS=[
    "BREAK","CALL","COMMON","CONTINUE","DATA","DEC","DEF","DIM","ELSE","ELSEIF","END","ENDIF","EXEC","FOR","GOSUB","GOTO","IF","INC",
    "INPUT","LINPUT","NEXT","ON","OUT","PRINT","READ","REM","REPEAT","RESTORE","RETURN","STOP","SWAP","THEN","UNTIL","USE","VAR","WEND",
    "WHILE"
];
//keyword token types (value === index of the keyword in KEYWORDS)
var T_BREAK=0,T_CALL=1,T_COMMON=2,T_CONTINUE=3,T_DATA=4,T_DEC=5,T_DEF=6,T_DIM=7,T_ELSE=8,T_ELSEIF=9,T_END=10,T_ENDIF=11,T_EXEC=12,T_FOR=13;
var T_GOSUB=14,T_GOTO=15,T_IF=16,T_INC=17,T_INPUT=18,T_LINPUT=19,T_NEXT=20,T_ON=21,T_OUT=22,T_PRINT=23,T_READ=24,T_REM=25,T_REPEAT=26;
var T_RESTORE=27,T_RETURN=28,T_STOP=29,T_SWAP=30,T_THEN=31,T_UNTIL=32,T_USE=33,T_VAR=34,T_WEND=35,T_WHILE=36;
//non-keyword token types (every value must be unique)
var T_UNARY=40,T_OPERATOR=41,T_NUMBER=42,T_WORD=43,T_ERROR=44,T_STRING=45,T_COMMENT=46,T_LABEL=47,T_TEXT=48,T_OPENPAREN=49;
var T_CLOSEPAREN=50,T_OPENBRACKET=51,T_CLOSEBRACKET=52,T_COMMA=53,T_SEMICOLON=54,T_COLON=55,T_MINUS=56,T_EQUALS=57,T_EOF=58;
//T_LINEBREAK used to share the value 47 with T_LABEL, which made labels and line breaks
//indistinguishable; it now has its own id (the other ids are left untouched)
var T_LINEBREAK=59;
//parser
//Reads tokens one statement at a time and reports, for every token, how it should be highlighted.
//nextToken: function that returns the next token as an object with
//  type (one of the T_ constants), text (the token's source text) and word
//  (compared against option names such as "TO", "STEP", "MOTION"; only read right after a token is fetched)
//callback: output function, called as callback(text,kind) where kind is a string such as
//  "keyword", "variable", "function", "number", "string", "operator", "label", "comment",
//  "equals", "error" or "" (plain text). After a syntax error it is also called as
//  callback(message,"errormessage").
//Returns nothing. Parsing stops at the first T_EOF token read at statement level.
function parse(nextToken,callback){
    //current token
    var type,text,word; //NOTE: word is only updated right after next()ing. don't rely on it later
    //token stored by peekToken/peekWord until next() hands it back
    var newType,newText;
    //token read state, see next():
    // 1 = fetch a new token, 0 = reuse the current token, -1 = use the stored (peeked) token
    var readNext=1;
    //inside DEF
    var inDef=false;

    while(1){ // <3
        try{
            //read a "line" of code
            next();
            switch(type){
            //keywords with no arguments
            case T_BREAK:case T_CONTINUE:case T_ELSE:case T_ENDIF:case T_STOP:case T_REPEAT:case T_WEND:
                output("keyword");
                break;
            //CALL
            case T_CALL:
                output("keyword");
                //CALL SPRITE and CALL BG
                if(peekWord("SPRITE")||peekWord("BG")){
                    //was readToken("word",...), which can never match a numeric token type
                    readToken(T_WORD,"keyword");
                //regular CALL
                }else{
                    readList(readExpression);
                    if(readToken(T_OUT,"keyword"))
                        readList(readVariable);
                }
                break;
            //COMMON
            case T_COMMON:
                output("keyword");
                assert(peekToken(T_DEF),"COMMON without DEF");
                break;
            //DATA
            case T_DATA:
                output("keyword");
                assert(readList(readExpression,true),"Missing list"); //should read constant expression
                break;
            //SWAP
            case T_SWAP:
                output("keyword");
                assert(readVariable(),"Missing variable in SWAP");
                assert(readToken(T_COMMA),"Missing comma in SWAP");
                assert(readVariable(),"Missing variable in SWAP");
                break;
            //READ
            case T_READ:
                output("keyword");
                assert(readList(readVariable,true),"Missing list");
                break;
            //DEC/INC
            case T_DEC:case T_INC:
                output("keyword");
                assert(readVariable(),"Missing INC/DEC variable");
                if(readToken(T_COMMA,""))
                    assert(readExpression(),"Missing INC/DEC amount");
                break;
            //DEF
            case T_DEF:
                output("keyword");
                assert(!inDef,"Nested DEF");
                inDef=true;
                //read function name
                assert(readToken(T_WORD,"function"),"Missing DEF name");
                //() form
                if(readToken(T_OPENPAREN,"")){
                    //read argument list
                    readList(readArgument,true);
                    //read )
                    assert(readToken(T_CLOSEPAREN,""),"Missing \")\" after DEF arguments");
                //subroutine/out form
                }else{
                    //read argument list
                    readList(readArgument,true);
                    //OUT
                    if(readToken(T_OUT,"keyword"))
                        //read output list
                        readList(readArgument,true);
                }
                break;
            //VAR
            case T_VAR:
                //assignment form
                if(peekToken(T_OPENPAREN)){
                    output("keyword");
                    readToken(T_OPENPAREN,"");
                    assert(readExpression(),"Missing var name");
                    assert(readToken(T_CLOSEPAREN,""),"missing )");
                    while(readToken(T_OPENBRACKET,"")){
                        assert(readList(readExpression,true),"Missing array index");
                        assert(readToken(T_CLOSEBRACKET,""),"Missing \"]\"");
                    }
                    //was readToken("=",...), which can never match a numeric token type
                    assert(readToken(T_EQUALS,""),"missing =");
                    readExpression();
                //normal form
                }else{
                    output("keyword");
                    assert(readList(readDeclaration,true),"Missing variable list");
                }
                break;
            //DIM
            case T_DIM:
                output("keyword");
                assert(readList(readDeclaration,true),"Missing variable list");
                break;
            //IF, ELSEIF
            case T_ELSEIF:case T_IF:
                output("keyword");
                assert(readExpression(),"Missing IF condition");
                assert(readToken(T_THEN,"keyword")||readToken(T_GOTO,"keyword"),"IF without THEN");
                readToken(T_LABEL,"label");//optional
                //note: check if IF/GOTO supports expressions!
                break;
            //END
            case T_END:
                output("keyword");
                if(inDef)
                    inDef=false;
                break;
            //EXEC/USE
            case T_EXEC:case T_USE:
                output("keyword");
                readExpression();
                break;
            //FOR
            case T_FOR:
                output("keyword");
                assert(readVariable(),"Missing FOR variable");
                assert(readToken(T_EQUALS,""),"Missing = in FOR");
                readExpression();
                assert(readToken(T_WORD) && word==="TO","Missing TO in FOR");
                output("keyword");
                readExpression();
                if(readToken(T_WORD) && word==="STEP"){
                    output("keyword");
                    readExpression();
                }else
                    readNext=0; //not STEP: leave the token for the next statement
                break;
            //GOSUB GOTO RESTORE(?)
            case T_GOSUB:case T_GOTO:case T_RESTORE:
                output("keyword");
                if(!readToken(T_LABEL,"label"))
                    assert(readExpression(),"Missing argument to keyword");
                break;
            //WHILE, UNTIL
            case T_UNTIL:case T_WHILE:
                output("keyword");
                assert(readExpression(),"Missing argument to keyword");
                break;
            //INPUT
            case T_INPUT:
                output("keyword");
                readPrintList(readExpression);
                break;
            //LINPUT
            case T_LINPUT:
                output("keyword");
                readPrintList(readExpression);
                break;
            //NEXT
            case T_NEXT:
                output("keyword");
                readExpression();
                break;
            //ON
            case T_ON:
                output("keyword");
                readExpression();
                assert(readToken(T_GOTO,"keyword")||readToken(T_GOSUB,"keyword"),"ON without GOTO/GOSUB");
                assert(readList(readExpression,true),"Missing label list after ON");
                break;
            //PRINT
            case T_PRINT:
                output("keyword");
                readPrintList(readExpression);
                break;
            //REM
            case T_REM:
                output("keyword");
                //everything up to and including the end of the line is a comment
                do{
                    next();
                    output("comment");
                }while(type!==T_LINEBREAK && type!==T_EOF);
                break;
            //RETURN
            case T_RETURN:
                output("keyword");
                if(inDef)
                    readExpression();
                break;
            //OUT/THEN
            case T_OUT:case T_THEN:
                output("error");
                assert(false,"Illegal OUT/THEN");
                break;
            //other words
            case T_WORD:case T_OPENPAREN:
                //make readVariable() look at the current token again
                readNext--;
                var oldWord=word; //this is the variable name (word gets overwritten by peeking)
                switch(readVariable(true)){
                //variable assignment
                case true:
                    assert(readToken(T_EQUALS,"equals"),"missing =");
                    readExpression();
                    break;
                //should not happen!
                case false:
                    reportInternalError("what");
                    break;
                //might be function or variable
                default:
                    //is variable
                    if(peekToken(T_EQUALS)){
                        output("variable");
                        readToken(T_EQUALS,"equals");
                        readExpression();
                    //function or option
                    }else{
                        //fake keywords: XON/XOFF/OPTION
                        //error highlighting might be a bit off here since it's not worth making the code 10x more complex
                        // just to get slightly better coloring when your code is wrong.
                        switch(oldWord){
                        case "XON":
                            output("keyword");
                            //normal XON
                            if(readToken(T_WORD)){
                                output("keyword");
                                assert(word==="MOTION"||word==="EXPAD"||word==="MIC"||word==="WIIU"||word==="COMPAT","invalid option");
                            //XON 3DS
                            }else{
                                //XON 3DS is the only place where a number token is part of a fake keyword
                                assert(readToken(T_NUMBER,"keyword"),"invalid option");
                                assert(text.trimLeft()==="3","invalid option");
                                assert(readToken(T_WORD,"keyword"),"invalid option");
                                assert(word==="DS","invalid option");
                            }
                            break;
                        case "XOFF":
                            output("keyword");
                            assert(readToken(T_WORD,"keyword")&&(word==="MOTION"||word==="EXPAD"||word==="MIC"||word==="COMPAT"),"invalid option");
                            break;
                        case "OPTION":
                            output("keyword");
                            assert(readToken(T_WORD,"keyword"),"invalid option");
                            assert(word==="STRICT"||word==="DEFINT"||word==="TOOL","invalid option");
                            break;
                        //return to sanity, normal function call!
                        default:
                            output("function");
                            readList(readExpression);
                            if(readToken(T_OUT,"keyword"))
                                readList(readVariable);
                        }
                    }
                }
                break;
            //label
            case T_LABEL:
                output("label");
                break;
            //comment
            case T_COMMENT:
                output("comment");
                break;
            //end
            case T_EOF:
                output("");
                return; //THE END
            //line break, colon
            case T_LINEBREAK:case T_COLON:
                output("");
                break;
            //other
            default:
                output("error");
                assert(false,"Expected statement");
            }
        }catch(error){
            //normal parsing error
            if(error.name==="ParseError"){
                //read until the end of the line
                while(1){
                    next();
                    if(type===T_LINEBREAK||type===T_EOF)
                        break;
                    output("error");
                }
                //show error message
                callback(error.message,"errormessage");
                output(""); //line break
            //bad error!!!
            }else{
                reportInternalError("real actual error!!! "+error);
                return;
            }
        }
    }

    //report a bug in the parser itself (not a syntax error in the parsed program)
    //uses alert() as before when it exists, console.error otherwise
    function reportInternalError(message){
        if(typeof alert==="function")
            alert(message);
        else
            console.error(message);
    }

    //check if next token is of a specific type (without consuming it)
    //the current token is restored; the peeked token is stored for the following next()
    function peekToken(wantedType){
        var prevType=type,prevText=text;
        next();
        readNext=-1;
        newType=type;
        newText=text;
        type=prevType;
        text=prevText;
        return newType===wantedType;
    }

    //check if next token is a specific word (without consuming it)
    //wantedWord: uppercase word to compare against
    function peekWord(wantedWord){
        var prevType=type,prevText=text;
        next();
        readNext=-1;
        newType=type;
        newText=text;
        type=prevType;
        text=prevText;
        return newType===T_WORD && newText.trimLeft().toUpperCase()===wantedWord;
    }

    //Try to read a specific token
    //wantedType: token type to accept
    //outputType: if given, the token is outputted with this kind when it matches
    //returns true if the token matched; otherwise the token is kept for the next read
    function readToken(wantedType,outputType){
        next();
        if(type===wantedType){
            readNext=1;
            if(outputType!==undefined)
                output(outputType);
            return true;
        }
        readNext=0;
        return false;
    }

    //Read list
    //reader: function to read item (readExpression etc.)
    //noNull: throw an error if a null value is found
    //returns the result of the first reader() call, or 1 if the list contained a comma
    function readList(reader,noNull){
        var ret=reader();
        if(readToken(T_COMMA,"")){
            assert(ret||!noNull,"Null value not allowed");
            ret=1;
            do{
                assert(reader()||!noNull,"Null value not allowed");
            }while(readToken(T_COMMA,""));
        }
        return ret;
    }

    //read list of PRINT arguments (items separated by , or ;)
    //stops after a separator that is not followed by an item
    function readPrintList(reader){
        if(!reader())
            return;
        while((readToken(T_COMMA,"")||readToken(T_SEMICOLON,""))&&reader()); //empty body on purpose
    }

    //read normal expression
    //returns false (and keeps the token) if the next token cannot start an expression
    function readExpression(){
        next();
        switch(type){
        //VAR
        case T_VAR:
            readVar();
            break;
        //function or variable
        case T_WORD:
            if(peekToken(T_OPENPAREN)){
                output("function");
                readToken(T_OPENPAREN,"");
                readList(readExpression);
                assert(readToken(T_CLOSEPAREN,""),"Missing \")\" in function call");
            }else
                output("variable");
            break;
        //number
        case T_NUMBER:
            output("number");
            break;
        //string/labelstring
        case T_STRING:case T_LABEL:
            output("string");
            break;
        //operator (unary)
        case T_UNARY:case T_MINUS:
            output("operator");
            assert(readExpression(),"Missing operator argument");
            break;
        //open parenthesis
        case T_OPENPAREN:
            output("");
            readExpression();
            assert(readToken(T_CLOSEPAREN,""),"Missing \")\"");
            break;
        //anything else is not an expression
        default:
            readNext=0;
            return false;
        }
        //read []s
        while(readToken(T_OPENBRACKET,"")){
            assert(readList(readExpression,true),"Missing array index");
            assert(readToken(T_CLOSEBRACKET,""),"Missing \"]\"");
        }
        //read infix operators
        while(readToken(T_OPERATOR,"operator")||readToken(T_MINUS,"operator"))
            assert(readExpression(),"Operator missing second argument");
        return true;
    }

    //read function definition argument
    function readArgument(){
        if(readToken(T_WORD,"variable")){
            if(readToken(T_OPENBRACKET,""))
                assert(readToken(T_CLOSEBRACKET,""),"Missing \"]\"");
            return true;
        }
        return false;
    }

    //read variable declaration
    function readDeclaration(){
        if(readToken(T_WORD,"variable")){
            if(readToken(T_OPENBRACKET,"")){
                assert(readList(readExpression,true),"Missing array size");
                assert(readToken(T_CLOSEBRACKET,""),"Missing \"]\"");
            }else if(readToken(T_EQUALS,""))
                readExpression();
            return true;
        }
        return false;
    }

    //read function VAR() (the current token must be VAR)
    //returns true when VAR(...) was read; throws a parse error otherwise
    //(this used to assign an undeclared variable "ret" instead of returning)
    function readVar(){
        //"function" form of VAR
        if(peekToken(T_OPENPAREN)){
            output("keyword");
            readToken(T_OPENPAREN,"");
            assert(readExpression(),"Missing VAR argument");
            assert(readToken(T_CLOSEPAREN,""),"Missing \")\" in VAR()");
            return true;
        }
        //bad VAR
        output("error");
        assert(false,"invalid VAR");
    }

    //return values:
    // false - bad
    // true - definitely a variable
    // string - single word (could be function call or variable)
    // if true is passed to function, variable name will not be outputted when it might be a function call (for use in handling =)
    function readVariable(noPrintVarName){
        var ret=false;
        next();
        switch(type){
        case T_VAR:
            //VAR(...) is definitely a variable
            ret=readVar();
            break;
        case T_WORD:
            if(!noPrintVarName){
                output("variable");
                ret=true;
            }else
                ret=text;
            break;
        case T_OPENPAREN:
            output("");
            assert(readVariable(),"missing variable");
            assert(readToken(T_CLOSEPAREN,""),"missing )");
            ret=true;
            break;
        default:
            readNext=0;
            return false;
        }
        if(peekToken(T_OPENBRACKET)){
            //a word followed by [ is an array variable, so its name can be outputted now
            if(ret!==true && ret!==false)
                output("variable");
            while(readToken(T_OPENBRACKET,"")){
                assert(readList(readExpression,true),"Missing array index");
                assert(readToken(T_CLOSEBRACKET,""),"Missing \"]\"");
            }
            ret=true;
        }
        return ret;
    }

    //throw error with message if condition is false
    //the thrown Error has name "ParseError", which the main loop uses to recognize syntax errors
    function assert(condition,message){
        if(!condition){
            console.log(message);
            var error=new Error(message);
            error.name="ParseError";
            throw error;
        }
    }

    //output the current token's text with the given kind
    function output(type){
        callback(text,type);
    }

    //advance to the next token, depending on readNext:
    // 1: fetch a new token from nextToken()
    // -1: take the token stored by peekToken/peekWord
    // -2: (probably never happens) wait one more call before taking the stored token
    // anything else (0): keep the current token, and fetch normally next time
    function next(){
        if(readNext===1){
            var items=nextToken();
            type=items.type;
            text=items.text;
            word=items.word; //careful!
        }else if(readNext===-1){
            type=newType;
            text=newText;
            readNext=1;
        }else if(readNext===-2)
            readNext=-1;
        else
            readNext=1;
    }
}
//convert code into tokens
//in: (string) code - program to highlight
//out: (function) nextToken - returns next token when called
//Each token is an object {type,text} where type is one of the T_ constants and text is the
//source text from the end of the previous token up to the end of this one (so it includes
//any spaces/tabs that were skipped before the token). Plain words (no #/%/$ suffix) also
//get a "word" property holding the word in uppercase.
//Once the end of the code is reached, every further call returns a T_EOF token.
function tokenize(code){
    //i: position of the current character c; textStart: where the current token starts (after whitespace)
    var i=-1,c,isAlpha,isDigit,textStart;
    //end of the previous token
    var prev=0;

    //move to the next character and classify it (c is "" past the end of the code)
    function next(){
        i++;
        c=code.charAt(i);
        isAlpha=(c>='A'&&c<='Z'||c>='a'&&c<='z');
        isDigit=(c>='0'&&c<='9');
    }

    //move to a specific position
    function jump(pos){
        i=pos-1;
        next();
    }

    //is the current character a hexadecimal digit?
    function isHexDigit(){
        return isDigit||c>='A'&&c<='F'||c>='a'&&c<='f';
    }

    //make a token for the word that was just read, working out which kind of word it is
    function pushWord(){
        var start=prev;
        prev=i;
        var upper=code.substring(textStart,i).toUpperCase();
        var type;
        //bitwise not
        if(upper==="NOT")
            type=T_UNARY;
        //word operators
        else if(upper==="DIV"||upper==="MOD"||upper==="AND"||upper==="OR"||upper==="XOR")
            type=T_OPERATOR;
        //true/false
        else if(upper==="TRUE"||upper==="FALSE")
            type=T_NUMBER;
        //other keyword (the index in KEYWORDS is the token type)
        else{
            type=KEYWORDS.indexOf(upper);
            if(type===-1)
                type=T_WORD;
        }
        return {type:type,text:code.substring(start,i),word:upper};
    }

    //make a token of the given type ending at the current position
    function push(type){
        var start=prev;
        prev=i;
        return {type:type,text:code.substring(start,i)};
    }

    next();
    return function(){
        //skip whitespace (it becomes part of the next token's text)
        while(c===" "||c==="\t")
            next();
        if(c==='')
            return push(T_EOF);
        textStart=i;
        //keywords, functions, variables
        if(isAlpha||c==='_'){
            next();
            while(isAlpha||isDigit||c==='_')
                next();
            //a type suffix means it can't be a keyword
            if(c==='#'||c==='%'||c==='$'){
                next();
                return push(T_WORD);
            }
            return pushWord();
        }
        //numbers
        if(isDigit||c==='.'){
            while(isDigit)
                next();
            if(c==='.'){
                next();
                if(isDigit){
                    next();
                    while(isDigit)
                        next();
                }else{
                    //nothing after the "." : the number ends here (no exponent is read)
                    if(c==='#')
                        next();
                    return push(T_NUMBER);
                }
            }
            if(c==='E'||c==='e'){
                var ePos=i;
                next();
                if(c==='+'||c==='-')
                    next();
                if(isDigit){
                    next();
                    while(isDigit)
                        next();
                }else{
                    //E without digits: go back to the E and give up
                    jump(ePos);
                    return push(T_ERROR);
                }
            }
            if(c==='#')
                next();
            return push(T_NUMBER);
        }
        switch(c){
        //strings (end at the closing quote or at the end of the line)
        case '"':
            next();
            while(c && c!=='"' && c!=='\n' && c!=='\r')
                next();
            if(c==='"')
                next();
            return push(T_STRING);
        //comments (up to the end of the line)
        case '\'':
            next();
            while(c && c!=='\n' && c!=='\r')
                next();
            return push(T_COMMENT);
        //logical AND, hexadecimal, binary
        case '&':
            next();
            switch(c){
            case '&':
                next();
                return push(T_OPERATOR);
            case 'H':case 'h':
                var hPos=i;
                next();
                if(isHexDigit()){
                    next();
                    while(isHexDigit())
                        next();
                    return push(T_NUMBER);
                }
                //&H without digits: only the & is part of the error token
                jump(hPos);
                return push(T_ERROR);
            case 'B':case 'b':
                var bPos=i;
                next();
                if(c==='0'||c==='1'){
                    next();
                    while(c==='0'||c==='1')
                        next();
                    return push(T_NUMBER);
                }
                //&B without digits: only the & is part of the error token
                jump(bPos);
                return push(T_ERROR);
            default:
                return push(T_ERROR);
            }
        //labels
        case '@':
            next();
            if(isDigit||isAlpha||c==="_"){
                next();
                while(isDigit||isAlpha||c==="_")
                    next();
                return push(T_LABEL);
            }
            return push(T_ERROR);
        //constants
        case '#':
            next();
            if(isDigit||isAlpha){
                next();
                while(isDigit||isAlpha)
                    next();
                return push(T_NUMBER);
            }
            return push(T_ERROR);
        //logical or
        case '|':
            next();
            if(c==='|'){
                next();
                return push(T_OPERATOR);
            }
            return push(T_ERROR);
        //less than, less than or equal, left shift
        case '<':
            next();
            if(c==='='||c==='<')
                next();
            return push(T_OPERATOR);
        //greater than, greater than or equal, right shift
        case '>':
            next();
            if(c==='='||c==='>')
                next();
            return push(T_OPERATOR);
        //assignment/equal (=), equal (==)
        case '=':
            next();
            if(c==='='){
                next();
                return push(T_OPERATOR);
            }
            return push(T_EQUALS);
        //logical not, not equal
        case '!':
            next();
            if(c==='='){
                next();
                return push(T_OPERATOR);
            }
            return push(T_UNARY);
        //minus has its own type
        case '-':
            next();
            return push(T_MINUS);
        //add, multiply, divide
        case '+':case '*':case '/':
            next();
            return push(T_OPERATOR);
        //line break
        case '\n':case '\r':
            next();
            return push(T_LINEBREAK);
        //characters
        case '(':
            next();
            return push(T_OPENPAREN);
        case ')':
            next();
            return push(T_CLOSEPAREN);
        case '[':
            next();
            return push(T_OPENBRACKET);
        case ']':
            next();
            return push(T_CLOSEBRACKET);
        case ',':
            next();
            return push(T_COMMA);
        case ';':
            next();
            return push(T_SEMICOLON);
        case ':':
            next();
            return push(T_COLON);
        //print shortcut
        case '?':
            next();
            return push(T_PRINT);
        //other
        default:
            next();
            return push(T_TEXT);
        }
    };
}
//"Example" usage: applying syntax highlighting to an html element.
//The element's text is tokenized and parsed, and its contents are replaced with
//<span> elements whose css class name is the kind reported by the parser.
//element: the element to highlight (its textContent is read and its innerHTML is replaced)
function applySyntaxHighlighting(element){
    //pieces of generated html, and the css class of the span that is currently open (if any)
    var pieces=[],openClass;
    //this is called for each highlightable token
    function emit(value,cssClass){
        //consecutive tokens with the same class share one span
        if(cssClass!==openClass){
            //close previous span
            if(openClass)
                pieces.push("</span>");
            //open new span (tokens with an empty class are left bare)
            if(cssClass)
                pieces.push("<span class=\""+cssClass+"\">");
        }
        pieces.push(escapeHTML(value));
        openClass=cssClass;
    }
    parse(tokenize(element.textContent),emit);
    //close last span
    if(openClass)
        pieces.push("</span>");
    //set html
    element.innerHTML=pieces.join("");
}
//escape & and < so that text can be inserted into html as element content
//text: (string) text to escape
//returns the escaped string
//(& must be replaced first, otherwise the & of the inserted &lt; would be escaped again)
function escapeHTML(text){
    return text.replace(/&/g,"&amp;").replace(/</g,"&lt;");
}
//escape &, ", and ' so that text can be inserted into a quoted html attribute value
//text: (string) text to escape
//returns the escaped string
//(& must be replaced first, otherwise the & of the inserted entities would be escaped again)
function escapeHTMLAttribute(text){
    return text.replace(/&/g,"&amp;").replace(/"/g,"&quot;").replace(/'/g,"&#39;");
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement