Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- "IsLetter", FACharacterClasses.isLetter);
- result.set("IsDigit", FACharacterClasses.isDigit);
- result.set("IsLetterOrDigit", FACharacterClasses.isLetterOrDigit);
- result.set("IsWhiteSpace", FACharacterClasses.isWhiteSpace);
- result.set("alnum", FACharacterClasses.alnum);
- result.set("alpha", FACharacterClasses.alpha);
- result.set("cntrl", FACharacterClasses.cntrl);
- result.set("digit", FACharacterClasses.digit);
- result.set("graph", FACharacterClasses.graph);
- result.set("ascii", FACharacterClasses.ascii);
- result.set("blank", FACharacterClasses.blank);
- result.set("lower", FACharacterClasses.lower);
- result.set("print", FACharacterClasses.print);
- result.set("punct", FACharacterClasses.punct);
- result.set("space", FACharacterClasses.space);
- result.set("upper", FACharacterClasses.upper);
- result.set("word", FACharacterClasses.word);
- result.set("xdigit", FACharacterClasses.xdigit);
- FACharacterClasses._known = result;
- }
- return FACharacterClasses._known;
- }
- };
- export class FAMatch {
- public symbolId: number;
- public value: string;
- public position: number;
- public line: number;
- public column: number;
- constructor(symbolId:number=-1,value:string=null,position:number=0,line:number=1,column:number=1) {
- this.symbolId = symbolId;
- this.value = value;
- this.position = position;
- this.line = line;
- this.column = column;
- }
- public toString() {
- let sb: string = "";
- sb+="[SymbolId: ";
- sb+=this.symbolId;
- sb+=", Value: ";
- if (this.value !== null && this.value!==undefined)
- {
- sb+="\"";
- sb+=this.value.replace("\r", "\\r").replace("\t", "\\t").replace("\n", "\\n").replace("\v", "\\v");
- sb+="\", Position: ";
- }
- else
- {
- sb+="null, Position: ";
- }
- sb+=this.position;
- sb+=" (";
- sb+=this.line;
- sb+=", ";
- sb+=this.column;
- sb+=")]";
- return sb;
- }
- };
- export abstract class FARunner implements Iterable<FAMatch> {
- protected tabWidth: number = 4;
- protected position: number = -1;
- protected line: number = 1;
- protected column: number = 1;
- public abstract nextMatch() : FAMatch;
- public abstract reset() : void;
- public *[Symbol.iterator](): IterableIterator<FAMatch> {
- let match: FAMatch = new FAMatch();
- match=this.nextMatch();
- while (match.symbolId!==-2) {
- yield match;
- match = this.nextMatch();
- }
- }
- };
- export abstract class FAStringRunner extends FARunner {
- protected input_string: string;
- public set(str: string): void {
- this.input_string = str;
- this.position = -1;
- this.line = 1;
- this.column = 1;
- }
- public override reset() : void {
- this.position = -1;
- this.line = 1;
- this.column = 1;
- }
- protected advance(s: string, cp: number, len: number, first: boolean) : [number, number] {
- if(!first) {
- switch(String.fromCodePoint(cp)) {
- case '\n':
- ++this.line;
- this.column = 1;
- break;
- case '\r':
- this.column = 1;
- break;
- case '\t':
- this.column = ((this.column-1)/this.tabWidth)*(this.tabWidth+1);
- break;
- default:
- if(cp>31) {
- ++this.column;
- }
- break;
- }
- ++len;
- if(cp>65535) {
- ++this.position;
- ++len;
- }
- ++this.position;
- }
- if(this.position<s.length) {
- cp = s.codePointAt(this.position)
- } else {
- cp = -1;
- }
- return [cp, len];
- }
- };
- export class FAStringStateRunner extends FAStringRunner
- {
- readonly _fa: FA;
- readonly _blockEnds: FA[];
- readonly _states: FA[];
- readonly _nexts: FA[];
- readonly _initial: FA[];
- constructor(fa: FA, blockEnds: FA[] = null)
- {
- super();
- if (null === fa || undefined===fa)
- {
- throw "fa was not valid";
- }
- this._fa = fa;
- this._blockEnds = blockEnds;
- this._states = new Array<FA>();
- this._nexts = new Array<FA>();
- this._initial = new Array<FA>();
- }
- public override nextMatch() : FAMatch
- {
- return this._nextImpl(this.input_string);
- }
- private _nextImpl(
- s: string
- ): FAMatch
- {
- let dfaState: FA = null, dfaNext: FA = null, dfaInitial: FA = null;
- if (this.position == -1)
- {
- // first read
- ++this.position;
- }
- let len:number = 0;
- let cursor_pos: number = this.position;
- let line: number = this.line;
- let column: number = this.column;
- let ch: number = -1;
- let advr= this.advance(s, ch, len, true);ch = advr[0];len = advr[1];
- if (this._fa.isDeterministic)
- {
- dfaState = this._fa;
- dfaInitial = this._fa;
- }
- else
- {
- this._initial.length = 0;
- if (this._fa.isCompact)
- {
- this._initial.push(this._fa);
- }
- else
- {
- this._fa.fillEpsilonClosure(this._initial);
- }
- this._states.length = 0;
- this._states.push(...this._initial);
- }
- while (true)
- {
- if (dfaState != null)
- {
- dfaNext = dfaState.move(ch);
- }
- else
- {
- dfaNext = null;
- this._nexts.length = 0;
- FA.fillMove(this._states, ch, this._nexts);
- }
- if (dfaNext != null)
- {
- advr= this.advance(s, ch, len, false);ch = advr[0];len = advr[1];
- if (dfaNext.isDeterministic)
- {
- dfaState = dfaNext;
- }
- else
- {
- this._states.length = 0;
- if (dfaNext.isCompact)
- {
- this._states.push(dfaNext);
- }
- else
- {
- dfaNext.fillEpsilonClosure(this._states);
- }
- dfaState = null;
- }
- dfaNext = null;
- }
- else if (this._nexts.length > 0)
- {
- advr= this.advance(s, ch, len, false);ch = advr[0];len = advr[1];
- if (this._nexts.length === 1)
- {
- var ffa = this._nexts[0];
- // switch to deterministic mode if needed
- if (ffa.isDeterministic)
- {
- dfaState = ffa;
- }
- else
- {
- dfaNext = null;
- this._states.length = 0;
- if (!ffa.isCompact)
- {
- ffa.fillEpsilonClosure(this._states);
- }
- else
- {
- this._states.push(ffa);
- }
- dfaState = null;
- }
- }
- else
- {
- this._states.length = 0;
- FA.fillEpsilonClosureStates(this._nexts, this._states);
- }
- this._nexts.length = 0;
- }
- else
- {
- let acc: number;
- if (dfaState !== null)
- {
- acc = dfaState.acceptSymbol;
- }
- else
- {
- acc = FA.getFirstAcceptSymbol(this._states);
- }
- if (acc > -1)
- {
- if (this._blockEnds != null && this._blockEnds.length > acc && this._blockEnds[acc] !== null)
- {
- var be = this._blockEnds[acc];
- if (be.isDeterministic)
- {
- dfaState = be;
- dfaInitial = be;
- this._states.length = 0;
- }
- else
- {
- dfaState = null;
- dfaInitial = null;
- this._initial.length = 0;
- if (be.isCompact)
- {
- this._initial.push(be);
- }
- else
- {
- be.fillEpsilonClosure(this._initial);
- }
- this._states.length=0;
- this._states.push(...this._initial);
- }
- while (true)
- {
- if (dfaState !== null)
- {
- dfaNext = dfaState.move(ch);
- }
- else
- {
- dfaNext = null;
- this._nexts.length = 0;
- FA.fillMove(this._states, ch, this._nexts);
- }
- if (dfaNext !== null)
- {
- advr= this.advance(s, ch, len, false);ch = advr[0];len = advr[1];
- if (dfaNext.isDeterministic)
- {
- dfaState = dfaNext;
- }
- else
- {
- this._states.length = 0;
- if (dfaNext.isCompact)
- {
- this._states.push(dfaNext);
- }
- else
- {
- dfaNext.fillEpsilonClosure(this._states);
- }
- dfaState = null;
- }
- dfaNext = null;
- }
- else if (this._nexts.length > 0)
- {
- advr= this.advance(s, ch, len, false);ch = advr[0];len = advr[1];
- if (this._nexts.length == 1)
- {
- const ffa: FA = this._nexts[0];
- // switch to deterministic mode if needed
- if (ffa.isDeterministic)
- {
- dfaState = ffa;
- this._states.length = 0;
- }
- else
- {
- dfaNext = null;
- this._states.length = 0;
- if (!ffa.isCompact)
- {
- ffa.fillEpsilonClosure(this._states);
- }
- else
- {
- this._states.push(ffa);
- }
- dfaState = null;
- }
- }
- else
- {
- this._states.length = 0;
- FA.fillEpsilonClosureStates(this._nexts, this._states);
- }
- this._nexts.length=0;
- }
- else
- {
- if (dfaState !== null)
- {
- if (dfaState.isAccepting())
- {
- return new FAMatch(acc,
- s.substring(cursor_pos, cursor_pos+len)
- , cursor_pos, line, column);
- }
- }
- else
- {
- if (-1 < FA.getFirstAcceptSymbol(this._states))
- {
- return new FAMatch(acc,
- s.substring(cursor_pos,cursor_pos+ len)
- , cursor_pos, line, column);
- }
- }
- advr= this.advance(s, ch, len, false);ch = advr[0];len = advr[1];
- if (dfaInitial !== null)
- {
- this._states.length = 0;
- dfaState = dfaInitial;
- }
- else
- {
- dfaState = null;
- this._states.length = 0;
- this._states.push(...this._initial);
- }
- if (ch == -1)
- {
- return new FAMatch(-1,
- s.substring(cursor_pos, cursor_pos+len)
- , cursor_pos, line, column);
- }
- }
- }
- }
- else
- {
- return new FAMatch(acc,
- s.substring(cursor_pos, cursor_pos+len)
- , cursor_pos, line, column);
- }
- }
- else
- {
- if (dfaInitial !== null)
- {
- while (ch != -1 && dfaInitial.move(ch) === null)
- {
- advr= this.advance(s, ch, len, false);ch = advr[0];len = advr[1];
- }
- }
- else
- {
- this._states.length = 0;
- while (ch != -1 && FA.fillMove(this._initial, ch, this._states).length === 0)
- {
- advr= this.advance(s, ch, len, false);ch = advr[0];len = advr[1];
- }
- }
- if (len == 0)
- {
- return new FAMatch(-2, null, 0, 0, 0);
- }
- return new FAMatch(-1,
- s.substring(cursor_pos, cursor_pos+len)
- , cursor_pos, line, column);
- }
- }
- }
- }
- }
- export class FAStringDfaTableRunner extends FAStringRunner {
- private readonly _dfa : number[];
- private readonly _blockEnds: number[][];
- constructor(dfa: number[],blockEnds:number[][] = null) {
- super();
- this._dfa = dfa;
- this._blockEnds = blockEnds;
- }
- public nextMatch(): FAMatch {
- return this._nextImpl(this.input_string);
- }
- private _nextImpl(s: string) : FAMatch {
- let tlen: number;
- let tto: number;
- let prlen: number;
- let pmin: number;
- let pmax: number;
- let i: number;
- let j: number;
- let state: number = 0;
- let acc: number;
- if (this.position === -1)
- {
- // first read
- ++this.position;
- }
- let len: number = 0;
- let cursor_pos: number = this.position;
- let line: number = this.line;
- let column: number = this.column;
- let ch: number = -1;
- let cont: boolean = false;
- let advr= this.advance(s, ch, len, true);ch = advr[0];len = advr[1];
- while(true) {
- // accept symbol
- acc = this._dfa[state];
- ++state;
- // transitions count
- tlen = this._dfa[state];
- ++state;
- for (let i: number = 0; i < tlen; ++i)
- {
- // destination state
- tto = this._dfa[state];
- ++state;
- // number of ranges to check
- prlen = this._dfa[state];
- ++state;
- for (let j:number = 0; j < prlen; ++j)
- {
- pmin = this._dfa[state];
- ++state;
- pmax = this._dfa[state];
- ++state;
- if (ch < pmin)
- {
- // early out
- state += ((prlen - (j + 1)) * 2);
- j = prlen;
- }
- else if (ch <= pmax)
- {
- advr= this.advance(s, ch, len, false);ch = advr[0];len = advr[1];
- state = tto;
- cont = true;
- i=tlen;
- break;
- }
- }
- }
- if(cont===true) {
- cont = false;
- continue;
- }
- break;
- }
- if (acc !== -1)
- {
- const sym: number = acc;
- const be: number[] = (this._blockEnds !== null && this._blockEnds.length > acc) ? this._blockEnds[acc] : null;
- if (be !== null)
- {
- state = 0;
- while(true) {
- acc = be[state];
- ++state;
- tlen = be[state];
- ++state;
- for (let i: number = 0; i < tlen; ++i)
- {
- tto = be[state];
- ++state;
- prlen = be[state];
- ++state;
- for (let j: number = 0; j < prlen; ++j)
- {
- pmin = be[state];
- ++state;
- pmax = be[state];
- ++state;
- if (ch < pmin)
- {
- state += ((prlen - (j + 1)) * 2);
- j = prlen;
- }
- else if (ch <= pmax)
- {
- advr= this.advance(s, ch, len, false);ch = advr[0];len = advr[1];
- state = tto;
- i=tlen;
- break;
- }
- }
- }
- if (acc !== -1)
- {
- return new FAMatch(sym,
- s.substring(cursor_pos,cursor_pos+len)
- , cursor_pos, line, column);
- }
- if (ch === -1)
- {
- return new FAMatch(-1,
- s.substring(cursor_pos, cursor_pos+len)
- , cursor_pos, line, column);
- }
- advr= this.advance(s, ch, len, false);ch = advr[0];len = advr[1];
- state = 0;
- // restart the state machine
- }
- }
- return new FAMatch(acc,
- s.substring(cursor_pos, cursor_pos+len)
- , cursor_pos, line, column);
- }
- // error. keep trying until we find a potential transition.
- while (ch !== -1)
- {
- var moved = false;
- state = 1;
- tlen = this._dfa[state];
- ++state;
- for (let i: number = 0; i < tlen; ++i)
- {
- ++state;
- prlen = this._dfa[state];
- ++state;
- for (let j: number = 0; j < prlen; ++j)
- {
- pmin = this._dfa[state];
- ++state;
- pmax = this._dfa[state];
- ++state;
- if (ch < pmin)
- {
- state += ((prlen - (j + 1)) * 2);
- j = prlen;
- }
- else if (ch <= pmax)
- {
- moved = true;
- }
- }
- }
- if (moved)
- {
- break;
- }
- advr= this.advance(s, ch, len, false);ch = advr[0];len = advr[1];
- }
- if (len === 0)
- {
- return new FAMatch(-2, null, 0, 0, 0);
- }
- return new FAMatch(-1,
- s.substring(cursor_pos, cursor_pos+len)
- , cursor_pos, line, column);
- }
- };
- class _ParseContext {
- public input: string;
- public codepoint: number = -2;
- public position: number = 0;
- public line: number = 1;
- public column: number = 0;
- public tabWidth: number = 4;
- public capture_buffer: string = "";
- public constructor(input: string, tabWidth: number = 4, position: number = 0, line: number = 1, column: number = 0) {
- this.input = input;
- this.tabWidth = tabWidth;
- this.position = position;
- this.line = line;
- this.column = column;
- }
- public advance(): number {
- if (this.position>= this.input.length) {
- this.codepoint = -1;
- } else {
- this.codepoint = this.input.codePointAt(this.position);
- }
- switch (this.codepoint) {
- case 10:
- ++this.line;
- this.column = 0;
- break;
- case 13:
- this.column = 0;
- break;
- case 9:
- this.column = (((this.column - 1) / this.tabWidth) + 1) * this.tabWidth + 1;
- break;
- default:
- if (this.codepoint > 31)
- ++this.column;
- break;
- }
- ++this.position;
- return this.codepoint;
- }
- public capture() : void {
- this.capture_buffer+=String.fromCodePoint(this.codepoint);
- }
- public ensureStarted() : void {
- if(this.codepoint==-2) {
- this.advance();
- }
- }
- private static _getCodepoints(values: string[]): number[] {
- const result: number[] = new Array<number>(values.length);
- for (let i: number = 0; i < values.length; ++i) {
- result[i] = values[i].codePointAt(0);
- }
- return result;
- }
- private _getErrorMessage(expecting: number[]): string {
- let sb: string = null;
- switch (expecting.length) {
- case 0:
- break;
- case 1:
- sb = "";
- if (-1 == expecting[0])
- sb += "end of input";
- else {
- sb += "\"";
- sb += String.fromCodePoint(expecting[0]);
- sb += "\"";
- }
- break;
- case 2:
- sb = "";
- if (-1 == expecting[0])
- sb += "end of input";
- else {
- sb += "\"";
- sb += String.fromCodePoint(expecting[0]);
- sb += "\"";
- }
- sb += " or ";
- if (-1 == expecting[1])
- sb += "end of input";
- else {
- sb += "\"";
- sb += String.fromCodePoint(expecting[1]);
- sb += "\"";
- }
- break;
- default: // length > 2
- sb = ""
- if (-1 == expecting[0])
- sb += "end of input";
- else {
- sb += "\"";
- sb += String.fromCodePoint(expecting[0]);
- sb += "\"";
- }
- const l: number = expecting.length - 1;
- let i: number = 1;
- for (; i < l; ++i) {
- sb += ", ";
- if (-1 == expecting[i])
- sb += "end of input";
- else {
- sb += "\"";
- sb += String.fromCodePoint(expecting[1]);
- sb += "\"";
- }
- }
- sb += ", or ";
- if (-1 == expecting[i])
- sb += "end of input";
- else {
- sb += "\"";
- sb += String.fromCodePoint(expecting[i]);
- sb += "\"";
- }
- break;
- }
- if (-1 == this.codepoint) {
- if (0 == expecting.length)
- return "Unexpected end of input";
- return "Unexpected end of input. Expecting " + sb;
- }
- if (0 == expecting.length)
- return "Unexpected character \"" + String.fromCodePoint(this.codepoint) + "\" in input";
- return "Unexpected character \"" + String.fromCodePoint(this.codepoint) + "\" in input. Expecting " + sb;
- }
- public expecting(...values: string[]) {
- const codepoints: number[] = _ParseContext._getCodepoints(values);
- if (this.codepoint === -2)
- throw "The cursor is before the beginning of the input at " + this.position;
- switch (codepoints.length) {
- case 0:
- if (this.codepoint === -1)
- throw "Unexpected end of input at " + this.position;
- break;
- case 1:
- if (codepoints[0] != this.codepoint)
- throw this._getErrorMessage(codepoints) + " at " + this.position;
- break;
- default:
- if (!codepoints.includes(this.codepoint))
- throw this._getErrorMessage(codepoints) + " at " + this.position;
- break;
- }
- }
- public trySkipWhiteSpace(): boolean
- {
- this.ensureStarted();
- if (this.codepoint==-1) {
- return false;
- }
- if(!FACharacterClasses.space.includes(this.codepoint)) {
- return false;
- }
- this.advance();
- while (this.codepoint!=-1 && FACharacterClasses.space.includes(this.codepoint)) {
- this.advance();
- }
- return true;
- }
- public tryReadDigits(): boolean
- {
- this.ensureStarted();
- if (this.codepoint==-1) {
- return false;
- }
- if(!FACharacterClasses.digit.includes(this.codepoint)) {
- return false;
- }
- this.capture();
- this.advance();
- while (this.codepoint!=-1 && FACharacterClasses.digit.includes(this.codepoint))
- {
- this.capture();
- this.advance();
- }
- return true;
- }
- public tryReadUntil(character: number, readCharacter: boolean = true) : boolean
- {
- this.ensureStarted();
- if (0 > character) character = -1;
- this.capture();
- if (this.codepoint === character)
- {
- return true;
- }
- while (-1 != this.advance() && this.codepoint != character)
- this.capture();
- //
- if (this.codepoint == character)
- {
- if (readCharacter)
- {
- this.capture();
- this.advance();
- }
- return true;
- }
- return false;
- }
- };
- class _ExpEdge {
- public exp : string;
- public from : FA;
- public to : FA;
- };
- export class FADotGraphOptions {
- public dpi: number = 300;
- public statePrefix: string = "q";
- public debugString: string = null;
- public blockEnds: FA[] = null;
- public debugSourceNfa: FA = null;
- public debugShowNfa: boolean = false;
- public hideAcceptSymbolIds: boolean = false;
- public acceptSymbolNames: string[] = null;
- public vertical: boolean = false;
- };
- export interface FAProgressCallbackType { (value: number): void };
- export class FAProgress {
- private _callback: FAProgressCallbackType = null;
- public value: number = 0;
- public constructor(callback: FAProgressCallbackType = null) {
- this._callback = callback;
- }
- public report(value: number): void {
- this.value = value;
- if (this._callback !== null) {
- this._callback(value)
- }
- }
- };
- export class FARange {
- public min: number;
- public max: number;
- public constructor(min: number, max: number) {
- this.min = min;
- this.max = max;
- }
- public intersects(arg: FARange | number): boolean {
- let range: FARange;
- let codepoint: number;
- if (typeof arg === 'number') {
- codepoint = arg;
- return codepoint >= this.min && codepoint <= this.max;
- }
- range = arg;
- return (range.min >= this.min && range.min <= this.max) ||
- (range.max >= this.min && range.max <= this.max);
- }
- public static toUnpacked(packedRanges: number[]): FARange[] {
- const result: FARange[] = new Array<FARange>(packedRanges.length / 2);
- for (let i: number = 0; i < result.length; ++i) {
- const j: number = i * 2;
- result[i] = new FARange(packedRanges[j], packedRanges[j + 1]);
- }
- return result;
- }
- public static toPacked(pairs: FARange[]): number[] {
- const result: number[] = new Array<number>(pairs.length * 2);
- for (let ic: number = pairs.length, i: number = 0; i < ic; ++i) {
- var pair = pairs[i];
- var j = i * 2;
- result[j] = pair.min;
- result[j + 1] = pair.max;
- }
- return result;
- }
- public static *toNotRanges(ranges: Iterable<FARange>): Iterable<FARange> {
- // expects ranges to be normalized
- let last: number = 0x10ffff;
- const e: Iterator<FARange> = ranges[Symbol.iterator]();
- let cur = e.next();
- if (cur.done) {
- yield new FARange(0x0, 0x10ffff);
- return;
- }
- if (cur.value.min > 0) {
- yield new FARange(0, cur.value.min - 1);
- last = cur.value.max;
- if (0x10ffff <= last)
- return;
- }
- else if (cur.value.min == 0) {
- last = cur.value.max;
- if (0x10ffff <= last)
- return;
- }
- while (!cur.done) {
- if (0x10ffff <= last)
- return;
- if (last + 1 < cur.value.min)
- yield new FARange(last + 1, (cur.value.min - 1));
- last = cur.value.max;
- cur = e.next();
- }
- if (0x10ffff >= last)
- yield new FARange((last + 1), 0x10ffff);
- }
- }
- export class FATransition {
- public min: number;
- public max: number;
- public to: FA;
- public constructor(to: FA, min: number = -1, max: number = -1) {
- this.min = min;
- this.max = max;
- this.to = to;
- }
- public isEpsilon(): boolean {
- return this.min == -1 || this.max == -1;
- }
- }
- class _FListNode {
- public state: FA = null;
- public stateList: _FList = null;
- public next: _FListNode = null;
- public prev: _FListNode = null;
- public constructor(q: FA, sl: _FList) {
- this.state = q;
- this.stateList = sl;
- if (sl.count++ === 0) {
- sl.first = this;
- sl.last = this;
- }
- else {
- sl.last.next = this;
- this.prev = sl.last;
- sl.last = this;
- }
- }
- public remove(): void {
- --this.stateList.count;
- if (this.stateList.first === this) {
- this.stateList.first = this.next;
- } else {
- this.prev.next = this.next;
- }
- if (this.stateList.last === this) {
- this.stateList.last = this.prev;
- } else {
- this.next.prev = this.prev;
- }
- }
- };
- class _FList {
- public count: number = 0;
- public first: _FListNode = null;
- public last: _FListNode = null;
- public add(q: FA): _FListNode {
- return new _FListNode(q, this);
- }
- };
- class _FKeyPair {
- public key: number;
- public value: number;
- constructor(key: number, value: number) {
- this.key = key;
- this.value = value;
- }
- };
- export class FA {
- public isCompact: boolean = true;
- public isDeterministic: boolean = true;
- public tag: number = -1;
- public acceptSymbol: number = -1;
- public id: number = -1;
- public transitions: FATransition[] = [];
- public fromStates: FA[] = null;
- private _minimizationTag: number = -1;
- constructor(accept: number = -1) {
- this.acceptSymbol = accept;
- }
- public fillClosure(result: FA[] = []): FA[] {
- if (result.includes(this)) {
- return result;
- }
- // add this instance
- result.push(this);
- // add sub instances
- for (const transition of this.transitions) {
- const state: FA = transition.to;
- state.fillClosure(result);
- }
- // Return the list of fa instances
- return result;
- }
- public fillEpsilonClosure(result: FA[] = []): FA[] {
- if (result.includes(this)) {
- return result;
- }
- // add this instance
- result.push(this);
- // add sub instances
- for (const transition of this.transitions) {
- if (transition.isEpsilon()) {
- const state: FA = transition.to;
- state.fillEpsilonClosure(result);
- }
- }
- // Return the list of fa instances
- return result;
- }
- public static fillEpsilonClosureStates(states: Iterable<FA>, result: FA[] = []): FA[] {
- for (let state of states) {
- state.fillEpsilonClosure(result);
- }
- return result;
- }
- public isAccepting(): boolean {
- return this.acceptSymbol > -1;
- }
- public addEpsilon(to: FA, compact: boolean = true): void {
- if (to === undefined || to === null) {
- throw "to was not set";
- }
- if (compact) {
- if (this.acceptSymbol < 0 && to.acceptSymbol > -1) {
- this.acceptSymbol = to.acceptSymbol;
- }
- for (let i: number = 0; i < to.transitions.length; ++i) {
- var fat = to.transitions[i];
- if (!fat.isEpsilon()) {
- this.addTransition(new FARange(fat.min, fat.max), fat.to);
- }
- else {
- this.addEpsilon(fat.to, true);
- }
- }
- }
- else {
- var found = false;
- let i = 0;
- for (; i < this.transitions.length; ++i) {
- var fat = this.transitions[i];
- if (!fat.isEpsilon()) break;
- if (fat.to === to) {
- found = true;
- break;
- }
- }
- if (!found) {
- this.transitions.splice(i, 0, new FATransition(to));
- this.isCompact = false;
- this.isDeterministic = false;
- }
- }
- }
- public addTransition(range: FARange, to: FA, compact: boolean = true): void {
- if (to === undefined || to === null) {
- throw "to was not set";
- }
- if (range.min == -1 || range.max == -1) {
- this.addEpsilon(to, compact);
- return;
- }
- if (range.min > range.max) {
- const tmp: number = range.min;
- range.min = range.max;
- range.max = tmp;
- }
- var insert = -1;
- for (let i: number = 0; i < this.transitions.length; ++i) {
- const fat: FATransition = this.transitions[i];
- if (to === fat.to) {
- if (range.min === fat.min &&
- range.max == fat.max) {
- return;
- }
- }
- if (this.isDeterministic) {
- if (range.intersects(
- new FARange(fat.min, fat.max))) {
- this.isDeterministic = false;
- }
- }
- if (range.max > fat.max) {
- insert = i;
- }
- if (!this.isDeterministic &&
- range.max < fat.min) {
- break;
- }
- }
- this.transitions.splice(insert + 1, 0, new FATransition(to, range.min, range.max));
- }
- private static _crackSet(str: string, closure: FA[]): FA[] {
- const result: FA[] = [];
- if (str == "") {
- return result;
- }
- const sa: string[] = str.split(" ");
- for (const s of sa) {
- result.push(closure[Number.parseInt(s, 10)]);
- }
- return result;
- }
- private static _makeSet(closure: FA[], states: Iterable<FA>): string {
- let result: string = "";
- let delim: string = "";
- let ns: number[] = [];
- for (const fa of states) {
- const idx = closure.indexOf(fa);
- ns.push(idx);
- }
- ns.sort((x, y) => x - y);
- for (const i of ns) {
- result += (delim + i.toString(10));
- delim = " ";
- }
- return result;
- }
- private static _determinize(target: FA, progress: FAProgress = null): FA {
- if (progress === null) {
- progress = new FAProgress();
- }
- let prog: number = 0;
- progress.report(0);
- const closure: FA[] = target.fillClosure();
- const p: Set<number> = new Set<number>();
- for (const ffa of closure) {
- p.add(0);
- for (const t of ffa.transitions) {
- p.add(t.min);
- if (t.max < 0x10ffff) {
- p.add(t.max + 1);
- }
- }
- }
- const points: number[] = [...p];
- points.sort((x, y) => x - y);
- ++prog; progress.report(prog);
- const sets: Map<string, Set<FA>> = new Map<string, Set<FA>>();
- const working: string[] = [];
- const dfaMap: Map<string, FA> = new Map<string, FA>();
- const initStates: FA[] = closure[0].fillEpsilonClosure();
- let initial: string = FA._makeSet(closure, initStates);
- sets.set(initial, new Set<FA>(initStates));
- working.push(initial);
- const result: FA = new FA();
- result.isDeterministic = true;
- result.fromStates = initStates;
- result.acceptSymbol = FA.getFirstAcceptSymbol(initStates);
- dfaMap.set(initial, result);
- while (working.length > 0) {
- const s: string = working[0];
- working.shift();
- const dfa: FA = dfaMap.get(s)!;
- const crackedS1: FA[] = FA._crackSet(s, closure);
- for (const q of crackedS1) {
- if (q.isAccepting()) {
- dfa.acceptSymbol = q.acceptSymbol;
- break;
- }
- }
- let i: number = 0;
- for (const pnt of points) {
- const set: Set<FA> = new Set<FA>();
- for (const c of crackedS1) {
- const ecs: FA[] = c.fillEpsilonClosure();
- for (const efa of ecs) {
- for (let trns of efa.transitions) {
- if (!trns.isEpsilon()) {
- if (trns.min <= pnt && pnt <= trns.max) {
- for (const eefa of trns.to.fillEpsilonClosure()) {
- set.add(eefa);
- }
- }
- }
- }
- }
- }
- const skey: string = FA._makeSet(closure, set);
- if (!sets.has(skey)) {
- sets.set(skey, set);
- working.push(skey);
- let newFa: FA = new FA();
- newFa.isDeterministic = true;
- newFa.isCompact = true;
- newFa.fromStates = Array.from(set.values());
- dfaMap.set(skey, newFa);
- }
- const dst: FA = dfaMap.get(skey)!;
- const first: number = pnt;
- let last: number;
- if (i + 1 < points.length) {
- last = points[i + 1] - 1;
- } else {
- last = 0x10ffff;
- }
- dfa.transitions.push(new FATransition(dst, first, last));
- ++i;
- }
- ++prog; progress.report(prog);
- }
- for (const ffa of result.fillClosure()) {
- const itrns: FATransition[] = [...ffa.transitions];
- for (const trns of itrns) {
- const acc: FA[] = trns.to.fillClosure().filter((value: FA, index: number, array: FA[]) => value.isAccepting());
- if (acc.length == 0) {
- ffa.transitions.splice(ffa.transitions.indexOf(trns), 1);
- }
- }
- ++prog; progress.report(prog);
- }
- ++prog; progress.report(prog);
- return result;
- }
- public isDfa(): boolean {
- for (const ffa of this.fillClosure()) {
- if (!ffa.isDeterministic) {
- return false;
- }
- }
- return true;
- }
- public toDfa(progress: FAProgress = null): FA {
- return FA._determinize(this, progress);
- }
- public totalize(closure: FA[] = null): void {
- if (closure === null) {
- closure = this.fillClosure();
- }
- const s: FA = new FA();
- s.transitions.push(new FATransition(s, 0, 0x10ffff));
- for (const p of closure) {
- let maxi: number = 0;
- const sortedTrans: FATransition[] = [...p.transitions];
- sortedTrans.sort((x: FATransition, y: FATransition) => {
- let c: number = x.max - y.max; if (0 != c) return c; return x.min - y.min;
- });
- for (const t of sortedTrans) {
- if (!t.isEpsilon()) {
- if (t.min > maxi) {
- p.transitions.push(new FATransition(s, maxi, t.min - 1));
- }
- if (t.max + 1 > maxi) {
- maxi = t.max + 1;
- }
- }
- }
- if (maxi <= 0x10ffff) {
- p.transitions.push(new FATransition(s, maxi, 0x10ffff));
- }
- }
- }
- private _step(input: number): FA {
- for (let ic: number = this.transitions.length, i: number = 0; i < ic; ++i) {
- var t = this.transitions[i];
- if (input >= t.min && input <= t.max)
- return t.to;
- }
- return null;
- }
- private static _minimize(a: FA, progress: FAProgress): FA {
- let prog: number = 0;
- if (progress === null) {
- progress = new FAProgress();
- }
- progress.report(prog);
- a = a.toDfa(progress);
- const tr: FATransition[] = a.transitions;
- if (tr.length == 1) {
- const t: FATransition = tr[0];
- if (t.to === a && t.min == 0 && t.max == 0x10ffff) {
- return a;
- }
- }
- a.totalize();
- ++prog; progress.report(prog);
- const cl: FA[] = a.fillClosure();
- const states: FA[] = new Array<FA>();
- let mtag: number = 0;
- for (const q of cl) {
- q._minimizationTag = mtag;
- states.push(q);
- ++mtag;
- }
- let pp: number[] = [];
- for (let ic: number = cl.length, i = 0; i < ic; ++i) {
- const ffa: FA = cl[i];
- pp.push(0);
- for (const t of ffa.transitions) {
- pp.push(t.min);
- if (t.max < 0x10ffff) {
- pp.push(t.max + 1);
- }
- }
- }
- const sigma: number[] = new Array<number>();
- for (let i = 0; i < pp.length; ++i) {
- sigma.push(pp[i]);
- }
- sigma.sort();
- // initialize the data structures
- const reverse: FA[][][] = new Array<Array<Array<FA>>>();
- for (const s of states) {
- const arr: FA[][] = new Array<Array<FA>>(sigma.length);
- arr.fill(null, 0, arr.length - 1);
- reverse.push(arr);
- }
- ++prog; progress.report(prog);
- const reverseNonempty: boolean[][] = Array<Array<boolean>>();//[states.length,sigma.length];
- for (let i: number = 0; i < states.length; ++i) {
- const arr: boolean[] = new Array<boolean>(sigma.length);
- arr.fill(false, 0, arr.length - 1);
- reverseNonempty.push(arr);
- }
- const partition: FA[][] = new Array<Array<FA>>(states.length);
- partition.fill(null, 0, partition.length - 1);
- ++prog; progress.report(prog);
- const block: number[] = new Array<number>(states.length);
- const active: _FList[][] = new Array<Array<_FList>>(); // [states.length,sigma.length];
- for (let i: number = 0; i < states.length; ++i) {
- const arr: _FList[] = new Array<_FList>(sigma.length);
- arr.fill(null, 0, arr.length - 1);
- active.push(arr);
- }
- const active2: _FListNode[][] = new Array<Array<_FListNode>>()//[states.length,sigma.length];
- for (let i: number = 0; i < states.length; ++i) {
- const arr: _FListNode[] = new Array<_FListNode>(sigma.length);
- arr.fill(null, 0, arr.length - 1);
- active2.push(arr);
- }
- const pending: _FKeyPair[] = new Array<_FKeyPair>();
- const pending2: boolean[][] = Array<Array<boolean>>();//[sigma.length,states.length];
- for (let i: number = 0; i < sigma.length; ++i) {
- const arr: boolean[] = new Array<boolean>(states.length);
- arr.fill(false, 0, arr.length - 1);
- pending2.push(arr);
- }
- let split: FA[] = new Array<FA>();
- const split2: boolean[] = new Array<boolean>(states.length);
- split2.fill(false, 0, split2.length - 1);
- let refine: number[] = new Array<number>();
- const refine2: boolean[] = new Array<boolean>(states.length);
- split2.fill(false, 0, refine2.length - 1);
- const splitBlock: FA[][] = new Array<Array<FA>>(states.length);
- splitBlock.fill(null, 0, splitBlock.length - 1);
- ++prog; progress.report(prog);
- for (let q: number = 0; q < states.length; ++q) {
- splitBlock[q] = new Array<FA>();
- partition[q] = new Array<FA>();
- for (let x: number = 0; x < sigma.length; ++x) {
- reverse[q][x] = new Array<FA>();
- active[q][x] = new _FList();
- }
- }
- // Find the initial partition and reverse edges
- for (const qq of states) {
- const j: number = qq.isAccepting() ? 0 : 1;
- partition[j].push(qq);
- block[qq._minimizationTag] = j;
- for (let x: number = 0; x < sigma.length; ++x) {
- const y: number = sigma[x];
- const p: FA = qq._step(y);
- const pn: number = p._minimizationTag;
- if (reverse[pn] !== null && reverse[pn][x] !== null) {
- reverse[pn][x].push(qq);
- }
- reverseNonempty[pn][x] = true;
- }
- ++prog; progress.report(prog);
- }
- // initialize the active sets
- for (let j: number = 0; j <= 1; ++j) {
- for (let x: number = 0; x < sigma.length; ++x) {
- const part: FA[] = partition[j];
- for (const qq of part) {
- if (reverseNonempty[qq._minimizationTag][x] === true) {
- active2[qq._minimizationTag][x] = active[j][x].add(qq);
- }
- }
- }
- ++prog; progress.report(prog);
- }
- // initialize pending
- for (let x: number = 0; x < sigma.length; ++x) {
- const a0: number = active[0][x].count;
- const a1: number = active[1][x].count;
- const j: number = a0 <= a1 ? 0 : 1;
- pending.push(new _FKeyPair(j, x));
- pending2[x][j] = true;
- }
- // process pending until fixed point
- let k: number = 2;
- while (pending.length > 0) {
- const ip: _FKeyPair = pending.shift();
- const p: number = ip.key;
- const x: number = ip.value;
- pending2[x][p] = false;
- for (let m: _FListNode = active[p][x].first; m !== null; m = m.next) {
- for (const s of reverse[m.state._minimizationTag][x]) {
- if (!split2[s._minimizationTag]) {
- split2[s._minimizationTag] = true;
- split.push(s);
- const j: number = block[s._minimizationTag];
- splitBlock[j].push(s);
- if (refine2[j] !== true) {
- refine2[j] = true;
- refine.push(j);
- }
- }
- }
- }
- ++prog; progress.report(prog);
- // refine blocks
- for (const j of refine) {
- if (splitBlock[j].length < partition[j].length) {
- const b1: Array<FA> = partition[j];
- const b2: Array<FA> = partition[k];
- const e: Array<FA> = splitBlock[j];
- for (const s of e) {
- b1.splice(b1.indexOf(s), 1);
- b2.push(s);
- block[s._minimizationTag] = k;
- for (let c: number = 0; c < sigma.length; ++c) {
- const sn: _FListNode = active2[s._minimizationTag][c];
- if (sn !== null && sn!==undefined && sn.stateList === active[j][c]) {
- sn.remove();
- active2[s._minimizationTag][c] = active[k][c].add(s);
- }
- }
- }
- // update pending
- for (let c: number = 0; c < sigma.length; ++c) {
- const aj: number = active[j][c].count;
- const ak: number = active[k][c].count;
- if (!pending2[c][j] && 0 < aj && aj <= ak) {
- pending2[c][j] = true;
- pending.push(new _FKeyPair(j, c));
- } else {
- pending2[c][k] = true;
- pending.push(new _FKeyPair(k, c));
- }
- }
- ++k;
- }
- const sbj: Array<FA> = splitBlock[j];
- for (const s of sbj) {
- split2[s._minimizationTag] = false;
- }
- refine2[j] = false;
- splitBlock[j].length = 0;
- ++prog; progress.report(prog);
- }
- split.length = 0;
- refine.length = 0;
- }
- ++prog; progress.report(prog);
- // make a new state for each equiv class, set initial state
- const newstates: FA[] = new Array<FA>();
- for (let n: number = 0; n < k; ++n) {
- const s: FA = new FA();
- s.isDeterministic = true;
- newstates.push(s);
- const pn: Array<FA> = partition[n];
- for (const q of pn) {
- if (q === a) {
- a = s;
- }
- s.id = q.id;
- s.acceptSymbol = q.acceptSymbol;
- s._minimizationTag = q._minimizationTag;
- q._minimizationTag = n;
- }
- ++prog; progress.report(prog);
- }
- // build transitions and set acceptance
- for (const s of newstates) {
- const st: FA = states[s._minimizationTag];
- s.acceptSymbol = st.acceptSymbol;
- for (const t of st.transitions) {
- s.transitions.push(new FATransition(newstates[t.to._minimizationTag], t.min, t.max));
- }
- ++prog; progress.report(prog);
- }
- // remove dead transitions
- for (const ffa of a.fillClosure()) {
- const itrns: FATransition[] = new Array<FATransition>(...ffa.transitions)
- ffa.transitions = new Array<FATransition>();
- for (const trns of itrns) {
- if (!trns.to.isTrap()) {
- ffa.transitions.push(trns);
- }
- }
- }
- return a;
- }
- public toMinimizedDfa(progress: FAProgress = null) {
- return FA._minimize(this, progress);
- }
- public clone(): FA {
- const result: FA[] = [];
- const closure = this.fillClosure();
- for (let j = 0; j < closure.length; ++j) {
- result.push(new FA());
- }
- let i: number = 0;
- for (const ffa of closure) {
- result[i].isCompact = ffa.isCompact;
- result[i].isDeterministic = ffa.isDeterministic;
- result[i].acceptSymbol = ffa.acceptSymbol;
- result[i].tag = ffa.tag;
- for (const trns of ffa.transitions) {
- result[i].transitions.push(new FATransition(result[closure.indexOf(trns.to)], trns.min, trns.max));
- }
- ++i;
- }
- return result[0];
- }
- private static _toExpressionFillEdgesIn(edges: _ExpEdge[], node: FA) : _ExpEdge[]
- {
- const result:_ExpEdge[] = new Array<_ExpEdge>();
- for (let i: number = 0; i < edges.length; ++i)
- {
- const edge: _ExpEdge = edges[i];
- if (edge.to === node)
- {
- result.push(edge);
- }
- }
- return result;
- }
- private static _toExpressionFillEdgesOut(edges: _ExpEdge[], node: FA) : _ExpEdge[]
- {
- const result:_ExpEdge[] = new Array<_ExpEdge>();
- for (let i: number = 0; i < edges.length; ++i)
- {
- const edge: _ExpEdge = edges[i];
- if (edge.from === node)
- {
- result.push(edge);
- }
- }
- return result;
- }
- private static _toExpressionOrJoin(strings: string[]) : string
- {
- if (strings.length == 0) return ""
- if (strings.length == 1) return strings[0];
- return "("+ strings.join("|")+ ")";
- }
- private static _toExpressionKleeneStar(sb : string,s: string,noWrap: boolean) : string
- {
- if (s===undefined || s===null || s.length==0) return "";
- if (noWrap || s.length == 1)
- {
- sb+=s;
- sb+="*";
- return;
- }
- sb+="(";
- sb+=s;
- sb+=")*";
- }
- private static _toExpressionFillEdgesOrphanState(edges: _ExpEdge[], node: FA,) :_ExpEdge[]
- {
- const result: _ExpEdge[] = new Array<_ExpEdge>();
- for (let i: number = 0; i < edges.length; ++i)
- {
- var edge = edges[i];
- if (edge.from === node || edge.to === node)
- {
- continue;
- }
- result.push(edge);
- }
- return result;
- }
- private static _toExpression(fa: FA) : string
- {
- const closure: FA[] = new Array<FA>();
- let fsmEdges: _ExpEdge[] = new Array<_ExpEdge>();
- let first: FA, final: FA = null;
- first = fa;
- let acc:FA[] = first.fillClosure().filter((elem)=>elem.isAccepting());
- if (acc.length == 1)
- {
- final = acc[0];
- }
- else if (acc.length > 1)
- {
- fa = fa.clone();
- first = fa;
- acc = fa.fillClosure().filter((elem)=>elem.isAccepting());
- final = new FA(acc[0].acceptSymbol);
- for (let i: number = 0; i < acc.length; ++i)
- {
- let a:FA = acc[i];
- a.addEpsilon(final, false);
- a.acceptSymbol = -1;
- }
- }
- closure.length=0;
- first.fillClosure(closure);
- let sb: string = "";
- // build the machine from the FA
- var trnsgrp = new Map<FA, FARange[]>();
- for (let q: number = 0; q < closure.length; ++q)
- {
- var cfa = closure[q];
- trnsgrp.clear();
- for(const trns of cfa.fillInputTransitionRangesGroupedByState(true,trnsgrp))
- {
- sb="";
- if (trns[1].length == 1 && trns[1][0].min == trns[1][0].max)
- {
- const range: FARange = trns[1][0];
- if (range.max==-1||range.min==-1)
- {
- var eedge = new _ExpEdge();
- eedge.exp = "";
- eedge.from = cfa;
- eedge.to = trns[0];
- fsmEdges.push(eedge);
- continue;
- }
- sb+=FA._appendCharTo(String.fromCodePoint(range.min));
- }
- else
- {
- sb+="[";
- sb+=FA._appendRangeTo(trns[1]);
- sb+="]";
- }
- var edge = new _ExpEdge();
- edge.exp = sb;
- edge.from = cfa;
- edge.to = trns[0];
- fsmEdges.push(edge);
- }
- }
- let tmp: FA = new FA();
- tmp.addEpsilon(first, false);
- const q0: FA = first;
- first = tmp;
- tmp = new FA(final.acceptSymbol);
- const qLast: FA = final;
- final.acceptSymbol = -1;
- final.addEpsilon(tmp, false);
- final = tmp;
- // add first and final
- let newEdge: _ExpEdge = new _ExpEdge();
- newEdge.exp = "";
- newEdge.from = first;
- newEdge.to = q0;
- fsmEdges.push(newEdge);
- newEdge = new _ExpEdge();
- newEdge.exp = "";
- newEdge.from = qLast;
- newEdge.to = final;
- fsmEdges.push(newEdge);
- closure.splice(0,0,first);
- closure.push(final);
- let inEdges: _ExpEdge[] = new Array<_ExpEdge>();
- inEdges.fill(null,0,fsmEdges.length-1);
- let outEdges: _ExpEdge[] = new Array<_ExpEdge>();
- outEdges.fill(null,0,fsmEdges.length-1);
- while (closure.length > 2)
- {
- var node = closure[1];
- var loops = new Array<string>();
- inEdges.length=0;
- inEdges=FA._toExpressionFillEdgesIn(fsmEdges, node);
- for (let i: number = 0; i < inEdges.length; ++i)
- {
- var edge = inEdges[i];
- if (edge.from === edge.to)
- {
- loops.push(edge.exp);
- }
- }
- sb=FA._toExpressionKleeneStar(sb,FA._toExpressionOrJoin(loops), loops.length > 1);
- const middle: string = sb;
- for (let i: number = 0; i < inEdges.length; ++i)
- {
- var inEdge = inEdges[i];
- if (inEdge.from == inEdge.to)
- {
- continue;
- }
- outEdges.length = 0;
- outEdges=FA._toExpressionFillEdgesOut(fsmEdges, node);
- for (let j: number = 0; j < outEdges.length; ++j)
- {
- var outEdge = outEdges[j];
- if (outEdge.from === outEdge.to)
- {
- continue;
- }
- var expEdge = new _ExpEdge();
- expEdge.from = inEdge.from;
- expEdge.to = outEdge.to;
- sb="";
- sb+=inEdge.exp;
- sb+=middle;
- sb+=outEdge.exp;
- expEdge.exp = sb;
- fsmEdges.push(expEdge);
- }
- }
- inEdges = FA._toExpressionFillEdgesOrphanState(fsmEdges, node);
- fsmEdges.length = 0;
- fsmEdges.push(...inEdges);
- closure.splice(closure.indexOf(node),1);
- }
- sb="";
- if(fsmEdges.length==1)
- {
- return fsmEdges[0].exp;
- }
- if (fsmEdges.length > 1)
- {
- sb+="(";
- sb+=fsmEdges[0].exp;
- for (let i: number = 1; i < fsmEdges.length; ++i)
- {
- sb+="|";
- sb+=fsmEdges[i].exp;
- }
- sb+=")";
- }
- return sb;
- }
- public toString(format: string, options: FADotGraphOptions = null): string {
- if (format.toLowerCase() === "d") {
- if (options != null && options.debugSourceNfa != null && options.debugShowNfa) {
- return this._buildCompoundDot(this.fillClosure(), options);
- }
- else {
- return this._buildDot(this.fillClosure(), options, -1);
- }
- } else if(format.toLowerCase()=="e") {
- return FA._toExpression(this);
- } else {
- if (this.id < 0) {
- return "[FA]";
- } else {
- return "q" + this.id.toString();
- }
- }
- }
- public setIds(): void {
- let i: number = 0;
- for (const fa of this.fillClosure()) {
- fa.id = i;
- ++i;
- }
- }
- public static literal(codepoints: Iterable<number> | string, accept: number = 0, compact: boolean = true): FA {
- if (typeof codepoints == "string") {
- codepoints = this.toUtf32(codepoints);
- }
- const result: FA = new FA();
- let current: FA = result;
- for (const cp of codepoints) {
- current.acceptSymbol = -1;
- const newFa: FA = new FA();
- newFa.acceptSymbol = accept;
- current.addTransition(new FARange(cp, cp), newFa,compact);
- current = newFa;
- }
- return result;
- }
- public static set(ranges: Iterable<FARange>, accept: number = 0, compact: boolean = true): FA {
- const result: FA = new FA();
- const final: FA = new FA();
- final.acceptSymbol = accept;
- for (const range of [...ranges].sort((x, y) => x.max - y.max)) {
- result.addTransition(range, final,compact);
- }
- return result;
- }
- public static concat(exprs: Iterable<FA>, accept: number = 0, compact: boolean = true): FA {
- let result: FA | null = null;
- let left: FA | null = null;
- let right: FA | null = null;
- for (const expr of exprs) {
- let nval = expr.clone();
- if (null === left) {
- if (null === result) {
- result = nval;
- }
- left = nval;
- continue;
- }
- if (null === right) {
- right = nval;
- }
- nval = right.clone();
- FA._concat(left!, nval, compact);
- }
- const target: FA = null !== right ? right! : result!;
- const acc: FA[] = target.fillClosure().filter((value: FA, index: number, array: FA[]) => value.isAccepting());
- for (const afa of acc) {
- afa.acceptSymbol = accept;
- }
- return result!;
- }
- public static or(exprs: Iterable<FA>, accept: number = 0, compact: boolean = true): FA {
- const result: FA = new FA();
- const final: FA = new FA();
- final.acceptSymbol = accept;
- for (const expr of exprs) {
- const nfa = expr.clone();
- const nacc: FA[] = nfa.fillClosure().filter((value: FA, index: number, array: FA[]) => value.isAccepting());
- for (const afa of nacc) {
- afa.acceptSymbol = -1;
- afa.addEpsilon(final, compact);
- }
- result.addEpsilon(nfa, compact);
- }
- return result;
- }
- public static optional(expr: FA, accept: number = 0, compact: boolean = true): FA {
- const result: FA = expr.clone();
- const acc: FA[] = result.fillClosure().filter((value: FA, index: number, array: FA[]) => value.isAccepting());
- for (const afa of acc) {
- afa.acceptSymbol = accept;
- result.addEpsilon(afa, compact);
- }
- return result;
- }
- public static repeat(expr: FA, minOccurs: number = -1, maxOccurs: number = -1, accept: number = 0, compact: boolean = true): FA {
- expr = expr.clone();
- if (minOccurs > 0 && maxOccurs > 0 && minOccurs > maxOccurs) {
- throw Error("Minimum is greater than maximum");
- }
- let result: FA;
- switch (minOccurs) {
- case -1:
- case 0:
- switch (maxOccurs) {
- case -1:
- case 0:
- result = new FA();
- result.acceptSymbol = accept;
- result.addEpsilon(expr, compact);
- for (const afa of expr.fillClosure()) {
- if (afa.isAccepting()) {
- afa.acceptSymbol = -1;
- afa.addEpsilon(result, compact);
- }
- }
- return result;
- case 1:
- result = this.optional(expr, accept, compact);
- return result;
- default:
- const l: FA[] = [];
- expr = this.optional(expr, accept, compact);
- l.push(expr);
- for (let i = 1; i < maxOccurs; ++i) {
- l.push(expr.clone());
- }
- result = this.concat(l, accept, compact);
- return result;
- }
- case 1:
- switch (maxOccurs) {
- case -1:
- case 0:
- result = this.concat([expr, FA.repeat(expr, 0, 0, accept, compact)], accept, compact);
- return result;
- case 1:
- return expr;
- default:
- result = this.concat([expr, FA.repeat(expr, 0, maxOccurs - 1, accept, compact)], accept, compact);
- return result;
- }
- default:
- switch (maxOccurs) {
- case -1:
- case 0:
- result = this.concat([FA.repeat(expr, minOccurs, minOccurs, accept, compact), FA.repeat(expr, 0, 0, accept, compact)], accept, compact);
- return result;
- case 1:
- throw new Error("Should never get here");
- default:
- if (minOccurs == maxOccurs) {
- const l: FA[] = [];
- expr = this.optional(expr, accept, compact);
- l.push(expr);
- for (let i = 1; i < maxOccurs; ++i) {
- l.push(expr.clone());
- }
- result = this.concat(l, accept, compact);
- return result;
- }
- result = this.concat([this.repeat(expr, minOccurs, minOccurs, accept, compact), FA.repeat(FA.optional(expr, accept, compact), maxOccurs - minOccurs, maxOccurs - minOccurs, accept, compact)], accept, compact);
- return result;
- }
- }
- // throw new Error("Should never get here");
- }
- public run(value: string, blockEnds: FA[] = null) : FAStringRunner {
- const result: FAStringRunner = new FAStringStateRunner(this,blockEnds);
- result.set(value);
- return result;
- }
- public static runDfa(value: string, dfa: number[], blockEnds: number[][]=null) : FAStringRunner {
- const result: FAStringRunner = new FAStringDfaTableRunner(dfa,blockEnds);
- result.set(value);
- return result;
- }
- public isNeutral(): boolean {
- return !this.isAccepting && this.transitions.length == 1 && this.transitions[0].isEpsilon();
- }
- public isFinal(): boolean {
- return this.transitions.length == 0;
- }
- public isTrap(): boolean {
- if (this.isAccepting()) {
- return false;
- }
- for (const ffa of this.fillClosure()) {
- if (ffa.isAccepting()) {
- return false;
- }
- }
- return true;
- }
- public fillInputTransitionRangesGroupedByState(includeEpsilonTransitions: boolean = false, result: Map<FA, FARange[]> = null): Map<FA, FARange[]> {
- if (result === null) {
- result = new Map<FA, FARange[]>();
- }
- for (const fat of this.transitions) {
- if (includeEpsilonTransitions || !fat.isEpsilon()) {
- const res: FARange[] | undefined = result.get(fat.to);
- if (res === undefined) {
- const ndata: FARange[] = [new FARange(fat.min, fat.max)];
- result.set(fat.to, ndata);
- } else {
- res.push(new FARange(fat.min, fat.max));
- }
- }
- }
- for (const values of result.values()) {
- values.sort((x: FARange, y: FARange) => { var c = x.min - y.min; if (0 != c) return c; return x.max - y.max; });
- }
- return result;
- }
- public toArray(): number[] {
- const fa: FA = this;
- const result: number[] = [];
- const closure: FA[] = fa.fillClosure();
- const stateIndices: number[] = [];
- for (let i: number = 0; i < closure.length; ++i) {
- const cfa: FA = closure[i];
- stateIndices.push(result.length);
- result.push(cfa.isAccepting()?cfa.acceptSymbol:-1);
- const itrgp: Map<FA, FARange[]> = cfa.fillInputTransitionRangesGroupedByState();
- result.push(itrgp.size);
- for (const itr of itrgp.entries()) {
- result.push(closure.indexOf(itr[0]));
- result.push(itr[1].length);
- result.push(...FARange.toPacked(itr[1]));
- }
- }
- let state: number = 0;
- while (state < result.length) {
- ++state;
- const tlen = result[state++];
- for (let i: number = 0; i < tlen; ++i) {
- result[state] = stateIndices[result[state]];
- ++state;
- const prlen: number = result[state++];
- state += prlen * 2;
- }
- }
- return result;
- }
- public static fromArray(fa: number[]): FA {
- if (fa.length == 0) {
- return new FA();
- }
- let si: number = 0;
- const states: Map<number, FA> = new Map<number, FA>();
- while (si < fa.length) {
- const newFa: FA = new FA();
- states.set(si, newFa);
- newFa.acceptSymbol = fa[si++];
- const tlen: number = fa[si++];
- for (let i: number = 0; i < tlen; ++i) {
- ++si; // tto
- const prlen: number = fa[si++];
- si += prlen * 2;
- }
- }
- si = 0;
- while (si < fa.length) {
- var state = states.get(si)!;
- ++si;
- const tlen: number = fa[si++];
- for (let i: number = 0; i < tlen; ++i) {
- const tto: number = fa[si++];
- const to: FA = states.get(tto)!;
- const prlen: number = fa[si++];
- for (let j: number = 0; j < prlen; ++j) {
- state.addTransition(new FARange(fa[si++], fa[si++]), to);
- }
- }
- }
- return states.get(0)!;
- }
- public move(codepoint: number) : FA {
- if (!this.isDeterministic)
- {
- throw "The state machine must be deterministic";
- }
- for (let i: number = 0; i < this.transitions.length; ++i)
- {
- const fat: FATransition = this.transitions[i];
- if (codepoint < fat.min)
- {
- return null;
- }
- if (codepoint <= fat.max)
- {
- return fat.to;
- }
- }
- return null;
- }
- public static fillMove(states: Iterable<FA>, codepoint: number, result: FA[] = []): FA[] {
- for (const ffa of states) {
- for (const fat of ffa.transitions) {
- if (!fat.isEpsilon() && (codepoint >= fat.min && codepoint <= fat.max)) {
- fat.to.fillEpsilonClosure(result);
- }
- }
- }
- return result;
- }
- public static getFirstAcceptSymbol(states: Iterable<FA>): number {
- for (const fa of states) {
- if (fa.isAccepting()) {
- return fa.acceptSymbol;
- }
- }
- return -1;
- }
- private static _concat(lhs: FA, rhs: FA, compact: boolean): void {
- const acc: FA[] = lhs.fillClosure().filter((value: FA, index: number, array: FA[]) => value.isAccepting());
- for (const afa of acc) {
- afa.acceptSymbol = -1;
- afa.addEpsilon(rhs, compact);
- }
- }
- public static *toUtf32(str: string): Iterable<number> {
- for (const character of str) {
- let cp: number = character.codePointAt(0)!;
- if (cp >= 0xD800 && cp <= 0xDBFF) { // hi surrogate
- const cpl: number = character.codePointAt(1)!
- if (!(cpl >= 0xDC00 && cpl <= 0xDFFF)) { // not a low surrogate
- throw new Error("Unicode stream error. Unterminated high surrogate");
- }
- const highValue = cp & 0b11_11111111;
- const lowValue = cpl & 0b11_11111111;
- const magicAdd = 0b1_00000000_00000000;
- cp = (highValue << 10) | lowValue + magicAdd;
- }
- yield cp;
- }
- }
- private static *_invertRanges(ranges: Iterable<FARange>): Iterable<FARange> {
- if (ranges == null) {
- return;
- }
- let last: number = 0x10ffff;
- let e: Iterator<FARange> = ranges[Symbol.iterator]();
- let cur = e.next();
- if (cur.done) {
- yield new FARange(0, 0x10ffff);
- return;
- }
- if (cur.value.min > 0) {
- yield new FARange(0, cur.value.min - 1);
- last = cur.value.max;
- if (0x10ffff <= last)
- return;
- }
- else if (cur.value.min == 0) {
- last = cur.value.max;
- if (0x10ffff <= last)
- return;
- }
- cur = e.next();
- while (!cur.done) {
- if (0x10ffff <= last)
- return;
- if ((last + 1) < cur.value.min) {
- yield new FARange(last + 1, cur.value.min - 1);
- }
- last = cur.value.max;
- cur = e.next();
- }
- if (0x10ffff > last) {
- yield new FARange(last + 1, 0x10ffff);
- }
- }
- private static _appendRangeTo(ranges: FARange[], index: number = -1): string {
- let result: string = "";
- if (index === -1) {
- for (let i: number = 0; i < ranges.length; ++i) {
- result += FA._appendRangeTo(ranges, i);
- }
- return result;
- }
- var first = ranges[index].min;
- var last = ranges[index].max;
- result += FA._appendRangeCharTo(String.fromCodePoint(first));
- if (last === first) return result;
- if (last === first + 1) // spit out 1 and 2 length ranges as flat chars
- {
- result += FA._appendRangeCharTo(String.fromCodePoint(last));
- return result;
- }
- else if (last === first + 2) {
- result += FA._appendRangeCharTo(String.fromCodePoint(first + 1));
- result += FA._appendRangeCharTo(String.fromCodePoint(last));
- return result;
- }
- result += "-";
- result += FA._appendRangeCharTo(String.fromCodePoint(last));
- return result;
- }
- private static _appendCharTo(ch: string): string {
- let result: string = "";
- switch (ch) {
- case '.':
- case '[':
- case ']':
- case '^':
- case '-':
- case '+':
- case '?':
- case '(':
- case ')':
- case '\\':
- result += '\\';
- result += ch;
- break;
- case '\t':
- result += "\\t";
- break;
- case '\n':
- result += "\\n";
- break;
- case '\r':
- result += "\\r";
- break;
- case '\0':
- result += "\\0";
- break;
- case '\f':
- result += "\\f";
- break;
- case '\v':
- result += "\\v";
- break;
- case '\b':
- result += "\\b";
- break;
- default:
- const s: string = ch;
- const isletter: boolean = s.toLowerCase() != s.toUpperCase();
- const isdigit = !isletter && (s >= '0' && s <= '9');
- const issym = !isletter && !isdigit && ",._-<>/\\=+~`!@#$%^&*(){}[]\"\';:?".indexOf(s) > -1;
- if (!isletter && !isdigit && !issym) {
- if (s.length == 1) {
- result += "\\u";
- const d: number = s.codePointAt(0);
- result += ("0000" + (+d).toString(16)).slice(-4);
- }
- else {
- result += "\\U";
- const d: number = s.codePointAt(0);
- result += ("00000000" + (+d).toString(16)).slice(-8);
- }
- }
- else {
- result += s;
- }
- break;
- }
- return result;
- }
- private static _appendRangeCharTo(ch: string): string {
- let result: string = "";
- switch (ch) {
- case '.':
- case '[':
- case ']':
- case '^':
- case '-':
- case '(':
- case ')':
- case '{':
- case '}':
- case '\\':
- result += '\\';
- result += ch;
- break;
- case '\t':
- result += "\\t";
- break;
- case '\n':
- result += "\\n";
- break;
- case '\r':
- result += "\\r";
- break;
- case '\0':
- result += "\\0";
- break;
- case '\f':
- result += "\\f";
- break;
- case '\v':
- result += "\\v";
- break;
- case '\b':
- result += "\\b";
- break;
- default:
- const s: string = ch;
- const isletter: boolean = s.toLowerCase() != s.toUpperCase();
- const isdigit = !isletter && (s >= '0' && s <= '9');
- const issym = !isletter && !isdigit && ",._-<>/\\=+~`!@#$%^&*(){}[]\"\';:?".indexOf(s) > -1;
- if (!isletter && !isdigit && !issym) {
- if (s.length == 1) {
- result += "\\u";
- const d: number = s.codePointAt(0);
- result += ("0000" + (+d).toString(16)).slice(-4);
- }
- else {
- result += "\\U";
- const d: number = s.codePointAt(0);
- result += ("00000000" + (+d).toString(16)).slice(-8);
- }
- }
- else
- result += s;
- break;
- }
- return result;
- }
- private static _escapeLabel(label: string): string {
- if (label === null || label.length == 0) return label;
- let result: string = label.replace("\\", "\\\\");
- result = result.replace("\"", "\\\"");
- result = result.replace("\n", "\\n");
- result = result.replace("\r", "\\r");
- result = result.replace("\0", "\\0");
- result = result.replace("\v", "\\v");
- result = result.replace("\t", "\\t");
- result = result.replace("\f", "\\f");
- return result;
- }
- private _buildDot(closure: FA[], options: FADotGraphOptions, clusterIndex: number): string {
- let result: string = "";
- if (options === null) {
- options = new FADotGraphOptions();
- }
- const hasBlockEnds: boolean = options.debugShowNfa === false && options.debugString === null && options.blockEnds !== null;
- const spfx: string = options.statePrefix === null ? "q" : options.statePrefix;
- let pfx: string = "";
- if (clusterIndex !== -1) {
- result += ("subgraph cluster_" + clusterIndex.toString(10) + " {\n");
- pfx = "c" + clusterIndex.toString(10);
- } else {
- result += "digraph FA {\n";
- }
- if (!options.vertical) {
- result += "rankdir=LR\n";
- }
- result += "node [shape=circle]\n";
- let accepting: FA[] = [];
- let finals: FA[] = [];
- let neutrals: FA[] = [];
- for (let i: number = 0; i < closure.length; ++i) {
- let ffa = closure[i];
- if (ffa.isAccepting()) {
- accepting.push(ffa);
- } else if (ffa.isNeutral()) {
- neutrals.push(ffa);
- } else if (ffa.isFinal()) {
- finals.push(ffa);
- }
- }
- let fromStates: FA[] = null;
- let toStates: FA[] = null;
- let tchar: number = -1;
- if (options.debugString != null) {
- for (let ch of FA.toUtf32(options.debugString)) {
- if (fromStates === null) {
- fromStates = [];
- closure[0].fillEpsilonClosure(fromStates);
- } else {
- fromStates = toStates;
- }
- tchar = ch;
- toStates = FA.fillMove(fromStates, ch);
- if (0 == toStates.length) {
- break;
- }
- }
- }
- if (fromStates == null) {
- fromStates = [];
- closure[0].fillEpsilonClosure(fromStates);
- }
- if (toStates != null) {
- toStates = FA.fillEpsilonClosureStates(toStates);
- }
- let i: number = 0;
- for (const ffa of closure) {
- const isfrom: boolean = fromStates !== null && FA.fillEpsilonClosureStates(fromStates).includes(ffa);
- const rngGrps: Map<FA, FARange[]> = ffa.fillInputTransitionRangesGroupedByState();
- for (const rngGrp of rngGrps) {
- const istrns: boolean = isfrom && toStates !== null && options.debugString !== null && toStates.includes(rngGrp[0]);
- const di: number = closure.indexOf(rngGrp[0]);
- result += (pfx + spfx);
- result += i.toString(10);
- result += "->";
- result += (pfx + spfx);
- result += di.toString(10);
- result += " [label=\"";
- let sb: string;
- let notRanges: FARange[] = Array.from(FA._invertRanges(rngGrp[1]));
- if (notRanges.length > rngGrp[1].length) {
- sb = FA._appendRangeTo(rngGrp[1]);
- } else {
- result += "^";
- sb = FA._appendRangeTo(notRanges);
- }
- if (sb.length != 1 || sb === " ") {
- result += '[';
- if (sb.length > 16) {
- sb = sb.substring(0, 16);
- sb += "...";
- }
- result += FA._escapeLabel(sb);
- result += "]";
- } else {
- result += FA._escapeLabel(sb);
- }
- if (!istrns) {
- result += "\"]\n";
- } else {
- result += "\",color=green]\n";
- }
- }
- // do epsilons
- for (const fat of ffa.transitions) {
- if (fat.isEpsilon()) {
- var istrns = null != toStates && options.debugString != null && toStates.includes(ffa) && toStates.includes(fat.to);
- result += (pfx + spfx);
- result += i.toString(10);
- result += "->";
- result += (pfx + spfx);
- result += (closure.indexOf(fat.to)).toString(10);
- if (!istrns) {
- result += " [style=dashed,color=gray]\n";
- } else {
- result += " [style=dashed,color=green]\n";
- }
- }
- }
- // do block ends
- if (hasBlockEnds && ffa.isAccepting && options.blockEnds?.length > ffa.acceptSymbol && options.blockEnds[ffa.acceptSymbol] !== null) {
- result += (pfx + spfx + i.toString(10) + "->");
- result += (pfx + "blockEnd" + ffa.acceptSymbol.toString(10) + spfx + "0");
- result += (" [style=dashed,label=\".*?\"]\n");
- }
- ++i;
- }
- let delim: string;
- if (hasBlockEnds) {
- for (i = 0; i < options.blockEnds?.length; ++i) {
- const bfa: FA = options.blockEnds[i];
- if (bfa !== null) {
- const bclose = bfa.fillClosure();
- delim = "";
- for (let qi: number = 0; qi < bclose.length; ++qi) {
- const cbfa: FA = bclose[qi];
- const rngGrps = cbfa.fillInputTransitionRangesGroupedByState();
- for (const rngGrp of rngGrps) {
- const di: number = bclose.indexOf(rngGrp[0]);
- result += (pfx + "blockEnd" + i.toString(10) + spfx + qi.toString(10));
- result += "->";
- result += (pfx + "blockEnd" + i.toString(10) + spfx + di.toString(10));
- result += " [label=\"";
- let sb: string = FA._appendRangeTo(rngGrp[1]);
- if (sb.length != 1 || sb === " ") {
- result += "[";
- if (sb.length > 16) {
- sb = sb.substring(0, 16);
- sb += "...";
- }
- result += FA._escapeLabel(sb);
- result += "]";
- } else {
- result += FA._escapeLabel(sb);
- }
- result += "\"]\n";
- }
- // do epsilons
- for (const fat of cbfa.transitions) {
- if (fat.isEpsilon()) {
- result += ("pfx" + "blockEnd" + i.toString(10) + spfx + qi.toString(10));
- result += "->";
- const di: number = bclose.indexOf(fat.to);
- result += ("pfx" + "blockEnd" + i.toString(10) + spfx + di.toString(10));
- result += " [style=dashed,color=gray]\n";
- }
- }
- }
- for (let qi: number = 0; qi < bclose.length; ++qi) {
- const cbfa: FA = bclose[qi];
- result += (pfx + "blockEnd" + i.toString(10) + spfx + qi.toString(10) + " [label=<");
- result += "<TABLE BORDER=\"0\"><TR><TD>";
- result += ("(be)" + spfx);
- result += "<SUB>";
- result += qi.toString(10);
- result += "</SUB></TD></TR>";
- if (cbfa.isAccepting() && !options.hideAcceptSymbolIds) {
- result += "<TD><TD>";
- let acc: string = null;
- if (options.acceptSymbolNames != null && options.acceptSymbolNames.length > i) {
- acc = options.acceptSymbolNames[i];
- }
- if (acc === null) {
- acc = i.toString(10);
- }
- result += acc.replace("\"", """);
- result += "</TD></TR>";
- }
- result += "</TABLE>";
- result += ">";
- if (cbfa.isAccepting()) {
- result += ",shape=doublecircle";
- } else if (cbfa.isFinal() || cbfa.isNeutral()) {
- result += ",color=gray";
- }
- result += "]";
- }
- }
- }
- }
- delim = "";
- i = 0;
- for (const ffa of closure) {
- result += (pfx + spfx);
- result += i.toString(10);
- result += " [";
- if (options.debugString !== null) {
- if (toStates !== null) {
- const epstates: FA[] = FA.fillEpsilonClosureStates(toStates, null);
- if (epstates.includes(ffa)) {
- if (!toStates.includes(ffa)) {
- result += "color=darkgreen,"
- } else {
- result += "color=greeen,";
- }
- }
- }
- }
- result += "label=<";
- result += "<TABLE BORDER=\"0\"><TR><TD>";
- result += spfx;
- result += "<SUB>";
- result += i.toString(10);
- result += "</SUB></TD></TR>";
- if (options.debugSourceNfa !== null) {
- const from: FA[] = ffa.fromStates;
- let brk: number = Math.floor(Math.sqrt(from.length));
- if (from.length <= 3) brk = 3;
- for (let j: number = 0; j < from.length; ++j) {
- if (j === 0) {
- result += "<TR><TD>";
- result += "{ ";
- delim = "";
- } else if ((j % brk) == 0) {
- delim = "";
- result += "</TD></TR><TR><TD>";
- }
- const fromFa: FA = from[j];
- result += delim;
- result += "q<SUB>";
- result += options.debugSourceNfa.fillClosure().indexOf(fromFa).toString(10);
- result += "</SUB>";
- delim = " ";
- if (j === from.length - 1) {
- result += " }";
- result += "</TD></TR>";
- }
- }
- }
- if (ffa.isAccepting() && !options.hideAcceptSymbolIds && !(hasBlockEnds && options.blockEnds?.length > ffa.acceptSymbol && options.blockEnds[ffa.acceptSymbol] !== null)) {
- result += "<TR><TD>";
- let acc: string = null;
- if (options.acceptSymbolNames !== null && options.acceptSymbolNames.length > ffa.acceptSymbol) {
- acc = options.acceptSymbolNames[ffa.acceptSymbol];
- }
- if (acc === null) {
- acc = ffa.acceptSymbol.toString(10);
- }
- result += acc.replace("\"", """);
- result += "</TD></TR>";
- }
- result += "</TABLE>";
- result += ">";
- let isFinal: boolean = false;
- if (accepting.includes(ffa) && ((!hasBlockEnds || options.blockEnds?.length <= ffa.acceptSymbol || options.blockEnds[ffa.acceptSymbol] === null))) {
- result += ",shape=doublecircle";
- } else if (isFinal || neutrals.includes(ffa)) {
- if ((fromStates === null || !fromStates.includes(ffa)) &&
- (toStates === null) || !toStates.includes(ffa)) {
- result += ",color=gray";
- }
- }
- result += "]\n";
- ++i;
- }
- delim = "";
- if (accepting.length > 0) {
- for (const ntfa of accepting) {
- if (!hasBlockEnds || options.blockEnds?.length <= ntfa.acceptSymbol || options.blockEnds[ntfa.acceptSymbol] === null) {
- result += delim;
- result += (pfx + spfx);
- result += closure.indexOf(ntfa).toString(10);
- delim = ",";
- }
- }
- if (delim != "") {
- result += " [shape=doublecircle]\n";
- }
- }
- delim = "";
- if (neutrals.length > 0) {
- for (const ntfa of neutrals) {
- if ((fromStates === null || !fromStates.includes(ntfa)) &&
- (toStates == null || !toStates.includes(ntfa))) {
- result += delim;
- result += (pfx + spfx);
- result += closure.indexOf(ntfa).toString(10);
- delim = ",";
- }
- }
- result += " [color=gray]\n";
- delim = "";
- }
- delim = "";
- if (finals.length > 0) {
- for (const ntfa of finals) {
- result += delim;
- result += (pfx + spfx);
- result += closure.indexOf(ntfa).toString(10);
- delim = ",";
- }
- result += " [shape=circle,color=gray]\n";
- }
- result += "}\n";
- return result;
- }
- private _buildCompoundDot(closure: FA[], options: FADotGraphOptions): string {
- let result: string = "digraph FA {\n";
- let vert: boolean = true;
- if (!options.vertical) {
- result += "rank=LR\n";
- }
- result += "node [shape=circle]\n";
- const opt2: FADotGraphOptions = new FADotGraphOptions();
- opt2.debugSourceNfa = null;
- opt2.statePrefix = options.statePrefix;
- opt2.debugString = options.debugString;
- opt2.debugShowNfa = false;
- opt2.dpi = options.dpi;
- opt2.acceptSymbolNames = options.acceptSymbolNames;
- opt2.hideAcceptSymbolIds = options.hideAcceptSymbolIds;
- opt2.blockEnds = options.blockEnds;
- if (!vert) {
- result += this._buildDot(closure, options, 2);
- result += this._buildDot(options.debugSourceNfa.fillClosure(), opt2, 1);
- } else {
- result += this._buildDot(options.debugSourceNfa.fillClosure(), opt2, 2);
- result += this._buildDot(closure, options, 1);
- }
- result += "}\n";
- return result;
- }
- static _normalizeSortedRangeList(pairs: FARange[]) : void
- {
- for (let i: number = 1; i < pairs.length; ++i)
- {
- if (pairs[i - 1].max + 1 >= pairs[i].min)
- {
- const nr: FARange = new FARange(pairs[i - 1].min, pairs[i].max);
- pairs[i - 1] = nr;
- pairs.splice(i,1);
- --i; // compensated for by ++i in for loop
- }
- }
- }
- static _fromHexChar(hex: number): number {
- if (':'.codePointAt(0) > hex && '/'.codePointAt(0) < hex)
- return (hex - '0'.codePointAt(0));
- if ('G'.codePointAt(0) > hex && '@'.codePointAt(0) < hex)
- return (hex - '7'.codePointAt(0)); // 'A'-10
- if ('g'.codePointAt(0) > hex && '`'.codePointAt(0) < hex)
- return (hex - 'W'.codePointAt(0)); // 'a'-10
- throw "The value was not hex.";
- }
- static _isHexChar(hex: number): boolean {
- if (':'.charCodeAt(0) > hex && '/'.codePointAt(0) < hex)
- return true;
- if ('G'.codePointAt(0) > hex && '@'.codePointAt(0) < hex)
- return true;
- if ('g'.codePointAt(0) > hex && '`'.codePointAt(0) < hex)
- return true;
- return false;
- }
- static _parseModifier(expr: FA, pc: _ParseContext, accept: number, compact: boolean): FA
- {
- var position = pc.position;
- switch (pc.codepoint)
- {
- case '*'.codePointAt(0):
- expr = FA.repeat(expr, 0, 0, accept, compact);
- pc.advance();
- break;
- case '+'.codePointAt(0):
- expr = FA.repeat(expr, 1, 0, accept, compact);
- pc.advance();
- break;
- case '?'.codePointAt(0):
- expr = FA.optional(expr, accept, compact);
- pc.advance();
- break;
- case '{'.codePointAt(0):
- pc.advance();
- pc.trySkipWhiteSpace();
- pc.expecting('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ',', '}');
- var min = -1;
- var max = -1;
- if (','.codePointAt(0) != pc.codepoint && '}'.codePointAt(0) != pc.codepoint)
- {
- var l = pc.capture_buffer.length;
- if (!pc.tryReadDigits())
- {
- pc.expecting('0', '1', '2', '3', '4', '5', '6', '7', '8', '9');
- }
- min = Number.parseFloat(pc.capture_buffer.substring(l));
- pc.trySkipWhiteSpace();
- }
- if (','.codePointAt(0) == pc.codepoint)
- {
- pc.advance();
- pc.trySkipWhiteSpace();
- pc.expecting('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '}');
- if ('}'.charCodeAt(0) != pc.codepoint)
- {
- var l = pc.capture_buffer.length;
- pc.tryReadDigits();
- max = Number.parseFloat(pc.capture_buffer.substring(l));
- pc.trySkipWhiteSpace();
- }
- }
- else { max = min; }
- pc.expecting('}');
- pc.advance();
- expr = FA.repeat(expr, min, max, accept, compact);
- break;
- }
- return expr;
- }
- static _parseEscapePart(pc : _ParseContext) : number
- {
- if (-1 == pc.codepoint) return -1;
- switch (String.fromCodePoint(pc.codepoint))
- {
- case 'f':
- pc.advance();
- return '\f'.codePointAt(0);
- case 'v':
- pc.advance();
- return '\v'.codePointAt(0);
- case 't':
- pc.advance();
- return '\t'.codePointAt(0);
- case 'n':
- pc.advance();
- return '\n'.codePointAt(0);
- case 'r':
- pc.advance();
- return '\r'.codePointAt(0);
- case 'x':
- if (-1 == pc.advance() || !FA._isHexChar(pc.codepoint))
- return 'x'.codePointAt(0);
- let b: number = FA._fromHexChar(pc.codepoint);
- if (-1 == pc.advance() || !FA._isHexChar(pc.codepoint))
- return b;
- b <<= 4;
- b |= FA._fromHexChar(pc.codepoint);
- if (-1 == pc.advance() || !FA._isHexChar(pc.codepoint))
- return b;
- b <<= 4;
- b |= FA._fromHexChar(pc.codepoint);
- if (-1 == pc.advance() || !FA._isHexChar(pc.codepoint))
- return b;
- b <<= 4;
- b |= FA._fromHexChar(pc.codepoint);
- return b;
- case 'u':
- if (-1 == pc.advance())
- return 'u'.codePointAt(0);
- let u: number = FA._fromHexChar(pc.codepoint);
- u <<= 4;
- if (-1 == pc.advance())
- return u;
- u |= FA._fromHexChar(pc.codepoint);
- u <<= 4;
- if (-1 == pc.advance())
- return u;
- u |= FA._fromHexChar(pc.codepoint);
- u <<= 4;
- if (-1 == pc.advance())
- return u;
- u |= FA._fromHexChar(pc.codepoint);
- return u;
- default:
- const i: number = pc.codepoint;
- pc.advance();
- return i;
- }
- }
- static _parseRangeEscapePart(pc: _ParseContext) : number
- {
- if (-1 == pc.codepoint)
- return -1;
- switch (String.fromCodePoint(pc.codepoint))
- {
- case '0':
- pc.advance();
- return '\0'.codePointAt(0);
- case 'f':
- pc.advance();
- return '\f'.codePointAt(0);
- case 'v':
- pc.advance();
- return '\v'.codePointAt(0);
- case 't':
- pc.advance();
- return '\t'.codePointAt(0);
- case 'n':
- pc.advance();
- return '\n'.codePointAt(0);
- case 'r':
- pc.advance();
- return '\r'.codePointAt(0);
- case 'x':
- if (-1 == pc.advance() || !FA._isHexChar(pc.codepoint))
- return 'x'.codePointAt(0);
- let b: number = FA._fromHexChar(pc.codepoint);
- if (-1 == pc.advance() || !FA._isHexChar(pc.codepoint))
- return b;
- b <<= 4;
- b |= FA._fromHexChar(pc.codepoint);
- if (-1 == pc.advance() || !FA._isHexChar(pc.codepoint))
- return b;
- b <<= 4;
- b |= FA._fromHexChar(pc.codepoint);
- if (-1 == pc.advance() || !FA._isHexChar(pc.codepoint))
- return b;
- b <<= 4;
- b |= FA._fromHexChar(pc.codepoint);
- return b;
- case 'u':
- if (-1 == pc.advance())
- return 'u'.codePointAt(0);
- let u: number = FA._fromHexChar(pc.codepoint);
- u <<= 4;
- if (-1 == pc.advance())
- return u;
- u |= FA._fromHexChar(pc.codepoint);
- u <<= 4;
- if (-1 == pc.advance())
- return u;
- u |= FA._fromHexChar(pc.codepoint);
- u <<= 4;
- if (-1 == pc.advance())
- return u;
- u |= FA._fromHexChar(pc.codepoint);
- return u;
- default:
- const i: number = pc.codepoint;
- pc.advance();
- return i;
- }
- }
- static _parseSet(pc: _ParseContext) : [boolean, FARange[]]
- {
- const result: FARange[] = new Array<FARange>();
- pc.ensureStarted();
- pc.expecting('[');
- pc.advance();
- pc.expecting();
- let firstRead: boolean = true;
- let firstChar: number = 0;
- let readFirstChar: boolean = false;
- let wantRange: boolean = false;
- var isNot = false;
- if ('^' == String.fromCodePoint(pc.codepoint))
- {
- isNot = true;
- pc.advance();
- pc.expecting();
- }
- while (-1 != pc.codepoint && (firstRead || ']' != String.fromCodePoint(pc.codepoint)))
- {
- if (!wantRange)
- {
- // can be a single char,
- // a range
- // or a named character class
- if ('[' == String.fromCodePoint(pc.codepoint)) // named char class
- {
- const epos: number = pc.position;
- pc.advance();
- pc.expecting();
- if (':' != String.fromCodePoint(pc.codepoint))
- {
- firstChar = '['.codePointAt(0);
- readFirstChar = true;
- }
- else
- {
- firstRead = false;
- pc.advance();
- pc.expecting();
- var ll = pc.capture_buffer.length;
- if (!pc.tryReadUntil(':'.codePointAt(0), false))
- throw "Expecting character class at "+pc.position;
- pc.expecting(':');
- pc.advance();
- pc.expecting(']');
- pc.advance();
- var cls = pc.capture_buffer.substring(ll);
- let ranges: number[] = null;
- if (!FACharacterClasses.known().has(cls))
- throw "Unknown character class \"" + cls + "\" specified at "+ epos;
- ranges = FACharacterClasses.known().get(cls);
- if (ranges != null) {
- result.push(...FARange.toUnpacked(ranges));
- }
- readFirstChar = false;
- wantRange = false;
- firstRead = false;
- continue;
- }
- }
- if (!readFirstChar)
- {
- if ('\\' == String.fromCodePoint(pc.codepoint))
- {
- pc.advance();
- firstChar = FA._parseRangeEscapePart(pc);
- }
- else
- {
- firstChar = pc.codepoint;
- pc.advance();
- pc.expecting();
- }
- readFirstChar = true;
- }
- else
- {
- if ('-' == String.fromCodePoint(pc.codepoint))
- {
- pc.advance();
- pc.expecting();
- wantRange = true;
- }
- else
- {
- result.push(new FARange(firstChar, firstChar));
- readFirstChar = false;
- }
- }
- firstRead = false;
- }
- else
- {
- if ('\\' != String.fromCodePoint(pc.codepoint))
- {
- const ch: number = pc.codepoint;
- pc.advance();
- pc.expecting();
- result.push(new FARange(firstChar, ch));
- }
- else
- {
- var min = firstChar;
- pc.advance();
- result.push(new FARange(min, FA._parseRangeEscapePart(pc)));
- }
- wantRange = false;
- readFirstChar = false;
- }
- }
- if (readFirstChar)
- {
- result.push(new FARange(firstChar, firstChar));
- if (wantRange)
- {
- result.push(new FARange('-'.codePointAt(0), '-'.codePointAt(0)));
- }
- }
- pc.expecting(']');
- pc.advance();
- return [isNot, result];
- }
- private static _parse(pc: _ParseContext, accept: number = 0, compact: boolean = true): FA {
- let result: FA = null;
- let next: FA = null;
- let ich: number;
- pc.ensureStarted();
- while (pc.codepoint!==-1)
- {
- switch (pc.codepoint)
- {
- case -1:
- if (result == null)
- {
- // empty string
- result = new FA(accept);
- }
- return result;
- case '.'.codePointAt(0):
- const dot: FA = FA.set([ new FARange(0, 0x10ffff) ], accept, compact);
- if (null == result)
- result = dot;
- else
- {
- result = FA.concat([result, dot ], accept, compact);
- }
- pc.advance();
- result = FA._parseModifier(result, pc, accept, compact);
- break;
- case '\\'.codePointAt(0):
- pc.advance();
- pc.expecting();
- var isNot = false;
- switch (pc.codepoint)
- {
- case 'P'.codePointAt(0):
- isNot = true;
- // fallthrough
- case 'p'.codePointAt(0):
- pc.advance();
- pc.expecting('{');
- let uc: string = "";
- while (-1 != pc.advance() && '}' != String.fromCodePoint(pc.codepoint))
- uc+=(String.fromCodePoint(pc.codepoint));
- pc.expecting('}');
- pc.advance();
- let uci: number = 0;
- switch (uc)
- {
- case "Pe":
- uci = 21;
- break;
- case "Pc":
- uci = 19;
- break;
- case "Cc":
- uci = 14;
- break;
- case "Sc":
- uci = 26;
- break;
- case "Pd":
- uci = 19;
- break;
- case "Nd":
- uci = 8;
- break;
- case "Me":
- uci = 7;
- break;
- case "Pf":
- uci = 23;
- break;
- case "Cf":
- uci = 15;
- break;
- case "Pi":
- uci = 22;
- break;
- case "Nl":
- uci = 9;
- break;
- case "Zl":
- uci = 12;
- break;
- case "Ll":
- uci = 1;
- break;
- case "Sm":
- uci = 25;
- break;
- case "Lm":
- uci = 3;
- break;
- case "Sk":
- uci = 27;
- break;
- case "Mn":
- uci = 5;
- break;
- case "Ps":
- uci = 20;
- break;
- case "Lo":
- uci = 4;
- break;
- case "Cn":
- uci = 29;
- break;
- case "No":
- uci = 10;
- break;
- case "Po":
- uci = 24;
- break;
- case "So":
- uci = 28;
- break;
- case "Zp":
- uci = 13;
- break;
- case "Co":
- uci = 17;
- break;
- case "Zs":
- uci = 11;
- break;
- case "Mc":
- uci = 6;
- break;
- case "Cs":
- uci = 16;
- break;
- case "Lt":
- uci = 2;
- break;
- case "Lu":
- uci = 0;
- break;
- }
- if (isNot)
- {
- next = FA.set(FARange.toUnpacked(FACharacterClasses.unicodeCategories[uci]), accept, compact);
- }
- else
- next = FA.set(FARange.toUnpacked(FACharacterClasses.notUnicodeCategories[uci]), accept, compact);
- break;
- case 'd'.codePointAt(0):
- next = FA.set(FARange.toUnpacked(FACharacterClasses.digit), accept, compact);
- pc.advance();
- break;
- case 'D'.codePointAt(0):
- next = FA.set(FARange.toNotRanges(FARange.toUnpacked(FACharacterClasses.digit)), accept, compact);
- pc.advance();
- break;
- case 's'.codePointAt(0):
- next = FA.set(FARange.toUnpacked(FACharacterClasses.space), accept, compact);
- pc.advance();
- break;
- case 'S'.codePointAt(0):
- next = FA.set(FARange.toNotRanges(FARange.toUnpacked(FACharacterClasses.space)), accept, compact);
- pc.advance();
- break;
- case 'w'.codePointAt(0):
- next = FA.set(FARange.toUnpacked(FACharacterClasses.word), accept, compact);
- pc.advance();
- break;
- case 'W'.codePointAt(0):
- next = FA.set(FARange.toNotRanges(FARange.toUnpacked(FACharacterClasses.word)), accept, compact);
- pc.advance();
- break;
- default:
- if (-1 != (ich = FA._parseEscapePart(pc)))
- {
- next = FA.literal([ ich ], accept, compact);
- }
- else
- {
- pc.expecting(); // throw an error
- }
- break;
- }
- next = FA._parseModifier(next, pc, accept, compact);
- if (null != result)
- {
- result = FA.concat([result, next], accept, compact);
- }
- else
- result = next;
- break;
- case ')'.codePointAt(0):
- return result;
- case '('.codePointAt(0):
- pc.advance();
- if (String.fromCodePoint(pc.codepoint) == '?')
- {
- pc.advance();
- pc.expecting(':');
- pc.advance();
- }
- pc.expecting();
- next = FA._parse(pc, accept, compact);
- pc.expecting(')');
- pc.advance();
- next = FA._parseModifier(next, pc, accept, compact);
- if (null == result)
- result = next;
- else
- {
- result = FA.concat([ result, next ], accept, compact);
- }
- break;
- case '|'.codePointAt(0):
- if (-1 != pc.advance())
- {
- next = FA._parse(pc, accept, compact);
- result = FA.or([ result, next ], accept, compact);
- }
- else
- {
- result = FA.optional(result, accept, compact);
- }
- break;
- case '['.codePointAt(0):
- const seti = FA._parseSet(pc);
- let set: FARange[] = seti[1];
- set.sort((x, y) => { const c: number = x.min-y.min; if (0 != c) return c; return x.max-y.max; });
- FA._normalizeSortedRangeList(set);
- if (seti[0]===true) {
- set = [...FARange.toNotRanges(set)];
- }
- next = FA.set(set, accept);
- next = FA._parseModifier(next, pc, accept, compact);
- if (null == result)
- result = next;
- else
- {
- result = FA.concat([ result, next ], accept, compact);
- }
- break;
- default:
- ich = pc.codepoint;
- next = FA.literal([ ich ], accept, compact);
- pc.advance();
- next = FA._parseModifier(next, pc, accept, compact);
- if (null == result)
- result = next;
- else
- {
- result = FA.concat([ result, next ], accept, compact);
- }
- break;
- }
- }
- return result;
- }
- public static parse(expression: string, accept: number = 0, compact: boolean = true) : FA {
- const pc = new _ParseContext(expression);
- return FA._parse(pc,accept,compact);
- }
- public static toLexer(tokens: FA[], makeDfa: boolean = true, compact: boolean = true, progress: FAProgress = null) : FA {
- if (makeDfa)
- {
- for (let i: number = 0; i < tokens.length; i++)
- {
- tokens[i] = tokens[i].toMinimizedDfa(progress);
- }
- }
- var result = new FA();
- for (let i: number = 0; i < tokens.length; i++)
- {
- result.addEpsilon(tokens[i], compact);
- }
- if (makeDfa && !result.isDeterministic)
- {
- return result.toDfa(progress);
- }
- else
- {
- return result;
- }
- }
- }
- ///// Example
- // console.log("Visual FA");
- //
- //for(const match of FA.toLexer([FA.parse("\\/\\*",0), FA.parse("[A-Z_a-z][A-Z_a-z0-9]*",1),FA.parse("[0-9]+",2)]).run("foo123 456 bar /* test */ baz789",[FA.parse("\\*\\/").toMinimizedDfa()])) {
- // console.log(match.toString());
- //}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement