Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- class FAInputContext {
- input: Iterable<string>;
- codepoint: number = -2;
- position: number = 0;
- line: number = 1;
- column: number = 0;
- tabWidth: number = 4;
- constructor(input: string, tabWidth:number = 4, position: number = 0, line: number = 1, column: number = 0) {
- this.input = input;
- this.tabWidth = tabWidth;
- this.position = position;
- this.line = line;
- this.column = column;
- }
- advance(): number {
- const it = this.input[Symbol.iterator]();
- const n = it.next();
- if(true===n.done) {
- this.codepoint = -1;
- } else {
- this.codepoint = n.value.codePointAt(0)!;
- }
- switch (this.codepoint) {
- case 10:
- ++this.line;
- this.column = 0;
- break;
- case 13:
- this.column = 0;
- break;
- case 9:
- this.column = (((this.column-1)/this.tabWidth)+1)*this.tabWidth+1;
- break;
- default:
- if(this.codepoint>31)
- ++this.column;
- break;
- }
- ++this.position;
- return this.codepoint;
- }
- }
- class FARange {
- min: number;
- max: number;
- constructor(min: number, max: number) {
- this.min = min;
- this.max = max;
- }
- }
- class FATransition {
- min: number;
- max: number;
- to: FA;
- constructor(min: number, max: number, to: FA) {
- this.min = min;
- this.max = max;
- this.to = to;
- }
- }
- class FA {
- isDeterministic: boolean = true;
- tag: number = -1;
- acceptSymbol: number = -1;
- id: number = -1;
- transitions: FATransition[] = [];
- fillClosure(result: FA[] = []): FA[] {
- if (result.includes(this)) {
- return result;
- }
- // add this instance
- result.push(this);
- // add sub instances
- for (const transition of this.transitions) {
- transition.to.fillClosure(result);
- }
- // Return the list of fa instances
- return result;
- }
- isAccepting(): boolean {
- return this.acceptSymbol > -1;
- }
- addEpsilon(to: FA): void {
- this.isDeterministic = false;
- if (to.isAccepting() && !this.isAccepting()) {
- this.acceptSymbol = to.acceptSymbol;
- }
- for (const fat of to.transitions) {
- this.transitions.push(new FATransition(fat.min,fat.max,fat.to));
- }
- }
- private static _crackSet(str: string, closure:FA[]): FA[] {
- const result : FA[] = [];
- if(str=="") {
- return result;
- }
- const sa: string[] = str.split(" ");
- for(const s of sa) {
- result.push(closure[Number.parseInt(s,10)]);
- }
- return result;
- }
- private static _makeSet(closure: FA[], states: Iterable<FA>): string {
- let result: string = "";
- let delim: string = "";
- let ns : number[] = [];
- for(const fa of states) {
- const idx = closure.indexOf(fa);
- ns.push(idx);
- }
- ns.sort((x,y)=>x-y);
- for(const i of ns) {
- result+=(delim+i.toString(10));
- delim = " ";
- }
- return result;
- }
- toDfa(): FA {
- const closure: FA[] = this.fillClosure();
- const p: Set<number> = new Set<number>();
- for (const ffa of closure) {
- p.add(0);
- for (const t of ffa.transitions) {
- p.add(t.min);
- if (t.max < 0x10ffff) {
- p.add(t.max + 1);
- }
- }
- }
- const points: number[] = [...p];
- points.sort((x, y) => x - y);
- const sets: Map<string, Set<FA>> = new Map<string, Set<FA>>();
- const working: string[] = [];
- const dfaMap: Map<string, FA> = new Map<string, FA>();
- let initial: string = "0";
- sets.set(initial, new Set<FA>([this]));
- working.push(initial);
- const result: FA = new FA();
- if(this.isAccepting()) {
- result.acceptSymbol = this.acceptSymbol;
- }
- dfaMap.set(initial, result);
- while (working.length > 0) {
- const s: string = working[0];
- working.shift();
- const dfa: FA = dfaMap.get(s)!;
- const crackedS1: FA[] = FA._crackSet(s,closure);
- for (const q of crackedS1) {
- if (q.isAccepting()) {
- dfa.acceptSymbol = q.acceptSymbol;
- break;
- }
- }
- let i: number = 0;
- for (const pnt of points) {
- const set: Set<FA> = new Set<FA>();
- for (const c of crackedS1) {
- for (let trns of c.transitions) {
- if (trns.min <= pnt && pnt <= trns.max) {
- set.add(trns.to);
- }
- }
- }
- const skey: string = FA._makeSet(closure,set);
- if (!sets.has(skey)) {
- sets.set(skey, set);
- working.push(skey);
- let newFa: FA = new FA();
- dfaMap.set(skey, newFa);
- }
- const dst: FA = dfaMap.get(skey)!;
- const first: number = pnt;
- let last: number;
- if (i + 1 < points.length) {
- last = points[i + 1] - 1;
- } else {
- last = 0x10ffff;
- }
- dfa.transitions.push(new FATransition(first, last, dst));
- ++i;
- }
- }
- for (const ffa of result.fillClosure()) {
- const itrns: FATransition[] = [...ffa.transitions];
- for (const trns of itrns) {
- const acc: FA[] = trns.to.fillClosure().filter((value: FA, index: number, array: FA[]) => value.isAccepting());
- if (acc.length == 0) {
- ffa.transitions.splice(ffa.transitions.indexOf(trns), 1);
- }
- }
- }
- return result;
- }
- clone(): FA {
- const result: FA[] = [];
- const closure = this.fillClosure();
- for (let j = 0; j < closure.length; ++j) {
- result.push(new FA());
- }
- let i: number = 0;
- for (const fa of closure) {
- result[i].isDeterministic = fa.isDeterministic;
- result[i].acceptSymbol = fa.acceptSymbol;
- result[i].tag = fa.tag;
- for (const trns of fa.transitions) {
- result[i].transitions.push(new FATransition(trns.min, trns.max, result[closure.indexOf(trns.to)]));
- }
- ++i;
- }
- return result[0];
- }
- toString(): string {
- if(this.id < 0) {
- return "[FA]";
- } else {
- return "q"+this.id.toString();
- }
- }
- setIds(): void {
- let i: number = 0;
- for(const fa of this.fillClosure()) {
- fa.id = i;
- ++i;
- }
- }
- static literal(accept: number, codepoints: Iterable<number>): FA {
- const result: FA = new FA();
- let current: FA = result;
- for (const cp of codepoints) {
- current.acceptSymbol = -1;
- const fa: FA = new FA();
- fa.acceptSymbol = accept;
- current.transitions.push(new FATransition(cp, cp, fa));
- current = fa;
- }
- return result;
- }
- static set(accept: number, ranges: Iterable<FARange>): FA {
- const result: FA = new FA();
- const final: FA = new FA();
- final.acceptSymbol = accept;
- for (const range of [...ranges].sort((x, y) => x.max - y.max)) {
- result.transitions.push(new FATransition(range.min, range.max, final));
- }
- return result;
- }
- static concat(accept: number, exprs: Iterable<FA>): FA {
- let result: FA | null = null;
- let left: FA | null = null;
- let right: FA | null = null;
- for (const expr of exprs) {
- let nval = expr.clone();
- if (null === left) {
- if (null === result) {
- result = nval;
- }
- left = nval;
- continue;
- }
- if (null === right) {
- right = nval;
- }
- nval = right.clone();
- FA._concat(left!, nval);
- }
- const target: FA = null !== right ? right! : result!;
- const acc: FA[] = target.fillClosure().filter((value: FA, index: number, array: FA[]) => value.isAccepting());
- for (const afa of acc) {
- afa.acceptSymbol = accept;
- }
- return result!;
- }
- static or(accept: number, exprs: Iterable<FA>): FA {
- const result: FA = new FA();
- const final: FA = new FA();
- final.acceptSymbol = accept;
- for (const expr of exprs) {
- const nfa = expr.clone();
- const nacc: FA[] = nfa.fillClosure().filter((value: FA, index: number, array: FA[]) => value.isAccepting());
- for (const afa of nacc) {
- afa.acceptSymbol = -1;
- afa.addEpsilon(final);
- }
- result.addEpsilon(nfa);
- }
- return result;
- }
- static optional(accept: number, expr: FA): FA {
- const result: FA = expr.clone();
- const acc: FA[] = result.fillClosure().filter((value: FA, index: number, array: FA[]) => value.isAccepting());
- for (const afa of acc) {
- afa.acceptSymbol = accept;
- result.addEpsilon(afa);
- }
- return result;
- }
- static repeat(accept: number, expr: FA, minOccurs: number = -1, maxOccurs: number = -1): FA {
- expr = expr.clone();
- if (minOccurs > 0 && maxOccurs > 0 && minOccurs > maxOccurs) {
- throw Error("Minimum is greater than maximum");
- }
- let result: FA;
- switch (minOccurs) {
- case -1:
- case 0:
- switch (maxOccurs) {
- case -1:
- case 0:
- result = new FA();
- result.acceptSymbol = accept;
- result.addEpsilon(expr);
- const racc: FA[] = expr.fillClosure().filter((value: FA, index: number, array: FA[]) => value.isAccepting());
- for (const afa of racc) {
- afa.addEpsilon(result);
- }
- return result;
- case 1:
- result = this.optional(accept, expr);
- return result;
- default:
- const l: FA[] = [];
- expr = this.optional(accept, expr);
- l.push(expr);
- for (let i = 1; i < maxOccurs; ++i) {
- l.push(expr.clone());
- }
- result = this.concat(accept, l);
- return result;
- }
- case 1:
- switch (maxOccurs) {
- case -1:
- case 0:
- result = this.concat(accept, [expr, FA.repeat(accept,expr, 0, 0)]);
- return result;
- case 1:
- return expr;
- default:
- result = this.concat(accept, [expr, FA.repeat(accept, expr, 0, maxOccurs - 1)]);
- return result;
- }
- default:
- switch (maxOccurs) {
- case -1:
- case 0:
- result = this.concat(accept, [FA.repeat(accept, expr, minOccurs, minOccurs), FA.repeat(accept, expr, 0, 0)]);
- return result;
- case 1:
- throw new Error("Should never get here");
- default:
- if (minOccurs == maxOccurs) {
- const l: FA[] = [];
- expr = this.optional(accept, expr);
- l.push(expr);
- for (let i = 1; i < maxOccurs; ++i) {
- l.push(expr.clone());
- }
- result = this.concat(accept, l);
- return result;
- }
- result = this.concat(accept, [this.repeat(accept, expr, minOccurs, minOccurs), FA.repeat(accept, FA.optional(accept, expr), maxOccurs - minOccurs, maxOccurs - minOccurs)]);
- return result;
- }
- }
- // throw new Error("Should never get here");
- }
- isMatch(str: string): boolean {
- let states: FA[] = [];
- states.push(this);
- let remaining: number = str.length - 1;
- for (const cp of FA.toUtf32(str)) {
- const next: FA[] = FA.fillMove(cp, states);
- if (next.length == 0) {
- if (FA.hasAnyAccepting(states)) {
- return remaining == 0;
- }
- }
- states = next;
- --remaining;
- }
- return FA.hasAnyAccepting(states);
- }
- fillInputTransitionRangesGroupedByState() : Map<FA, FARange[]> {
- const result: Map<FA, FARange[]> = new Map<FA, FARange[]>();
- for(const fat of this.transitions) {
- const res : FARange[] | undefined = result.get(fat.to);
- if(res===undefined) {
- const ndata : FARange[] = [new FARange(fat.min,fat.max)];
- result.set(fat.to, ndata);
- } else {
- res.push(new FARange(fat.min,fat.max));
- }
- }
- for(const values of result.values()) {
- values.sort((x: FARange,y: FARange)=>{ var c = x.min - y.min; if(0!=c) return c; return x.max-y.max;});
- }
- return result;
- }
- toDfaTable(): number[] {
- let fa: FA = this;
- if(!this.isDeterministic) {
- fa = this.toDfa();
- }
- const result: number[] = [];
- const closure: FA[] = fa.fillClosure();
- const stateIndices: number[] = [];
- for(let i: number = 0;i<closure.length;++i) {
- const cfa: FA = closure[i];
- stateIndices.push(result.length);
- result.push(cfa.acceptSymbol);
- const itrgp: Map<FA,FARange[]> = cfa.fillInputTransitionRangesGroupedByState();
- result.push(itrgp.size);
- for(const itr of itrgp.entries()) {
- result.push(closure.indexOf(itr[0]));
- result.push(itr[1].length);
- for(const pair of itr[1]) {
- result.push(pair.min);
- result.push(pair.max);
- }
- }
- }
- let state: number = 0;
- while(state< result.length) {
- ++state;
- const tlen = result[state++];
- for(let i: number = 0;i<tlen;++i) {
- result[state]=stateIndices[result[state]];
- ++state;
- const prlen: number = result[state++];
- state += prlen * 2;
- }
- }
- return result;
- }
- static fromDfaTable(dfa: number[]) : FA {
- if(dfa.length==0) {
- return new FA();
- }
- let si: number = 0;
- const states: Map<number, FA> = new Map<number, FA>();
- while(si<dfa.length) {
- const fa: FA = new FA();
- states.set(si,fa);
- fa.acceptSymbol = dfa[si++];
- const tlen: number = dfa[si++];
- for(let i: number = 0; i< tlen; ++i) {
- ++si; // tto
- const prlen: number = dfa[si++];
- si+=prlen*2;
- }
- }
- si = 0;
- while(si<dfa.length) {
- var fa = states.get(si)!;
- ++si;
- const tlen: number = dfa[si++];
- for (let i: number = 0; i < tlen; ++i) {
- const tto: number = dfa[si++];
- const to: FA = states.get(tto)!;
- const prlen: number = dfa[si++];
- for(let j: number = 0; j <prlen;++j) {
- const fat: FATransition = new FATransition(dfa[si++],dfa[si++],to);
- fa.transitions.push(fat);
- }
- }
- }
- return states.get(0)!;
- }
- static fillMove(codepoint: number, states: Iterable<FA>): FA[] {
- const result: FA[] = [];
- for (const fa of states) {
- for (const fat of fa.transitions) {
- if (fat.min <= codepoint && codepoint <= fat.max) {
- if (!result.includes(fat.to)) {
- result.push(fat.to);
- }
- }
- }
- }
- return result;
- }
- static hasAnyAccepting(states: Iterable<FA>): boolean {
- for (const fa of states) {
- if (fa.isAccepting()) {
- return true;
- }
- }
- return false;
- }
- private static _concat(lhs: FA, rhs: FA): void {
- const acc: FA[] = lhs.fillClosure().filter((value: FA, index: number, array: FA[]) => value.isAccepting() );
- for (const afa of acc) {
- afa.acceptSymbol = -1;
- afa.addEpsilon(rhs);
- }
- }
- static *toUtf32(str: string): Iterable<number> {
- for (const character of str) {
- let cp: number = character.codePointAt(0)!;
- if(cp>=0xD800 && cp<=0xDBFF) { // hi surrogate
- const cpl: number = character.codePointAt(1)!
- if(!(cpl>=0xDC00 && cpl<=0xDFFF)) { // not a low surrogate
- throw new Error("Unicode stream error. Unterminated high surrogate");
- }
- const highValue = cp & 0b11_11111111;
- const lowValue = cpl & 0b11_11111111;
- const magicAdd = 0b1_00000000_00000000;
- cp = (highValue << 10) | lowValue + magicAdd;
- }
- yield cp;
- }
- }
- }
- const foo: FA = FA.literal(0, FA.toUtf32("foo"));
- const ba: FA = FA.literal(0, FA.toUtf32("ba"));
- const rcp: number = "r".codePointAt(0)!;
- let zcp: number = 0;
- for(const num of FA.toUtf32("𠮷")) {
- zcp = num;
- }
- const rz = FA.set(0, [new FARange(rcp, rcp), new FARange(zcp, zcp)]);
- const barz = FA.concat(0,[ba,rz]);
- let fooOrBarz = FA.repeat(0,FA.or(0,[foo,barz]),1,0).toDfa();
- const table = fooOrBarz.toDfaTable();
- fooOrBarz = FA.fromDfaTable(table);
- console.log(fooOrBarz.isMatch("foo"));
- console.log(fooOrBarz.isMatch("barfooba𠮷"));
- console.log(fooOrBarz.isMatch("test"));
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement