Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // стартовый символ
- textStart = '<программа>'
- // терминал переменной
- textVar = '<ид.>'
- // терминал числа
- textNumber = '<цел.>'
- // правила
- textRules = `
- <программа> ::= <совок.опер.>
- <совок.опер.> ::= <оператор> <совок.опер.> |
- <оператор> ::= <опер.присв.> | <усл.опер.> | <опер.цикла>
- <опер.присв.> ::= <ид.> = <ар.выр.> ;
- <усл.опер.> ::= ЕСЛИ ( <лог.выр.> ) <совок.опер.> ИНАЧЕ <совок.опер.> КОНЕЦ_ЕСЛИ
- <опер.цикла> ::= ЦИКЛ_ПОКА ( <лог.выр.> ) <совок.опер.> КОНЕЦ_ЦИКЛ
- <ар.выр.> ::= <E>
- <E> ::= <T> <E-список>
- <E-список> ::= + <T> <E-список> |
- <T> ::= <F> <T-список> |
- <T-список> ::= * <F> <T-список> |
- <F> ::= <ид.> | <цел.>
- <лог.выр.> ::= <F> <лог.опер.> <F>
- <лог.опер.> ::= > | <
- `
- // код
- textCode = `
- d=2*3+1;
- a=3+2*d;
- c=2*d+3*a;
- ЕСЛИ (d>c)
- a=d*2;
- ИНАЧЕ
- a=d;
- КОНЕЦ_ЕСЛИ
- i=1;
- ЦИКЛ_ПОКА(i<10)
- i=i+1;
- КОНЕЦ_ЦИКЛ
- `
- // textRules = `
- // <start> ::= <program>
- // <program> ::= <op_list>
- // <op_list> ::= <op> <op_list>
- // <op_list> ::=
- // <op> ::= <id> := <E> ;
- // <op> ::= <if>
- // <op> ::= <for>
- // <if> ::= ЕСЛИ ( <expression> ) ТО <op> <else>
- // <else> ::= ИНАЧЕ <op>
- // <else> ::=
- // <for> ::= for ( <id> := <F> , <F> <increment> ) <op>
- // <increment> ::= , <F>
- // <increment> ::=
- // // <E> ::= <T> [ - <T> ]*
- // <E> ::= <T> <E-list>
- // <E-list> ::= - <T> <E-list>
- // <E-list> ::=
- // <T> ::= <F> <T-list>
- // <T-list> ::= % <F> <T-list>
- // <T-list> ::=
- // <F> ::= <id>
- // <F> ::= <C>
- // <expression> ::= <F> <logic> <F>
- // <logic> ::= >
- // <logic> ::= <
- // `
- // textNumber = '<C>'
- // textVar = '<id>'
- // textCode = `
- // d := 32 - 1 ;
- // a := 3 - 254 % d ;
- // c := 200 % d - 367 % a ;
- // ЕСЛИ ( d > c ) ТО a := d % 2 ; ИНАЧЕ a := d % 3 ;
- // h := 10 ;
- // for ( i := 1 , d , 2 ) h := h - 1 ;
- // `
- log = console.log
- Array.prototype.unique = function(){
- return Array.from(new Set(this))
- }
- // массив правил: [ {"index":0,"from":"<start>","to":["<program>"]}, ...]
- rules = textRules.split('\n')//
- .filter(e=>e && !e.startsWith('//'))//
- .map(e=>e.split('::='))//
- .flatMap(([k,v])=>v.split('|').map(e=>[k, e]))//
- .map(([k,v],i)=>({
- index: i,
- from: k.trim(),
- to: v.split(/\s+/).filter(Boolean)//
- }))
- log(rules)
- s = rules.map(e=>e.from).unique()
- a = rules.flatMap(e=>e.to).unique().filter(e=>!s.includes(e))
- //
- // ["for", "(", ",", ")", "<number>", "if", "then", "else", ";", "<variable>", ":=", ">", "-", "%",
- // "<start>", "<program>", "<for>", "<increment>", "<if>", "<else>", "<block>", "<statement>",
- // "<expression>", "<compare>", "<compare2>", "<minus>", "<minus2>", "<div>", "<div2>"]
- types = a.concat(s).unique()
- //
- words = a.filter(e=>!e.match(/<.*>/))
- lexer = {
- string: textCode,
- index: 0,
- finished: false,
- words: words,
- terms: types,
- get char() {
- return this.string[this.index]
- },
- get nextChar() {
- return this.string[this.index + 1]
- },
- startIndex: 0,
- get slice() {
- return this.string.slice(this.startIndex, this.index)
- },
- move() {
- this.index++
- return this
- },
- isDigit(c) {
- return '0' <= c && c <= '9'
- },
- isLetter(c) {
- return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z'
- || 'А' <= c && c <= 'Я' || c == '_'
- },
- isWhitespace(c) {
- return ' \n\t\r'.includes(c)
- },
- Lexeme: function(type, value='') {
- this.type = type
- if (value)
- this.value = value
- },
- error(name='') {
- let slice = this.string.slice(this.index < 6 ? 0 : this.index - 6, this.index + 5)
- this.move()
- return new this.Lexeme('error',//
- `Error${name && (': ' + name + ' ')}\n at char ${this.index} (${slice})`)
- },
- getLexeme() {
- while (this.isWhitespace(this.char))
- this.move()
- this.startIndex = this.index
- if (!this.char) {
- if (this.finished)
- return null
- this.finished = true
- return new this.Lexeme('◁')
- }
- if (this.isDigit(this.char)) {
- while (this.isDigit(this.char))
- this.move()
- if (this.isLetter(this.char))
- return this.error('malformed number')
- return new this.Lexeme(textNumber,+this.slice)
- }
- if (this.isLetter(this.char)) {
- while (this.isLetter(this.char) || this.isDigit(this.char))
- this.move()
- if (this.words.includes(this.slice))
- return new this.Lexeme(this.slice)
- return new this.Lexeme(textVar,this.slice)
- }
- this.move()
- if (this.words.includes(this.slice)) {
- return new this.Lexeme(this.slice)
- }
- this.move()
- if (this.words.includes(this.slice)) {
- return new this.Lexeme(this.slice)
- }
- return this.error('unknown character')
- },
- [Symbol.iterator]: function*() {
- for (let lexeme = this.getLexeme(); lexeme; lexeme = this.getLexeme()) {
- yield lexeme
- }
- },
- reset() {
- this.index = 0
- this.finished = false
- return this
- },
- }
- //lexer.string = lexer.string.replace(/(?<=\W|^)\s+|\s+(?=\W|$)/g,'')
- log(lexer)
- a = [...lexer]
- log(a)
- rules = textRules.split('\n')//
- .filter(e=>e && !e.startsWith('//'))//
- .map(e=>e.split('::='))//
- .flatMap(([k,v])=>v.split('|').map(e=>[k, e]))//
- .map(([k,v],i)=>({
- index: i,
- from: k.trim(),
- to: v.split(/\s+/).filter(Boolean)
- }))
- void function() {
- let Rule = class Rule {
- toString() {
- return `#${this.index} ${this.from} → ${this.to.join(' ')}`
- }
- toJSON() {
- return `#${this.index} ${this.from} → ${this.to.join(' ')} `
- }
- }
- rules.map(e=>Object.setPrototypeOf(e, Rule.prototype))
- }()
- log(rules)
- s = rules.map(e=>e.from).unique()
- a = rules.flatMap(e=>e.to).unique().filter(e=>!s.includes(e))
- types = a.concat(s).unique()
- words = a.filter(e=>!e.match(/<.*>/))
- nlist = s
- NonTerminal = function(type) {
- this.type = type
- }
- NonTerminal.prototype.isNonTerminal = true
- parser = {
- NonTerminal,
- rules: rules,
- nlist: nlist,
- nonts: Object.assign({
- [Symbol.iterator]: function*() {
- yield*Object.values(this)
- },
- }, Object.fromEntries(nlist.map(e=>[e, new NonTerminal(e)]))),
- calcFirstOfNont(n) {
- for (let r of n.rules)
- if (r.nullable)
- n.nullable = true
- return n.first = n.rules.flatMap(r=>this.calcFirstOfRule(r)).unique()
- },
- calcFirstOfRule(r) {
- let first = []
- let to = r.to.slice()
- // can skip nullable nonterminals
- while (to.length && this.nonts[to[0]] && this.nonts[to[0]].nullable)
- first.push(this.nonts[to.shift()].first)
- if (!to.length)
- r.nullable = true
- else
- first.push(this.nonts[to[0]] ? this.nonts[to[0]].first : [to[0]])
- return r.first = first.flat().unique()
- },
- calcFirsts() {
- for (let n of this.nonts)
- n.first = []
- for (let r of this.rules)
- r.first = []
- for (let n of this.nonts) {
- for (let n of this.nonts) {
- this.calcFirstOfNont(n)
- }
- }
- },
- calcFollowInSeq(n, to, from) {
- let follow = []
- // to is whatever goes after this nonterminal in rule
- while (this.nonts[to[0]] && this.nonts[to[0]].nullable)
- follow.push(this.nonts[to.shift()].first)
- if (!to.length)
- // all are nullable, going up
- follow.push(this.nonts[from].follow)
- else
- follow.push(this.nonts[to[0]] ? this.nonts[to[0]].first : [to[0]])
- return follow.flat().unique()
- },
- calcFollowOfNont(n) {
- // find possible first values after nullable nonterminal
- // if (!n.nullable)
- // return;
- let follow = rules.flatMap(r=>r.to.map((e,i,a)=>e == n.type && a.slice(i + 1)).filter(Boolean).flatMap(to=>this.calcFollowInSeq(n, to, r.from)))
- return n.follow = follow.concat(n.start ? [null] : []).flat().unique()
- },
- calcFollows() {
- for (let n of this.nonts)
- n.follow = []
- this.nonts[textStart].start = true
- for (let n of this.nonts)
- for (let n of this.nonts)
- this.calcFollowOfNont(n)
- },
- calcChoiceOfRule(r) {
- return r.choice = !r.nullable ? r.first : r.first.concat(this.nonts[r.from].follow).unique()
- },
- calcChoiceOfNont(n) {
- let choice = n.rules.flatMap(e=>e.choice)
- if (choice.unique().length != choice.length) {
- if (n.rules.length == 2 && n.rules.find(e=>!e.to.length)) {
- console.warn('choice is not strict, choosing longer', n.type, ...n.rules.flatMap(e=>[e, e.choice]))
- let short = n.rules.find(r=>!r.to.length)
- let long = n.rules.find(r=>r.to.length)
- short.choice = short.choice.filter(e=>!long.choice.includes(e))
- short = short
- } else {
- console.error(n, n.n, n.rules.map(e=>e.choice))
- throw new Error('choice is not strict')
- }
- }
- return n.choice = Object.fromEntries(n.rules.flatMap(r=>r.choice.map(c=>[c, r])))
- },
- calcChoice() {
- for (let r of this.rules)
- this.calcChoiceOfRule(r)
- for (let n of this.nonts)
- this.calcChoiceOfNont(n)
- },
- calcTable() {
- let terminals = parser.rules.flatMap(e=>e.to).filter(e=>!parser.nonts[e]).unique();
- terminals.push('◁')
- let stackSymbols = Object.keys(this.nonts).concat()
- let stackTerminals = parser.rules.map(e=>e.to.slice(1)).flat().filter(e=>!parser.nonts[e]).unique()
- stackSymbols = stackSymbols.concat(stackTerminals, ['◁'])
- log(stackSymbols)
- let row = ()=>Object.fromEntries(terminals.map(k=>[k, null]))
- let table = Object.fromEntries(stackSymbols.map(k=>[k, row()]))
- for (let k of stackTerminals) {
- table[k][k] = -1
- }
- table['◁']['◁'] = 'Допустить'
- for (let rule of this.rules) {
- for (let c of rule.choice) {
- let v = table[rule.from][c || '◁']
- table[rule.from][c || '◁'] = v ? v + ' ' + rule.index : rule.index
- }
- }
- console.table(Object.fromEntries(Object.entries(table).map(([k,v])=>[k,Object.fromEntries(Object.entries(v).map(([k2,v2])=>[k2,typeof v2=='number'&&v2>-1?v2+1:v2]))])))
- this.table = table
- },
- init() {
- for (let n of this.nonts)
- n.rules = this.rules.filter(e=>e.from == n.type)
- this.calcFirsts()
- this.calcFollows()
- this.calcChoice()
- this.calcTable()
- return this
- },
- parseStep(l, stack) {
- let v = stack.pop()
- let rn = this.table[v][l.type]
- let r = this.rules[rn] || []
- let to = r.to
- log(l, v, stack.slice().reverse(), r + '', to && to.slice().reverse())
- if (rn == 'Допустить')
- return true
- if (rn == -1) {
- return true
- // next lexeme
- }
- if (rn == null) {
- console.error('error', v, l.type, l, stack)
- return true
- }
- if (!to.length)
- return false
- if (this.nonts[to[0]]) {
- // nont
- to.slice().reverse().map(e=>stack.push(e))
- return false
- } else {
- to.slice(1).reverse().map(e=>stack.push(e))
- return true
- }
- },
- parse() {
- lexer.reset()
- let stack = ['◁', textStart]
- while (!lexer.finished && stack.length) {
- let l = lexer.getLexeme()
- while (!this.parseStep(l, stack))
- ;
- }
- },
- }
- parser.init()
- parser.parse()
- // r = Object.fromEntries(Object.entries(parser.rules).filter(([k,v])=>k != 'rules')//
- // .map(([k,v])=>[k, Object.fromEntries(Object.entries(v).map(([k,v])=>[k, typeof v == 'object' ? (''+v) : v]))]))
- // console.table(r)
- // r = Object.fromEntries(Object.entries(parser.nonts).filter(([k,v])=>k != 'rules')//
- // .map(([k,v])=>[k, Object.fromEntries(Object.entries(v).map(([k,v])=>[k, typeof v == 'object' ? (''+v) : v]))]))
- // console.table(r)
- // console.table(Object.fromEntries([...parser.nonts].map(e=>[e.type, Object.fromEntries(Object.entries(e.choice).map(([k,v])=>[k, v && (v + '')]))])))
- r = parser.rules.map(r=>[r.from + ' ::= ' + r.to.join(' '),r])//
- .map(([s,r])=>(s+' '.repeat(60)).slice(0,60)+s.slice(60) + ' >>> ' +r.choice.map(e=>e||'END_OF_FILE').join(' ')).join('\n')
- console.log(r)
- parser
- r = parser.rules.map(r=>[(' '+(1+r.index)).slice(-2) + ' ' + r.from + ' ::= ' + r.to.join(' ')])//
- //.map(([s,r])=>s + '\n\t\t ' +r.choice.map(e=>e||'END_OF_FILE').join(' '))
- .join('\n')
- console.log(r)
- r = parser.rules.map(r=>'Выбор(' + (' '+(1+r.index)).slice(-2) + ') = { ' + r.choice.map(e=>e||'⊥').join(' , ') + ' }').join('\n')
- console.log(r)
- // r = parser.rules.map(r=>[(' #'+(1+r.index)).slice(-3) + ' ' + r.to.join(' '),!!parser.nonts[r.to[0]]])//
- // .map(([s,r])=>s + ' ' + r).join('\n')
- // console.log(r)
- parser.rules.map(r=>r.hold = !r.to[0] || parser.nonts[r.to[0]])
- r = parser.rules.map(r=>[r, r.hold ? `Заменить( ${r.to.join(' ')} ), Держать` : `Заменить( ${r.to.slice(1).join(' ')} ), Сдвиг`])//
- .map(([r,s])=>` #${r.index+1} `.slice(-4) + s.replace('Заменить( ),','Вытолк,')) //
- .join('\n')
- console.log(r)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement