Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: latin-1 -*-
- try: #True/False definition
- True, False
- except NameError:
- True, False = (1==1), (0==1)
def hex2int(str):
    """Convert the first two hex digits of *str* to an integer (0-255).

    Only str[0] and str[1] are read; any further characters are ignored.
    Upper- and lower-case digits are accepted.

    Raises KeyError for a non-hex character and IndexError when *str* has
    fewer than two characters.
    """
    hexref = {'0':0,'1':1,'2':2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':8,'9':9,
              'A':10,'B':11,'C':12,'D':13,'E':14,'F':15}
    # High nibble times 16 plus low nibble (the '+' was lost in the paste).
    return hexref[str[0].upper()] * 16 + hexref[str[1].upper()]
def str2hex(str):
    """Return the single character whose code is the two-digit hex *str*.

    Despite the name this goes hex -> character: '41' becomes 'A'.
    """
    return chr(hex2int(str))
def hex2ascii(strHex):
    """Decode a string of hex digit pairs into the corresponding characters.

    Newlines, tabs and spaces inside *strHex* are ignored, so the input may
    be laid out freely.

    Raises ValueError when the number of hex digits is odd (a half byte
    cannot be emitted).
    """
    strHex = strHex.replace("\n",'').replace("\t",'').replace(" ",'')
    if len(strHex) % 2:
        raise ValueError('odd number of hex digits: %r' % (strHex,))
    # One output character per pair of digits.
    return ''.join([str2hex(strHex[i:i + 2]) for i in range(0, len(strHex), 2)])
def int2hex(val):
    """Return *val* as upper-case hex digits with one leading '0'.

    *val* may be an int or a decimal string.  The leading '0' is what is
    left of the '0x' prefix once the 'x' is removed, e.g. 10 -> '0A' and
    255 -> '0FF'.
    """
    digits = hex(int(val)).replace('x','').upper()
    # Python 2 appends 'L' to the hex() of a long; it is not a hex digit.
    return digits.rstrip('L')
# -----
# ASM
# -----
class asm:
    """Tiny 16-bit x86 emitter that writes machine code to '<fileName>.com'.

    Instructions are built as hex strings and written through script().
    `pc` tracks the address of the next byte (the image starts at 0x100),
    `all` accumulates a readable log, and `fixup` is a stack of addresses
    of placeholder jumps that setFixup() patches later.
    """
    pc = 0x100
    all = ''
    fixup = []   # class-level default kept for compatibility; see __init__
    # Opcode prefixes for MOV reg,imm / MOV AX,[addr].
    # 'DX' was 'DA' in the original table; MOV r16,imm16 is B8+reg, so DX is BA.
    movref = {'[AX]':'A1','AX':'B8','AL':'B0','BX':'BB','BL':'B3','CX':'B9','CH':'B5','DX':'BA','DL':'B2'}
    fmovref = {'AX':'A3'}                # MOV [addr],AX
    pushref = {'AX':'50','DX':'52'}
    popref = {'AX':'58','BX':'5B'}
    addref = {'AX':'01','BX':'D8','CX':'C1'}
    imulref = {'BX':'EB'}
    negref = {'AX':'D8'}
    # Conditional jumps that skip the 3-byte placeholder JMP emitted after them.
    ifref = {'=':'7403','<>':'7503','<':'7C03','<=':'7E03','>':'7F03','>=':'7D03','odd':'24013C017403'}
    cmpref = {'AX':'39','BX':'C3'}
    pcon = True  # when False, script() writes bytes without advancing pc

    def __init__(this, fileName):
        """Open '<fileName>.com' for binary writing and reset emitter state."""
        # Per-instance state so two emitters never share one fixup stack.
        this.pc = 0x100
        this.all = ''
        this.fixup = []
        this.pcon = True
        this.comFile = open(fileName + '.com', 'wb')

    def script(this, inst):
        """Write the hex string *inst* to the file and return it unchanged."""
        if this.pcon:
            # Two hex digits per byte; floor division keeps pc an integer.
            this.pc += len(inst) // 2
        this.comFile.write(hex2ascii(inst))
        return inst

    def jmp(this, pTo, pFrom=-1):
        """Emit a near JMP to *pTo*, as if located at *pFrom* (default: pc)."""
        this.all += 'JMP(%s,%s) \n' % (pTo,pFrom)
        return this.script('E9' + this.address(pTo, pFrom))

    def call(this, pTo, pFrom=-1):
        """Emit a near CALL to *pTo*, as if located at *pFrom* (default: pc)."""
        this.all += 'CALL(%s,%s) \n' % (pTo,pFrom)
        return this.script('E8' + this.address(pTo, pFrom))

    def address(this, pTo, pFrom=-1):
        """Return the little-endian hex of the 16-bit displacement to *pTo*.

        The displacement is relative to the end of a 3-byte instruction at
        *pFrom*; a negative *pFrom* means the current pc.
        """
        if pFrom < 0:
            pFrom = this.pc
        # Mask to 16 bits so the result is always exactly four hex digits.
        dis = '%04X' % ((pTo - pFrom - 3) & 0xFFFF)
        return dis[2:] + dis[:2]

    def write(this, txt):
        """Emit code that prints each character of *txt* via the routine at 0x107."""
        this.all += 'WRITE <str> \n'
        for c in txt:
            # Always two digits, so characters below 0x10 stay a whole byte.
            this.mov('DL', '%02X' % ord(c))
            this.call(0x107)

    def neg(this, reg):
        """Emit NEG *reg*."""
        this.all += 'NEG(%s) \n' % (reg)
        return this.script('F7' + this.negref[reg])

    def mov(this, to, val):
        """Emit a MOV.

        Supported forms: mov(reg, value), mov('[AX]', address) to load AX
        from memory, and mov(address, 'AX') to store AX to memory.  *value*
        and *address* may be ints or hex-digit strings.

        Raises ValueError for an unsupported operand pair.
        """
        this.all += 'MOV(%s,%s) \n' % (to,val)
        if to in this.movref:
            code = this.movref[to]
        elif val in this.fmovref:
            code = this.fmovref[val]
            # Store form: swap so *to* names the register and *val* the address.
            to, val = val, to
        else:
            raise ValueError('unsupported MOV operands: %r, %r' % (to, val))
        if isinstance(val, int):
            val = int2hex(val)
        # 16-bit registers and memory operands take a word, the rest a byte.
        if to[-1] == 'X' or to[-1] == ']':
            width = 4
        else:
            width = 2
        val = val.zfill(width)[-width:]
        # Little-endian: low byte first, then the high byte (empty for a byte operand).
        return this.script(code + val[-2:] + val[-4:-2])

    def increment(this, reg, val=1):
        """Emit code adding *val* to the word stored at address *reg*."""
        this.mov('[AX]', reg)
        this.push('AX')
        this.mov('AX', val)
        this.push('AX')
        this.pop('AX')
        this.pop('BX')
        this.add('AX','BX')
        this.mov(reg, 'AX')

    def imul(this, reg):
        """Emit IMUL *reg*."""
        this.all += 'iMUL(%s) \n' % (reg)
        return this.script('F7' + this.imulref[reg])

    def mul(this):
        """Emit code that pops two words, multiplies them and pushes the result."""
        this.all += 'MUL(AX,BX) \n'
        return this.pop('AX') + this.pop('BX') + this.imul('BX') + this.push('AX')

    def div(this):
        """Emit the fixed division sequence (pops two words, pushes the quotient)."""
        this.all += 'DIV \n'
        return this.script('585B933D00007C06BA0000E90300BAFFFFF7FB50')

    def ret(this):
        """Emit RET."""
        this.all += 'RET \n'
        return this.script('C3')

    def cmp(this, r1, r2):
        """Emit the compare instruction built from cmpref[r1] and cmpref[r2]."""
        this.all += 'CMP((%s,%s) \n' % (r1,r2)
        return this.script(this.cmpref[r1] + this.cmpref[r2])

    def op(this, op):
        """Emit the conditional-jump sequence for relational operator *op*."""
        this.all += 'OP( %s ) \n' % (op)
        return this.script(this.ifref[op])

    def end(this):
        """Emit the program-exit sequence."""
        this.all += 'END \n'
        return this.script('B8004CCD21')

    def push(this, reg):
        """Emit PUSH *reg*."""
        this.all += 'PUSH(%s) \n' % (reg)
        return this.script(this.pushref[reg])

    def pop(this, reg):
        """Emit POP *reg*."""
        this.all += 'POP(%s) \n' % (reg)
        return this.script(this.popref[reg])

    def add(this, r1, r2):
        """Emit the add instruction built from addref[r1] and addref[r2]."""
        this.all += 'ADD(%s,%s) \n' % (r1,r2)
        return this.script(this.addref[r1] + this.addref[r2])

    def setFixup(this, type):
        """Patch the most recent placeholder jump so it targets the current pc.

        With an empty fixup stack the jump at 0x100 (program entry) is
        patched.  For type 'while' the jump is aimed 3 bytes past pc, to
        skip the back-jump the caller emits next, and the loop-start
        address is popped and returned.  Otherwise the patched address is
        returned.
        """
        this.all += '<fixup>(%s) \n\n' % (type)
        if len(this.fixup) > 0:
            address = this.fixup.pop()
        else:
            address = 0x100
        # File offset = address - 0x100, the address of the first byte.
        this.comFile.seek(address - 0x100)
        this.pcon = False        # overwriting: pc must not advance
        if type == 'while':
            this.jmp(this.pc + 3, address)
            address = this.fixup.pop()
        else:
            this.jmp(this.pc, address)
        this.pcon = True
        this.comFile.seek(0, 2)  # back to end of file for normal emission
        return address

    def close(this):
        """Close the output file."""
        this.comFile.close()
# -------------------------------------------
# -----
# LEX
# -----
class Lex:
    """Line-based lexer for PL/0 source files.

    The whole file is tokenised in __init__.  Three parallel lists hold,
    per token, its type index into tRef, its text, and its 1-based line
    number.  next()/prev() move a cursor and expose the current token as
    `token`, `lexem` and `line`.
    """
    tRef = ('<op>','<del>','<nl>','<id>','<int>','<kw>','<str>')
    # Delimiters by token type.  Longer operators come first so ':=' wins
    # over '='.  Type 2 is whitespace and never produces a token.
    delDic = {0:(':=','<>','<=','>=','==','=','+','-','*','/','<','>'),
              1:('[',']','(',')',';','.',','),
              2:("\n","\t","\r",' ')
             }
    ch = 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM00123456789'
    num = '0123456789'
    fileName = ''
    kw = ('procedure','const','while','begin','then','call','var','end','do','if','write','writeln','readln','for','to')
    tokenList = []   # class-level defaults kept for compatibility;
    lexemList = []   # __init__ gives every instance its own lists
    lineList = []
    index = -1
    listCount = 0

    def __init__(this, fileName):
        """Read and tokenise *fileName*; exit the process if it cannot be opened."""
        import sys
        this.tokenList = []
        this.lexemList = []
        this.lineList = []
        this.fileName = fileName
        try:
            lexFile = open(fileName, 'r')   # source code to analyse
        except IOError:
            print(sys.exc_info()[1])
            sys.exit(1)                     # abort the program
        try:
            # Read the whole file as a list of lines.
            this.fileLines = lexFile.readlines()
        finally:
            lexFile.close()
        this.generateTokens()
        this.listCount = len(this.tokenList)
        if this.tokenList:
            this.token = this.tRef[this.tokenList[0]]
            this.lexem = this.lexemList[0]
            this.line = this.lineList[0]
        else:
            # Empty source: leave a blank current token instead of crashing.
            this.token = ''
            this.lexem = ''
            this.line = 0

    def __repr__(this):
        return this.lexem

    def __iter__(this):
        return this

    def reset(this):
        """Move the cursor to index 0."""
        this.index = 0

    def next(this, step=1):
        """Advance the cursor by *step* and load that token.

        Returns True on success.  Returns False, leaving token/lexem/line
        untouched, when the new index is outside the token list.
        """
        this.index += step
        # A negative index would silently wrap round to the end of the list.
        if this.index >= this.listCount or this.index < 0:
            return False
        this.token = this.tRef[this.tokenList[this.index]]
        this.lexem = this.lexemList[this.index]
        this.line = this.lineList[this.index]
        return True

    def prev(this):
        """Step the cursor back by one token; see next()."""
        return this.next(-1)

    def _lexError(this, msg, lineNro):
        """Report a lexical error with its line number and exit the process."""
        import sys
        print(msg + ' [linea ' + str(lineNro) + ']')
        sys.exit(1)

    def _addToken(this, lexem, lexType, lineNro):
        """Append one token to the three parallel lists."""
        this.lexemList += [lexem]
        this.tokenList += [lexType]
        this.lineList += [lineNro]

    def generateTokens(this):
        """Split every line of this.fileLines into tokens."""
        lineNro = 1
        lexTypes = sorted(this.delDic)   # fixed order: operators, delimiters, whitespace
        for line in this.fileLines:      # walk the file line by line
            startPos = endPos = 0
            lineLen = len(line)
            while endPos <= lineLen:
                isId = 1
                # Try every delimiter at the current position.  After a
                # match the scan position moves on and the remaining
                # delimiters are tried there, so one pass can consume a run.
                for lexType in lexTypes:
                    for word in this.delDic[lexType]:
                        endPos = startPos + len(word)
                        if line[startPos:endPos] == word:
                            if lexType != 2:   # whitespace is skipped
                                this._addToken(word, lexType, lineNro)
                            startPos = endPos
                            isId = 0
                if isId:
                    if startPos >= lineLen:
                        # End of a line that has no trailing newline.
                        break
                    wordPos = startPos
                    if line[startPos] == "'":
                        # String literal: runs to the closing quote on this line.
                        closing = line.find("'", startPos + 1)
                        if closing < 0:
                            this._lexError('Cadena sin cerrar', lineNro)
                        this._addToken(line[wordPos + 1:closing], 6, lineNro)   # str
                        startPos = closing + 1
                        endPos = startPos + 1
                        continue
                    while startPos < lineLen and line[startPos] in this.num:
                        startPos += 1
                    if wordPos != startPos:
                        this._addToken(line[wordPos:startPos], 4, lineNro)      # int
                        endPos = startPos + 1
                        continue
                    while startPos < lineLen and line[startPos] in this.ch:
                        startPos += 1
                    if wordPos == startPos:
                        # Not a delimiter, quote, digit or letter: without
                        # this check the scan would never advance.
                        this._lexError('Caracter inesperado "' + line[startPos] + '"', lineNro)
                    word = line[wordPos:startPos]
                    endPos = startPos + 1
                    if word in this.kw:
                        this._addToken(word, 5, lineNro)   # kw
                    else:
                        this._addToken(word, 3, lineNro)   # id
            lineNro += 1
# -----
# SEM
# -----
class sem:
    """Symbol table stored as three parallel lists: id, type and value."""
    id = []      # class-level defaults kept for compatibility;
    type = []    # __init__ gives every instance its own lists
    value = []

    def __init__(this):
        this.id = []
        this.type = []
        this.value = []

    def add(this, id, type, value):
        """Append one symbol with its type and value."""
        this.id += [id]
        this.type += [type]
        this.value += [value]

    def __str__(this):
        """Return the table as text: a header line, then one line per symbol."""
        rows = ["ID\tTYPE\tVALUE"]
        for name, kind, val in zip(this.id, this.type, this.value):
            rows.append('%s \t %s \t %s' % (name, kind, val))
        return "\n".join(rows)

    def search(this, id):
        """Return the index of the first symbol named *id*, or None."""
        for i, name in enumerate(this.id):
            if name == id:
                return i
        return None

    def _indexOf(this, id):
        """Return the index of *id*; raise KeyError when it is not declared."""
        i = this.search(id)
        if i is None:
            raise KeyError(id)
        return i

    def getType(this, id):
        """Return the type of symbol *id*.  Raises KeyError if undeclared."""
        return this.type[this._indexOf(id)]

    def getVal(this, id):
        """Return the value of symbol *id*.  Raises KeyError if undeclared."""
        return this.value[this._indexOf(id)]
# -----
# PL0
# -----
class PL0:
    """Single-pass recursive-descent PL/0 compiler.

    Tokens come from a Lex instance (`symbol`).  Machine code is emitted
    through an asm instance while parsing, and declared names are kept in
    a sem symbol table.  Any syntax error deletes the partial .com file
    and exits the process.
    """
    semList = None      # replaced by a fresh sem() in __init__
    varInit = 0x02BA    # address given to the next declared variable

    def __init__(this, symbol):
        """Open the output file and emit the entry jump, runtime and variable area."""
        this.semList = sem()
        this.asm = asm(symbol.fileName)
        # Pre-assembled runtime routines.  Entry points used by this class:
        # 0x107 print character in DL, 0x10C print newline, 0x117 print the
        # number in AX, 0x196 read a number into AX.
        io = '043088C2B402CD21C3B402B20DCD21B20ACD21C33D0080751FB22DE8E6FFB003E8DDFFB002E8D8FFB007E8D3FFB006E8CEFFB008E8C9FFC33D00007D0A50B402B22DCD2158F7D83D0A007C433D64007C313DE8037C1F3D10277C0DBA0000BB1027F7FB52E899FF58BA0000BBE803F7FB52E88CFF58BA0000BB6400F7FB52E87FFF58BA0000BB0A00F7FB52E872FF58E86EFFC3B90000B303B407CD213C0D7503E9F8003C087503E97E003C2D7503E9D7003C307CE33C397FDF2C3080FB0074D880FB02750A81F9000075043C0074C980FB03750B3C007505B300E90200B30181F9CC0C7FB381F934F37CAD88C7B80A00F7E93DF87F74083D0880740BE9100080FF077E0BE991FF80FF087E03E989FFB50088F980FB02740501C1E9030029C89188F8E8DBFEE970FF80FB037503E968FFB402B208CD21B220CD21B208CD2180FB007505B303E950FF80FB02750B81F900007505B303E940FF89C8B90A00BA00003D00007D09F7D8F7F9F7D8E90200F7F989C181F900007403E91DFF80FB027503E915FFB303E910FF80FB037403E908FFB402B22DCD21B302E9FDFE80FB037503E9F5FE80FB02750981F900007503E9E7FEE855FE89C8C3'
        # 256 filler bytes reserved for variables (two bytes each).
        # NOTE(review): nothing stops varInit from running past this area
        # when more than 128 variables are declared.
        vrs = '90' * 256
        this.symbol = symbol
        this.asm.jmp(0x03BA)   # entry jump; re-targeted later by setFixup('init')
        this.asm.script(io + vrs)

    def __str__(this):
        """Print every remaining token with its type; returns ''."""
        while this.symbol.next():
            print('%s %s' % (this.symbol, this.symbol.token))
        return ''

    def die(this, msg):
        """Print *msg* and exit with a failure status."""
        import sys
        print(msg)
        sys.exit(1)

    def check(this, s1, s2):
        """Require *s1* to equal *s2* (or one of *s2* when it is a tuple).

        On success the lexer advances and True is returned; otherwise the
        error is reported and the process exits.
        """
        # ('x') is a plain string, not a tuple.  Wrap it so the test below
        # is equality rather than a substring search.
        if isinstance(s2, str):
            options = (s2,)
        else:
            options = s2
        if s1 in options:
            this.symbol.next()
            return True
        this.error(s2)

    def error(this, s2):
        """Delete the partial .com file, report what was expected, and exit."""
        import os
        if isinstance(s2, (tuple, list)):
            s2 = '" o "'.join(s2)
        this.asm.close()
        os.remove(this.symbol.fileName + '.com')
        line = getattr(this.symbol, 'line', 0)
        this.die('Se esperaba "' + s2 + '", no "' + this.symbol.lexem + '"' + ' [linea ' + str(line) + ']')

    def _lookup(this, lexem):
        """Return the symbol-table index of *lexem*, or report it as undeclared."""
        semIndex = this.semList.search(lexem)
        if semIndex is None:
            this.error('<id> ' + lexem)
        return semIndex

    def programa(this):
        """Parse a whole program: a block followed by '.'."""
        this.symbol.next()
        this.bloque()
        if this.symbol.lexem == '.':
            this.asm.end()
            this.asm.close()
        else:
            this.error('.')

    def bloque(this):
        """Parse const/var/procedure declarations followed by one statement."""
        if this.symbol.lexem == 'const':   # CONST
            this.symbol.next()
            while True:
                name = this.symbol.lexem
                this.check(this.symbol.token, '<id>')   # check validates and advances
                this.check(this.symbol.lexem, '=')
                literal = this.symbol.lexem
                # Validate before converting so a bad literal is reported
                # as a syntax error rather than a ValueError.
                this.check(this.symbol.token, '<int>')
                this.semList.add(name, 'const', int2hex(literal))
                if this.symbol.lexem != ',': break
                this.symbol.next()
            this.check(this.symbol.lexem, ';')
        if this.symbol.lexem == 'var':   # VAR
            this.symbol.next()
            while True:
                this.semList.add(this.symbol.lexem, 'var', this.varInit)
                this.varInit += 2
                this.check(this.symbol.token, '<id>')
                if this.symbol.lexem != ',': break
                this.symbol.next()
            this.check(this.symbol.lexem, ';')
        while this.symbol.lexem == 'procedure':   # PROCEDURE
            this.symbol.next()
            if this.symbol.token == '<id>':
                this.semList.add(this.symbol.lexem, 'procedure', this.asm.pc)
            else:
                this.error('<id>')
            this.symbol.next()
            this.check(this.symbol.lexem, ';')
            this.bloque()
            this.check(this.symbol.lexem, ';')
            this.asm.ret()
        # Aim the entry jump at the first statement of this block.
        this.asm.setFixup('init')
        this.proposicion()

    def _readVariable(this):
        """Emit code that reads a number into the variable named by the current token."""
        if this.symbol.token != '<id>':
            this.error('<id>')
        semIndex = this._lookup(this.symbol.lexem)
        if this.semList.type[semIndex] != 'var':
            this.error('<var>')
        this.asm.call(0x196)
        this.asm.mov(this.semList.value[semIndex], 'AX')
        this.symbol.next()

    def _writeItem(this):
        """Emit code printing one write() argument: a string or an expression."""
        if this.symbol.token == '<str>':   # STRING
            this.asm.write(this.symbol.lexem)
            this.symbol.next()
        else:                              # EXPRESSION
            this.expresion()
            this.asm.pop('AX')
            this.asm.call(0x117)           # print the number in AX

    def proposicion(this):
        """Parse one statement; an unrecognised token is the empty statement."""
        lexem = this.symbol.lexem
        if this.symbol.token == '<id>':   # ASSIGN
            this.asignacion()
        elif lexem == 'call':   # CALL
            this.symbol.next()
            if this.symbol.token != '<id>':
                this.error('<id>')
            semIndex = this._lookup(this.symbol.lexem)
            if this.semList.type[semIndex] != 'procedure':
                # Calling a variable or constant used to emit nothing, silently.
                this.error('<procedure>')
            this.symbol.next()
            this.asm.call(this.semList.value[semIndex])
        elif lexem == 'begin':   # BEGIN
            this.symbol.next()
            this.proposicion()
            while this.symbol.lexem == ';':
                this.symbol.next()
                this.proposicion()
            this.check(this.symbol.lexem, 'end')
        elif lexem == 'if':   # IF
            this.symbol.next()
            this.condicion()
            this.check(this.symbol.lexem, 'then')
            this.proposicion()
            this.asm.setFixup('if')
        elif lexem == 'readln':   # READLN
            this.symbol.next()
            this.check(this.symbol.lexem, '(')
            this._readVariable()
            while this.symbol.lexem == ',':
                this.symbol.next()
                # Each further variable is read too, not just parsed.
                this._readVariable()
            this.check(this.symbol.lexem, ')')
        elif lexem == 'write' or lexem == 'writeln':   # WRITE & WRITELN
            this.symbol.next()
            if this.symbol.lexem == '(':
                this.symbol.next()
                this._writeItem()
                while this.symbol.lexem == ',':
                    this.symbol.next()
                    this._writeItem()
                this.check(this.symbol.lexem, ')')
            if lexem == 'writeln':
                this.asm.call(0x10C)
        elif lexem == 'while':   # WHILE
            this.symbol.next()
            this.asm.fixup += [this.asm.pc]   # loop start, target of the back-jump
            this.condicion()
            this.check(this.symbol.lexem, 'do')
            this.proposicion()
            this.asm.jmp(this.asm.setFixup('while'))
        elif lexem == 'for':   # FOR
            this.symbol.next()
            lexem = this.symbol.lexem
            this.asignacion()
            this.asm.fixup += [this.asm.pc]   # loop start
            this.condicionFor(lexem)
            this.check(this.symbol.lexem, 'do')
            this.proposicion()
            semIndex = this.semList.search(lexem)
            this.asm.increment(this.semList.value[semIndex])
            this.asm.jmp(this.asm.setFixup('while'))   # loop end

    def condicionFor(this, lexem):
        """Emit the 'variable <= limit' test of a for loop over variable *lexem*."""
        semIndex = this._lookup(lexem)
        this.asm.mov('[AX]', this.semList.value[semIndex])
        this.asm.push('AX')
        op = '<='
        this.check(this.symbol.lexem, 'to')
        this.expresion()
        this.asm.pop('AX')
        this.asm.pop('BX')
        this.asm.cmp('AX','BX')
        this.asm.op(op)
        this.asm.fixup += [this.asm.pc]
        this.asm.jmp(0)   # placeholder exit jump, patched by setFixup

    def asignacion(this):
        """Parse '<id> := <expression>' and emit the store into the variable."""
        lexem = this.symbol.lexem
        this.check(this.symbol.token, '<id>')
        semIndex = this._lookup(lexem)
        if this.semList.type[semIndex] == 'var':
            this.check(this.symbol.lexem, ':=')
            this.expresion()
            this.asm.pop('AX')
            this.asm.mov(this.semList.value[semIndex], 'AX')
        else:
            this.error('<var>')

    def condicion(this):
        """Parse 'odd <expr>' or '<expr> <relop> <expr>' and emit the test."""
        if this.symbol.lexem == 'odd':   # ODD
            op = this.symbol.lexem
            this.symbol.next()
        else:
            this.expresion()
            op = this.symbol.lexem
            this.check(this.symbol.lexem, ('=','<>','<','<=','>','>='))
        this.expresion()
        this.asm.pop('AX')
        if op != 'odd':
            this.asm.pop('BX')
            this.asm.cmp('AX','BX')
        this.asm.op(op)
        this.asm.fixup += [this.asm.pc]
        this.asm.jmp(0)   # placeholder jump, patched by setFixup

    def expresion(this):
        """Parse [+|-] term {(+|-) term}; the result is left on the stack."""
        sign = this.symbol.lexem
        if sign in ('+', '-'):
            this.symbol.next()
        this.termino()
        if sign == '-':
            this.asm.pop('AX')
            this.asm.neg('AX')
            this.asm.push('AX')
        while this.symbol.lexem in ('+','-'):
            op = this.symbol.lexem
            this.symbol.next()
            this.termino()
            if op == '+':
                this.asm.pop('AX')
                this.asm.pop('BX')
                this.asm.add('AX','BX')
                this.asm.push('AX')
            else:
                # Fixed subtraction sequence: pops two words, pushes first - second.
                this.asm.script('585B9329D850')

    def termino(this):
        """Parse factor {(*|/) factor}; the result is left on the stack."""
        this.factor()
        while this.symbol.lexem in ('*','/'):
            op = this.symbol.lexem
            this.symbol.next()
            # Parse a factor here, not a whole term: recursing into termino
            # made the operators right-associative, so a/b*c was a/(b*c).
            this.factor()
            if op == '*':
                this.asm.mul()
            else:
                this.asm.div()

    def factor(this):
        """Parse an identifier, an integer or a parenthesised expression."""
        if this.symbol.token == '<id>':
            semIndex = this._lookup(this.symbol.lexem)
            if this.semList.type[semIndex] == 'const':
                this.asm.mov('AX', this.semList.value[semIndex])
            else:
                this.asm.mov('[AX]', this.semList.value[semIndex])
            this.asm.push('AX')
            this.symbol.next()
        elif this.symbol.token == '<int>':   # INT
            this.asm.mov('AX', int(this.symbol.lexem))
            this.asm.push('AX')
            this.symbol.next()
        elif this.symbol.lexem == '(':
            this.symbol.next()
            this.expresion()
            this.check(this.symbol.lexem, ')')
        else:
            this.error('<id> o <int> o (<expresion>)')
import sys
import os

# Driver: compile the PL/0 source named on the command line, then run the
# resulting '<source>.com'.
if len(sys.argv) < 2:
    print('Uso: ' + sys.argv[0] + ' <archivo>')
    sys.exit(1)
symbol = Lex(sys.argv[1])
syntax = PL0(symbol)
syntax.programa()
# NOTE(review): the path reaches the shell unquoted, so a name containing
# spaces or shell metacharacters will not run as intended.
os.system(sys.argv[1] + '.com')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement