Advertisement
movinszx

bf.asm

May 27th, 2022 (edited)
173
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.55 KB | None | 0 0
  1. global _start
  2.  
  section .rodata

  ARRAYSIZE equ 16777216                  ; number of one-byte Brainfuck cells
  CODESIZE  equ 65536                     ; size in bytes of the source buffer

  errmsg  db "Mismatched brackets!",10
  ERRLEN  equ $ - errmsg

  fitmsg  db "Error: file too big (max 65536 bytes)",10
  FITLEN  equ $ - fitmsg

  section .bss

  ; thread: the compiled program, one qword per emitted handler address or
  ;         branch target. '[' and ']' each emit TWO qwords (handler + target),
  ;         and one more qword is emitted for the final exit handler. With at
  ;         most CODESIZE-1 source bytes that is at most 2*(CODESIZE-1)+1
  ;         qwords, so 2*CODESIZE qwords can never be overrun.
  ; tokens: the original ASCII program text as read from stdin.
  ; cells:  the Brainfuck data array. It lives in .bss (zero-filled at load)
  ;         instead of .data so the 16 MiB of zeros are not stored in the
  ;         executable file.
          alignb  8
  thread  resq 2*CODESIZE
  tokens  resb CODESIZE
  cells   resb ARRAYSIZE
  20.  
  21. section .text
  22.  
  ; mismatch -- failure path for unbalanced brackets.
  ; Sends the ERRLEN bytes at errmsg to file descriptor 1 (stdout) with the
  ; write syscall, then ends the process with exit status 1. Never returns.
  mismatch:
          mov     edx, ERRLEN             ; count = length of the message
          lea     rsi, [rel errmsg]       ; buf   = message text
          mov     edi, 1                  ; fd    = 1 (stdout)
          mov     eax, edi                ; syscall no. 1 = write
          syscall

          mov     edi, 1                  ; status = 1 (EXIT_FAILURE)
          mov     eax, 60                 ; syscall no. 60 = exit
          syscall
  33.  
  ; toobig -- failure path for an input that does not fit in the source buffer.
  ; Sends the FITLEN bytes at fitmsg to file descriptor 1 (stdout) with the
  ; write syscall, then ends the process with exit status 1. Never returns.
  toobig:
          mov     edx, FITLEN             ; count = length of the message
          lea     rsi, [rel fitmsg]       ; buf   = message text
          mov     edi, 1                  ; fd    = 1 (stdout)
          mov     eax, edi                ; syscall no. 1 = write
          syscall

          mov     edi, 1                  ; status = 1
          mov     eax, 60                 ; syscall no. 60 = exit
          syscall
  42.  
  43. ; ----------- How the interpreter is supposed to function ----------------
  44. ; Each of the routines below is a handler whose address
  45. ; will be stored in the array called "thread" declared above.
  46. ; The interpreter will start with RSI pointing to the first
  47. ; element, and RDX pointing to the first element of the Brainfuck cells.
  48. ; These are like C arrays, so we just add sizeof(element), which is 8, to get the next element.
  49. ; At the end of all of these is this:
  50. ;
  51. ; lodsq
  52. ; jmp rax
  53. ;
  54. ; Which is basically the equivalent of this in C:
  55. ;
  56. ; void *rax, **rsi;
  57. ; void *thread[CODESIZE]; // Array of pointers to functions
  58. ;
  59. ; rsi = &thread[0];
  60. ; rax = *rsi++;
  61. ; goto *rax;
  62. ;
  63. ; You can think of it like an array of function pointers, except instead of calling
  64. ; and returning, you "call" (jump to) the subsequent function at the end of each function,
  65. ; never to return. Maybe like this:
  66. ;
  67. ; rsi = &thread[0];
  68. ; rax = *rsi++;
  69. ; (*rax)(); // Any code below this is never reached, because the call never returns.
  70. ;
  71.  
  72.  
  ; exit -- thread handler emitted at the end of every compiled program.
  ; Ends the process with exit status 0 (success). Never returns.
  exit:
          xor     edi, edi                ; status = 0
          mov     eax, 60                 ; syscall no. 60 = exit
          syscall
  77.  
  78.  
  ; putc -- thread handler for '.': write the current cell to stdout.
  ; In:      rsi -> next thread slot, rdx -> current cell
  ; Out:     rsi advanced by one slot, rdx unchanged; control passes to the
  ;          next handler
  ; Clobbers rax, rdi, r12, r13 (plus whatever the syscall itself clobbers).
  putc:
          mov     r13, rdx                ; keep the cell pointer across the syscall
          mov     r12, rsi                ; keep the thread pointer across the syscall

          mov     rsi, rdx                ; buf   = address of the current cell
          mov     edx, 1                  ; count = one byte
          mov     edi, 1                  ; fd    = 1 (stdout)
          mov     eax, 1                  ; syscall no. 1 = write
          syscall

          mov     rdx, r13                ; restore the cell pointer
          mov     rsi, r12                ; restore the thread pointer
          lodsq                           ; rax = next handler, rsi += 8
          jmp     rax
  93.  
  94.  
  ; getc -- thread handler for ',': read one byte from stdin into the current
  ; cell. The return value of the read syscall is not inspected.
  ; In:      rsi -> next thread slot, rdx -> current cell
  ; Out:     rsi advanced by one slot, rdx unchanged; control passes to the
  ;          next handler
  ; Clobbers rax, rdi, r12, r13 (plus whatever the syscall itself clobbers).
  getc:
          mov     r13, rdx                ; keep the cell pointer across the syscall
          mov     r12, rsi                ; keep the thread pointer across the syscall

          mov     rsi, rdx                ; buf   = address of the current cell
          mov     edx, 1                  ; count = one byte
          xor     edi, edi                ; fd    = 0 (stdin)
          xor     eax, eax                ; syscall no. 0 = read
          syscall

          mov     rdx, r13                ; restore the cell pointer
          mov     rsi, r12                ; restore the thread pointer
          lodsq                           ; rax = next handler, rsi += 8
          jmp     rax
  108.  
  109.  
  110. ; To branch, all we have to do is set RSI (the thread pointer)
  111. ; to a new value. After we jump to a left or right bracket instruction,
  112. ; the subsequent address is actually a pointer to a different element in the thread.
  113. ;
  114. ; void *thread[] = {
  115. ; ...
  116. ; brz, // Left bracket instruction address
  117. ; &thread[50], // Points to the getc below
  118. ; putc
  119. ; ...
  120. ; brnz, // Right bracket instruction address
  121. ; &thread[25], // Points to the putc above
  122. ; getc
  123. ; ...
  124. ; }
  125. ;
  126. ; For Brainfuck, to properly implement [ and ], the address after the left
  127. ; bracket instruction, BRZ (branch if zero), will be a pointer to the instruction
  128. ; right after ], to which it will jump if the current cell is zero.
  129. ; Likewise with the right bracket instruction, BRNZ, which jumps to the instruction
  130. ; after its corresponding [ if the current cell is not zero.
  131. ;
  132. ; RAX will be pointing to BRZ, and RSI will be pointing to the address after.
  133. ; If we don't want to branch, we skip it by adding an extra 8 bytes to RSI.
  134. ; If we do, we load the branch target that RSI points at into RSI,
  135. ; and continue execution from there.
  136. ;
  137.  
  138.  
  ; brz -- thread handler for '[' (branch if the current cell is zero).
  ; In:  rsi -> the slot holding this bracket's branch target
  ;      rdx -> current cell
  ; Cell == 0: rsi is reloaded from that slot and execution continues there.
  ; Cell != 0: the slot is skipped and execution continues with the handler
  ;            stored right after it.
  brz:
          cmp     byte [rdx], 0
          jne     .enter                  ; non-zero cell: fall into the loop body

          mov     rsi, [rsi]              ; zero cell: thread pointer = branch target
          lodsq
          jmp     rax

  .enter:
          add     rsi, 8                  ; step over the branch-target slot
          lodsq                           ; rax = handler after the slot, rsi += 8
          jmp     rax
  149.  
  150.  
  ; brnz -- thread handler for ']' (branch if the current cell is not zero).
  ; In:  rsi -> the slot holding this bracket's branch target
  ;      rdx -> current cell
  ; Cell != 0: rsi is reloaded from that slot and execution continues there.
  ; Cell == 0: the slot is skipped and execution continues with the handler
  ;            stored right after it.
  brnz:
          cmp     byte [rdx], 0
          je      .leave                  ; zero cell: the loop is finished

          mov     rsi, [rsi]              ; non-zero cell: thread pointer = branch target
          lodsq
          jmp     rax

  .leave:
          add     rsi, 8                  ; step over the branch-target slot
          lodsq                           ; rax = handler after the slot, rsi += 8
          jmp     rax
  161.  
  162.  
  ; inc_ -- thread handler for '>': move the cell pointer one cell to the right.
  ; In:  rsi -> next thread slot, rdx -> current cell
  ; Out: rdx -> next cell; stepping past the last cell wraps around to cells[0]
  ;      so the pointer can never leave the ARRAYSIZE-byte cell array.
  ; Clobbers rax (it is reloaded with the next handler address anyway).
  inc_:
          add     rdx, 1
          lea     rax, [rel cells + ARRAYSIZE] ; one past the last cell
          cmp     rdx, rax
          jb      .ok
          lea     rdx, [rel cells]        ; ran off the end: wrap to the first cell
  .ok:
          lodsq
          jmp     rax
  167.  
  ; dec_ -- thread handler for '<': move the cell pointer one cell to the left.
  ; In:  rsi -> next thread slot, rdx -> current cell
  ; Out: rdx -> previous cell; stepping left from cells[0] wraps around to the
  ;      last cell so the pointer can never leave the ARRAYSIZE-byte cell array.
  ; Clobbers rax (it is reloaded with the next handler address anyway).
  dec_:
          lea     rax, [rel cells]
          cmp     rdx, rax
          ja      .ok
          lea     rdx, [rax + ARRAYSIZE]  ; at cells[0]: restart from one past the end
  .ok:
          sub     rdx, 1
          lodsq
          jmp     rax
  172.  
  ; inc_ind -- thread handler for '+': add one to the byte at the cell pointer.
  ; In:  rsi -> next thread slot, rdx -> current cell
  inc_ind:
          inc     byte [rdx]              ; *cell += 1 (byte arithmetic)
          mov     rax, [rsi]              ; rax = next handler
          add     rsi, 8                  ; advance the thread pointer
          jmp     rax
  177.  
  ; dec_ind -- thread handler for '-': subtract one from the byte at the cell
  ; pointer.
  ; In:  rsi -> next thread slot, rdx -> current cell
  dec_ind:
          dec     byte [rdx]              ; *cell -= 1 (byte arithmetic)
          mov     rax, [rsi]              ; rax = next handler
          add     rsi, 8                  ; advance the thread pointer
          jmp     rax
  182.  
  183.  
  184.  
  ; _start -- program entry point.
  ;
  ; Phase 1 (read):    fill `tokens` from stdin until read returns 0 (EOF),
  ;                    then NUL-terminate it. Jumps to toobig when the input
  ;                    leaves no room for the terminator; exits with status 1
  ;                    when read reports an error.
  ; Phase 2 (compile): walk `tokens` and emit one handler address per Brainfuck
  ;                    command into `thread`. '[' and ']' additionally emit a
  ;                    branch-target slot; pending '[' slots are tracked on the
  ;                    machine stack and back-patched when the matching ']' is
  ;                    seen. Jumps to mismatch on unbalanced brackets.
  ; Phase 3 (run):     rsi = thread, rdx = cells, jump to the first handler.
  ;
  ; Register roles:
  ;   rbp = stack pointer at entry (rsp == rbp means no '[' is open)
  ;   rbx = bytes read so far            (phase 1)
  ;   rsi = source cursor into tokens    (phase 2)
  ;   rdi = emit cursor into thread      (phase 2)
  _start:
          cld                             ; lodsq in the handlers must step forward
          mov     rbp, rsp                ; remember the empty-stack position
          xor     ebx, ebx                ; bytes read so far = 0

  ; The destination and the remaining capacity are recomputed from rbx on every
  ; iteration, so a short read (pipe, terminal) appends to what was already
  ; read instead of overwriting it.
  .read:
          mov     rsi, tokens
          add     rsi, rbx                ; buf   = &tokens[bytes read]
          mov     edx, CODESIZE
          sub     edx, ebx                ; count = bytes still free in tokens
          xor     edi, edi                ; fd    = 0 (stdin)
          xor     eax, eax                ; syscall no. 0 = read
          syscall

          test    rax, rax
          js      .ioerr                  ; negative return = -errno
          jz      .eof                    ; 0 = end of input

          add     rbx, rax
          cmp     ebx, CODESIZE           ; buffer full: no room left for the NUL
          jae     toobig
          jmp     .read

  .ioerr:
          mov     eax, 60                 ; exit(1): stdin could not be read
          mov     edi, 1
          syscall

  .eof:
          add     rbx, tokens             ; tokens[bytes read] = '\0';
          mov     byte [rbx], 0

          mov     rsi, tokens
          mov     rdi, thread

  ; DISPATCH char, label -- jump to label when the current source byte is char.
  %macro DISPATCH 2
          cmp     al, %1
          je      %2
  %endmacro

  .loop:
          movzx   eax, byte [rsi]
          add     rsi, 1

          DISPATCH '[', .lbra
          DISPATCH ']', .rbra
          DISPATCH '+', .plus
          DISPATCH '-', .minus
          DISPATCH '<', .left
          DISPATCH '>', .right
          DISPATCH '.', .dot
          DISPATCH ',', .comma
          DISPATCH 0, .end
          jmp     .loop                   ; Ignore all other chars

  ; EMIT handler -- append one handler address to the thread.
  %macro EMIT 1
          mov     qword [rdi], %1
          add     rdi, 8
  %endmacro

  .plus:
          EMIT    inc_ind
          jmp     .loop

  .minus:
          EMIT    dec_ind
          jmp     .loop

  .left:
          EMIT    dec_
          jmp     .loop

  .right:
          EMIT    inc_
          jmp     .loop

  .dot:
          EMIT    putc
          jmp     .loop

  .comma:
          EMIT    getc
          jmp     .loop

  .lbra:
          mov     qword [rdi], brz        ; Emit pointer to left bracket code.
          add     rdi, 16                 ; Leave 8 bytes free for a pointer to fill in later.
          push    rdi                     ; Remember the instruction following '['.
          jmp     .loop

  .rbra:
          cmp     rsp, rbp                ; Stack empty?
          jae     mismatch                ; Then ']' has no matching '[' (will exit).

          pop     rcx                     ; rcx = instruction following the matching '['
          mov     qword [rdi], brnz
          mov     qword [rdi + 8], rcx    ; ']' branches back to it
          add     rdi, 16

          sub     rcx, 8                  ; rcx = branch-target slot of the matching '['
          mov     qword [rcx], rdi        ; Backpatch: '[' branches to the instruction after ']'
          jmp     .loop

  .end:
          EMIT    exit                    ; End of the program: emit the exit handler.
          cmp     rsp, rbp                ; Stack not empty?
          jb      mismatch                ; Then some '[' was never closed (will exit).

          mov     rsi, thread
          mov     rdx, cells
          lodsq
          jmp     rax                     ; Start interpretation.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement