Advertisement
aaaaaa123456789

Render animation frames (renderframes.s)

Nov 9th, 2022
1,718
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  ; function [end-suffix]
  ; Written after a label ("name: function"). Declares that label as a global
  ; ELF symbol of type "function" whose size is (name.<end-suffix> - name),
  ; then defines the label itself. The suffix defaults to "end", so a routine
  ; declared this way has to provide a local ".end" label where its code stops.
  %imacro function 0-1 end
      global %00:function (%00.%1 - %00)
  %00:
  %endmacro
  5.  
  ; ext(symbol): names an external routine through its PLT entry, so that
  ; "call ext(symbol)" expands to "call symbol wrt ..plt".
  %define ext(symbol) symbol wrt ..plt
  7.  
  ; Field offsets of the image structure handled by the plum_* routines.
  ; NOTE(review): presumably mirrors "struct plum_image" from libplum.h - keep in sync.
  ; Total size: 56 bytes.
  struc plum_image
      .type:              resw 1      ; +0   written with PLUM_IMAGE_PNG before storing
      .max_palette_index: resb 1      ; +2
      .color_format:      resb 1      ; +3
      .frames:            resd 1      ; +4   number of frames
      .height:            resd 1      ; +8
      .width:             resd 1      ; +12
      .allocator:         resq 1      ; +16
      .metadata:          resq 1      ; +24
      .palette:           resq 1      ; +32
      .data:              resq 1      ; +40  pointer to the pixel data
      .userdata:          resq 1      ; +48
  endstruc
  21.  
  ; Field offsets of a metadata node as returned by plum_find_metadata.
  ; NOTE(review): presumably mirrors "struct plum_metadata" from libplum.h - keep in sync.
  ; Total size: 32 bytes.
  struc plum_metadata
      .type: resd 1                   ; +0
      alignb 8                        ; pad so the following quadwords are 8-byte aligned
      .size: resq 1                   ; +8
      .data: resq 1                   ; +16  pointer to the node's payload
      .next: resq 1                   ; +24
  endstruc
  29.  
  ; Numeric constants handed to / compared against the plum_* routines.
  ; NOTE(review): values presumably copied from libplum.h - verify against the header in use.

  ; arguments of plum_load_image / plum_store_image
  %assign PLUM_MODE_FILENAME           -1     ; size-mode argument used for both load and store
  %assign PLUM_COLOR_64                 1     ; flags argument of plum_load_image

  ; value stored into plum_image.type before writing the output
  %assign PLUM_IMAGE_PNG                3

  ; node types looked up with plum_find_metadata
  %assign PLUM_METADATA_BACKGROUND      2
  %assign PLUM_METADATA_FRAME_DISPOSAL  5

  ; error code reported when height * frames exceeds the accepted limit
  %assign PLUM_ERR_IMAGE_TOO_LARGE      7
  36.  
  37. extern plum_destroy_image:function
  38. extern plum_find_metadata:function
  39. extern plum_get_error_text:function
  40. extern plum_load_image:function
  41. extern plum_store_image:function
  42.  
  43. extern calloc:function
  44. extern fprintf:function
  45. extern free:function
  46. extern stderr:data
  47.  
  48. section .text
  49.  
  ; number of entries in .callback_offsets (one per accepted frame disposal value)
  %assign DISPOSAL_METHOD_COUNT 6

  ;-----------------------------------------------------------------------
  ; int main(int argc, char ** argv)
  ; Usage: <program> <image> <output.png>
  ; Loads <image>, renders every frame on top of a persistent canvas, stacks
  ; the rendered frames vertically (frames = 1, height *= frames) and stores
  ; the result as <output.png>.
  ;
  ; ABI:    System V AMD64; rdi = argc, rsi = argv, exit status returned in eax
  ;         (0 = success, 1 = load/store error, 2 = wrong argument count)
  ; Saved:  r12-r15 are pushed on entry to the main path and restored at .return
  ; Roles:  r12 = image, r13 = error format string until the size check passes,
  ;         then pixels per frame (width * height), r14 = canvas buffer,
  ;         r15 = background pixel (0 when the image has none)
  ; Stack (main path, outside the frame loop; rsp is 16-byte aligned):
  ;         [rsp]      dword: error code written by plum_*; reused as frame counter
  ;         [rsp + 8]  argv[2] (output filename)
  ;         [rsp + 16] argv[0] (program name, for messages)
  ;         [rsp + 24] saved r15, r14, r13, r12
  ;         Inside the frame loop two more quadwords sit below these:
  ;         [rsp] = pointer to the current frame's pixels,
  ;         [rsp + 8] = frame disposal metadata node (or NULL).
  ;-----------------------------------------------------------------------
  main: function
      endbr64                             ; main is entered through an indirect call
      cmp edi, 3
      je .continue

      ; wrong argument count: print the usage text and exit with status 2
      ; (no callee-saved register has been touched yet)
      sub rsp, 8                          ; realign the stack for the call
      mov rdx, [rsi]
      lea rsi, [rel .usage]
      mov rdi, [rel stderr]
      xor eax, eax                        ; variadic call: no vector register arguments
      call ext(fprintf)
      add rsp, 8
      mov eax, 2
      ret

  .too_large:
      ; height * frames is above the limit: release the image and report the error
      mov rdi, r12
      call ext(plum_destroy_image)
      mov edi, PLUM_ERR_IMAGE_TOO_LARGE
      jmp .error_text
  .error:
      ; r13 = format string; the error code is in the stack slot
      mov edi, [rsp]
  .error_text:
      ; r13 = format string, edi = error code: print "<argv[0]>: ...: <error text>"
      call ext(plum_get_error_text)
      mov rcx, rax
      mov rdx, [rsp + 16]
      mov rsi, r13
      mov rdi, [rel stderr]
      xor eax, eax                        ; variadic call: no vector register arguments
      call ext(fprintf)
      mov eax, 1
      jmp .return

  .continue:
      push r12
      push r13
      push r14
      push r15
      push qword[rsi]                     ; argv[0]
      push qword[rsi + 16]                ; argv[2]
      sub rsp, 8                          ; error code slot; rsp is 16-byte aligned again
      mov rdi, [rsi + 8]
      mov rsi, PLUM_MODE_FILENAME
      mov edx, PLUM_COLOR_64
      mov rcx, rsp
      call ext(plum_load_image)
      lea r13, [rel .loaderrmsg]
      test rax, rax
      jz .error

      ; image is loaded: check that height * frames doesn't overflow
      mov r12, rax
      mov esi, [rax + plum_image.height]
      mov ecx, [rax + plum_image.frames]
      imul rcx, rsi
      cmp rcx, 0x7fffffff
      ja .too_large
      ; set to PNG, allocate a frame canvas, determine the background color and initialize
      mov word[rax + plum_image.type], PLUM_IMAGE_PNG
      mov edi, [rax + plum_image.width]
      imul rdi, rsi
      mov r13, rdi                        ; r13 = pixels per frame from here on
      mov esi, 8                          ; 8 bytes per pixel
      call ext(calloc)
      ; don't care about calloc failure - this is assembly, so we can guarantee a segfault by accessing the buffer
      mov r14, rax
      mov esi, PLUM_METADATA_BACKGROUND
      mov rdi, r12
      call ext(plum_find_metadata)
      xor r15d, r15d
      test rax, rax
      jz .no_background
      ; fill the whole canvas with the background pixel
      mov r15, [rax + plum_metadata.data]
      mov r15, [r15]
      mov rdi, r14
      mov rcx, r13
      mov rax, r15
      rep stosq
  .no_background:
      ; find the frame disposal information and initialize local variables (data buffer, metadata pointer, frame number)
      mov dword[rsp], 0                   ; frame counter
      mov esi, PLUM_METADATA_FRAME_DISPOSAL
      mov rdi, r12
      call ext(plum_find_metadata)
      push rax                            ; metadata node (NULL if absent); size and data are read per frame
      push qword[r12 + plum_image.data]
      ; preload the registers shared with the render routines
      vpxor xmm0, xmm0, xmm0
      vmovq xmm8, r15
      vmovdqa xmm9, [rel .packmask]
      vmovdqa xmm10, [rel .roundmask]
      vmovdqa xmm11, [rel .flipmask]
      vmovdqa xmm12, [rel .shufflemask]
      vpunpcklwd xmm8, xmm8, xmm0         ; background pixel, one 16-bit channel per dword
      xor eax, eax

  .loop:
      ; process each frame; rax = frame number
      ; rdx = disposal value for this frame. It stays 0 when there is no
      ; disposal metadata, when the array is shorter than the frame number,
      ; or when the stored value has no entry in the dispatch table.
      xor edx, edx
      mov rcx, [rsp + 8]
      test rcx, rcx
      jz .dispatch
      cmp rax, [rcx + plum_metadata.size]
      jae .dispatch
      mov rcx, [rcx + plum_metadata.data]
      movzx edx, byte[rcx + rax]
      cmp edx, DISPOSAL_METHOD_COUNT
      jb .dispatch
      xor edx, edx
  .dispatch:
      mov r11, [rsp]                      ; r11 = this frame's pixels
      lea rax, [rel .callback_offsets]
      add rax, [rax + rdx * 8]            ; table holds offsets relative to its own start
      call rax
      lea rsi, [r11 + r13 * 8]            ; advance to the next frame's pixels
      mov [rsp], rsi
      inc dword[rsp + 16]
      mov eax, [rsp + 16]
      cmp eax, [r12 + plum_image.frames]
      jb .loop
      add rsp, 16
      mov rdi, r14
      call ext(free)

      ; update the image dimensions (one frame, height *= frames)
      mov eax, [r12 + plum_image.frames]
      mov dword[r12 + plum_image.frames], 1
      imul eax, [r12 + plum_image.height]
      mov [r12 + plum_image.height], eax
      ; store the image and finish
      mov rdi, r12
      mov rsi, [rsp + 8]
      mov rdx, PLUM_MODE_FILENAME
      mov rcx, rsp                        ; error code slot (held the frame counter until now)
      call ext(plum_store_image)
      mov rdi, r12
      call ext(plum_destroy_image)
      lea r13, [rel .storeerrmsg]
      cmp dword[rsp], 0
      jnz .error
      xor eax, eax
  .return:
      ; common exit: eax = status; drop the locals and restore callee-saved registers
      add rsp, 24
      pop r15
      pop r14
      pop r13
      pop r12
      ret
  .end:

  .loaderrmsg: db `%s: load error: %s\n`, 0
  .storeerrmsg: db `%s: store error: %s\n`, 0
  .usage: db `usage: %s <image> <output.png>\n`, 0

      align 16, db 0
  ; vpshufb control: gather the high word of dwords 0-2 into words 0-2, zero the rest
  .packmask: db 2, 3, 6, 7, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
  ; rounding bias (0x8000) added to dwords 0-2
  .roundmask: dw 0x8000, 0, 0x8000, 0, 0x8000, 0, 0, 0
  ; xor mask turning a 16-bit value v in dwords 0-2 into 0xffff - v
  .flipmask: dw -1, 0, -1, 0, -1, 0, 0, 0
  ; vpshufb control: copy bytes 12-13 (the fourth channel) into dwords 0-2, zero dword 3
  .shufflemask: db 12, 13, -1, -1, 12, 13, -1, -1, 12, 13, -1, -1, -1, -1, -1, -1

  ; render routine for each disposal value, stored as offsets from the table start;
  ; values 3-5 reuse the routines of values 0-2
  .callback_offsets:
      %rep DISPOSAL_METHOD_COUNT / 3
          dq render_frame - .callback_offsets
          dq render_frame_keep_background - .callback_offsets
          dq render_frame_no_preserve - .callback_offsets
      %endrep
  197.  
  ;-----------------------------------------------------------------------
  ; render_frame
  ; Blends the current frame over the canvas and writes the blended pixels to
  ; both the frame and the canvas.
  ; Private register convention (not the C ABI); entered from main's frame
  ; loop through the .callback_offsets table:
  ; In:    r11 = frame pixels, r14 = canvas pixels, r13 = pixel count (must be
  ;        nonzero: the loop tests its condition after the first pixel)
  ;        xmm0 = 0, xmm9 = main's .packmask, xmm10 = .roundmask,
  ;        xmm11 = .flipmask, xmm12 = .shufflemask
  ; Out:   frame[i] = canvas[i] = blend(frame[i], canvas[i]) for every pixel
  ; Clobb: rax, xmm1-xmm6, flags
  ; Pixels are 8 bytes: four 16-bit channels. The fourth channel "a" weighs the
  ; canvas and 0xffff - a weighs the frame; the fourth channel of the result is 0.
  ; NOTE(review): "a" is presumably libplum's 16-bit alpha (0 = opaque) - confirm.
  ;-----------------------------------------------------------------------
  render_frame: function
      endbr64
      xor eax, eax                        ; rax = pixel index
  .loop:
      vmovq xmm1, [r11 + rax * 8]         ; frame pixel
      vmovq xmm2, [r14 + rax * 8]         ; canvas pixel
      vpunpcklwd xmm1, xmm1, xmm0         ; widen each 16-bit channel to a dword
      vpunpcklwd xmm2, xmm2, xmm0
      vpshufb xmm3, xmm1, xmm12           ; a in dwords 0-2, 0 in dword 3
      vpmulld xmm5, xmm2, xmm3            ; canvas * a
      vpxor xmm4, xmm3, xmm11             ; 0xffff - a in dwords 0-2
      vpaddd xmm5, xmm5, xmm10            ; + 0x8000 rounding bias
      vpmulld xmm6, xmm1, xmm4            ; frame * (0xffff - a)
      vpaddd xmm3, xmm6, xmm5             ; x = weighted sum
      vpsrld xmm5, xmm3, 16
      vpaddd xmm3, xmm3, xmm5             ; x + (x >> 16): high word is the rounded x / 0xffff
      vpshufb xmm1, xmm3, xmm9            ; pack the three high words, fourth channel = 0
      vmovq [r11 + rax * 8], xmm1
      vmovq [r14 + rax * 8], xmm1
      inc rax
      cmp rax, r13
      jb .loop
      ret
  .end:
  222.  
  ;-----------------------------------------------------------------------
  ; render_frame_keep_background
  ; Blends the current frame over the canvas and writes that to the frame; the
  ; canvas instead receives the background color blended over it with the
  ; same weights, so areas the frame covered return to the background.
  ; Private register convention (not the C ABI); entered from main's frame
  ; loop through the .callback_offsets table:
  ; In:    r11 = frame pixels, r14 = canvas pixels, r13 = pixel count (must be
  ;        nonzero: the loop tests its condition after the first pixel)
  ;        xmm0 = 0, xmm8 = background pixel widened to dwords,
  ;        xmm9 = main's .packmask, xmm10 = .roundmask,
  ;        xmm11 = .flipmask, xmm12 = .shufflemask
  ; Out:   frame[i]  = blend(frame[i], canvas[i])
  ;        canvas[i] = blend(background, canvas[i]) using frame[i]'s weight
  ; Clobb: rax, xmm1-xmm7, flags
  ; Pixels are 8 bytes: four 16-bit channels. The frame's fourth channel "a"
  ; weighs the canvas and 0xffff - a weighs the frame/background; the fourth
  ; channel of both results is 0.
  ; NOTE(review): "a" is presumably libplum's 16-bit alpha (0 = opaque) - confirm.
  ;-----------------------------------------------------------------------
  render_frame_keep_background: function
      endbr64
      xor eax, eax                        ; rax = pixel index
  .loop:
      vmovq xmm1, [r11 + rax * 8]         ; frame pixel
      vmovq xmm2, [r14 + rax * 8]         ; canvas pixel
      vpunpcklwd xmm1, xmm1, xmm0         ; widen each 16-bit channel to a dword
      vpunpcklwd xmm2, xmm2, xmm0
      vpshufb xmm3, xmm1, xmm12           ; a in dwords 0-2, 0 in dword 3
      vpmulld xmm5, xmm2, xmm3            ; canvas * a
      vpxor xmm4, xmm3, xmm11             ; 0xffff - a in dwords 0-2
      vpaddd xmm5, xmm5, xmm10            ; + 0x8000 rounding bias (shared by both sums)
      vpmulld xmm6, xmm1, xmm4            ; frame * (0xffff - a)
      vpmulld xmm7, xmm8, xmm4            ; background * (0xffff - a)
      vpaddd xmm3, xmm6, xmm5             ; x = sum for the output frame
      vpaddd xmm4, xmm7, xmm5             ; y = sum for the canvas
      vpsrld xmm5, xmm3, 16
      vpsrld xmm6, xmm4, 16
      vpaddd xmm3, xmm3, xmm5             ; x + (x >> 16): high word is the rounded x / 0xffff
      vpaddd xmm4, xmm4, xmm6             ; same for y
      vpshufb xmm1, xmm3, xmm9            ; pack the three high words, fourth channel = 0
      vpshufb xmm2, xmm4, xmm9
      vmovq [r11 + rax * 8], xmm1
      vmovq [r14 + rax * 8], xmm2
      inc rax
      cmp rax, r13
      jb .loop
      ret
  .end:
  252.  
  ;-----------------------------------------------------------------------
  ; render_frame_no_preserve
  ; Blends the current frame over the canvas and writes the result to the
  ; frame only; the canvas is read but left unchanged.
  ; Private register convention (not the C ABI); entered from main's frame
  ; loop through the .callback_offsets table:
  ; In:    r11 = frame pixels, r14 = canvas pixels, r13 = pixel count (must be
  ;        nonzero: the loop tests its condition after the first pixel)
  ;        xmm0 = 0, xmm9 = main's .packmask, xmm10 = .roundmask,
  ;        xmm11 = .flipmask, xmm12 = .shufflemask
  ; Out:   frame[i] = blend(frame[i], canvas[i]) for every pixel
  ; Clobb: rax, xmm1-xmm6, flags
  ; Pixels are 8 bytes: four 16-bit channels. The fourth channel "a" weighs the
  ; canvas and 0xffff - a weighs the frame; the fourth channel of the result is 0.
  ; NOTE(review): "a" is presumably libplum's 16-bit alpha (0 = opaque) - confirm.
  ;-----------------------------------------------------------------------
  render_frame_no_preserve: function
      endbr64
      xor eax, eax                        ; rax = pixel index
  .loop:
      vmovq xmm1, [r11 + rax * 8]         ; frame pixel
      vmovq xmm2, [r14 + rax * 8]         ; canvas pixel
      vpunpcklwd xmm1, xmm1, xmm0         ; widen each 16-bit channel to a dword
      vpunpcklwd xmm2, xmm2, xmm0
      vpshufb xmm3, xmm1, xmm12           ; a in dwords 0-2, 0 in dword 3
      vpmulld xmm5, xmm2, xmm3            ; canvas * a
      vpxor xmm4, xmm3, xmm11             ; 0xffff - a in dwords 0-2
      vpaddd xmm5, xmm5, xmm10            ; + 0x8000 rounding bias
      vpmulld xmm6, xmm1, xmm4            ; frame * (0xffff - a)
      vpaddd xmm3, xmm6, xmm5             ; x = weighted sum
      vpsrld xmm5, xmm3, 16
      vpaddd xmm3, xmm3, xmm5             ; x + (x >> 16): high word is the rounded x / 0xffff
      vpshufb xmm1, xmm3, xmm9            ; pack the three high words, fourth channel = 0
      vmovq [r11 + rax * 8], xmm1         ; only the frame is written
      inc rax
      cmp rax, r13
      jb .loop
      ret
  .end:
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement