Advertisement
Guest User

Untitled

a guest
Jun 17th, 2018
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ; In order to call stbir_resize_float
  2. ;  I pass input_pixels in RDI, input_w in RSI, input_h in RDX, input_stride_in_bytes in RCX,
  3. ;   output_pixels in r8, output_w in r9
  4. ; For the stack:
  5. ;  From where RBP points to, there are first the previous RBP value, and then the return address
  6. ;  Then there is the first parameter, then the second, and so on
  7.  
  8. %include "linux_syscalls.s"
  9. %include "linux_platform.s"
  10. %include "render.s"
  11. %include "string.s"
  12.  
  ; Size helpers: KB(n) / MB(n) expand to n kibibytes / mebibytes, in bytes.
  ; The argument is parenthesised so that an expression such as KB(a + b)
  ; is scaled as a whole instead of only its last term.
  %define KB(x) ((x) * 1024)
  %define MB(x) (KB(x) * 1024)
  15.  
  ; State of one logical input action, one byte per flag.
  ; _start stores 0xFF / 0x00 into is_down and, when a key is released,
  ; copies the previous is_down value into was_down.
  struc input_action_struct
      .is_down:   resb 1      ; non-zero while the key is held
      .was_down:  resb 1      ; is_down as it was when the key was released
  endstruc
  ; All input actions the game knows about. Each field is one
  ; input_action_struct (2 bytes: is_down, was_down).
  struc input_struct
      .move_left:       resb input_action_struct_size
      .move_right:      resb input_action_struct_size
      .toggle_editor:   resb input_action_struct_size
      .toggle_brick:    resb input_action_struct_size
      .start_round:     resb input_action_struct_size
  endstruc
  ; Level-editor state.
  struc editor_struct
      .is_enabled:          resb 1    ; toggled between 0x00 and 0xFF by _start
      .pad0:                resb 7    ; padding; the next field starts at offset 8
      .selected_grid_cell:  resd 1    ; index of the currently selected grid cell
  endstruc
  ; Brick grid description. The five data_* pointers reference per-cell
  ; arrays that _start carves out of g_memory.
  struc grid_struct
      .num_x:        resd 1   ; cells per row
      .num_y:        resd 1   ; number of rows
      .size_x:       resd 1   ; cell width  (bounds_x / num_x)
      .size_y:       resd 1   ; cell height (bounds_y / num_y)
      .pos_x:        resd 1   ; x of the first cell
      .pos_y:        resd 1   ; y of the first row
      .bounds_x:     resd 1   ; total width covered by the grid
      .bounds_y:     resd 1   ; total height covered by the grid
      .data_x:       resq 1   ; -> 4 bytes per cell: cell x position
      .data_y:       resq 1   ; -> 4 bytes per cell: cell y position
      .data_visual:  resq 1   ; -> 4 bytes per cell: flag toggled by the editor
      .data_color:   resq 1   ; -> 16 bytes per cell: colour as four floats
      .data_block:   resq 1   ; -> 4 bytes per cell: flag toggled by the editor
  endstruc
  ; One record as read from the keyboard device (24 bytes).
  ; NOTE(review): presumably mirrors the Linux evdev input_event layout - confirm.
  struc input_event_struct
      .time:   resq 2     ; 16-byte timestamp, not inspected by this file
      .type:   resw 1     ; compared against EV_SYN / EV_KEY
      .code:   resw 1     ; key code, compared against the KEY_* constants
      .value:  resd 1     ; 0 is handled as "released", anything else as "down"
  endstruc
  ; Bump-allocator arena: new allocations are placed at base + used.
  struc buffer_struct
      .base:  resq 1      ; start of the arena
      .used:  resd 1      ; bytes handed out so far
      .max:   resd 1      ; total size of the arena in bytes
  endstruc
  ; Image header: pixel pointer, dimensions and pixel format tag.
  struc image_data_struct
      .data:                    resq 1    ; -> first pixel
      .width:                   resd 1    ; width in pixels
      .height:                  resd 1    ; height in pixels
      .type:                    resb 1    ; IMAGE_DATA_F32 or IMAGE_DATA_U8
      .image_data_struct_pad0:  resb 7    ; pads the struct to 24 bytes
  endstruc
  65.  
  66. SECTION .text
  67.     global _start
  68. ; rdi = float * input_pixels, esi = int input_w, edx = int input_h, ecx = int input_stride_in_bytes
  69. ; r8  = float * output_pixels, r9d = int output_w
  70. ; push on stack: int num_channels, int output_stride_in_bytes, int output_h
  71. extern stbir_resize_float
  72.  
  ; Build the rectangle of grid cell `idx` as four floats.
  ; In:  rdi  = u64 idx (index into g_grid.data_x / g_grid.data_y)
  ; Out: xmm0 = rect_struct { x, y, size_x, size_y } converted to float
  ; Clobbers: rax, rsi. The four dwords are staged in the 16 bytes just
  ; below the saved rbp; rsp is not moved (leaf function).
  grid_rect_from_idx:
      push    rbp
      mov     rbp, rsp

      ; width first, then the per-cell position, height last
      mov     eax, [g_grid + grid_struct.size_x]
      mov     [rbp - 8], eax
      mov     rsi, [g_grid + grid_struct.data_x]
      mov     eax, [rsi + rdi * 4]
      mov     [rbp - 16], eax
      mov     rsi, [g_grid + grid_struct.data_y]
      mov     eax, [rsi + rdi * 4]
      mov     [rbp - 12], eax
      mov     eax, [g_grid + grid_struct.size_y]
      mov     [rbp - 4], eax

      movups  xmm0, [rbp - 16]            ; (x, y, w, h) as packed int32
      cvtdq2ps xmm0, xmm0                 ; -> packed float

      leave                               ; rsp == rbp here, so this just pops rbp
      ret
  ; Fill `len` bytes starting at `ptr` with the low byte of `val`.
  ; In:  rdi = u8 *ptr, rsi = u8 val, rdx = u64 len
  ; Out: nothing documented; on return rdi = ptr + len, rcx = 0, rax = val.
  ; Assumes the direction flag is clear.
  memset:
      mov     rax, rsi                    ; al = fill byte for stosb
      mov     rcx, rdx                    ; rcx = repeat count
      rep stosb
      ret
  ; Read a whole file into the arena described by *mem.
  ; In:  rdi = char *filename, rsi = struct buffer_struct *mem
  ; Out: rax = void *memory (mem->base + mem->used as it was on entry)
  ;      rbx = u64 size of the file in bytes
  ; Side effects: mem->used is advanced by the file size. Traps (int3) when
  ; the file does not fit below mem->max.
  ; r12-r14 are saved and restored; rdi/rsi are kept in the frame so nothing
  ; depends on the platform helpers preserving argument registers.
  ;
  ; Frame:
  ;   [rbp -  8] saved r12      [rbp - 32] filename
  ;   [rbp - 16] saved r13      [rbp - 40] mem
  ;   [rbp - 24] saved r14
  load_entire_file_to_memory:
      push    rbp
      mov     rbp, rsp
      sub     rsp, 128

      mov     [rbp -  8], r12
      mov     [rbp - 16], r13
      mov     [rbp - 24], r14
      mov     [rbp - 32], rdi
      mov     [rbp - 40], rsi

      mov     r12d, [rsi + buffer_struct.used]    ; r12 = used (zero-extended)
      mov     r14, [rsi + buffer_struct.base]
      lea     r14, [r14 + r12]                    ; r14 = destination
      call    platform_get_filesize               ; rdi = filename -> rax = size
      assert rax,jge,0

      mov     rsi, [rbp - 40]
      mov     r13d, [rsi + buffer_struct.max]     ; max is a 32-bit field
      add     r12, rax                            ; r12 = used after the load
      cmp     r12, r13
      jae     load_entire_file_to_memory_error_not_enough_memory
      mov     [rsi + buffer_struct.used], r12d
      mov     r12, rax                            ; r12 = filesize

      mov     rdi, [rbp - 32]
      mov     rsi, PLATFORM_OPEN_READ
      call    platform_open
      assert rax,jg,0
      mov     r13, rax                            ; r13 = FD

      mov     rdi, r13
      mov     rsi, r14
      mov     rdx, r12
      call    read
      assert rax,jge,0

      mov     rdi, r13
      call    close                               ; do not leak the descriptor

      mov     rbx, r12
      mov     rax, r14

      mov     r12, [rbp -  8]
      mov     r13, [rbp - 16]
      mov     r14, [rbp - 24]
      leave
      ret
  load_entire_file_to_memory_error_not_enough_memory:
      int3
      ud2                                         ; never fall through into the next function
  134.  
  ; Load a farbfeld image and convert its 16-bit channels to floats.
  ; In:  rdi = char *filename, rsi = struct buffer_struct *mem
  ; Out: rax = void *memory (converted pixels, 4 floats per pixel)
  ;      rbx = u32 width, rcx = u32 height (both zero-extended)
  ;
  ; Arena use: the converted pixels are placed at mem->base + mem->used (as
  ; on entry). The raw file is loaded MB(40) beyond that and is released
  ; again on return, when mem->used is set to old used + converted bytes.
  ; Traps (int3) when the raw pixel data exceeds MB(40), because the
  ; converted output (twice the size) would then overrun unread input.
  ;
  ; The loop converts two pixels (16 input bytes) per iteration.
  ; NOTE(review): an odd pixel count reads/writes one pixel past the end.
  ;
  ; Frame:
  ;   [rbp -  8] float *dst         [rbp - 16] u32 height
  ;   [rbp - 12] u32 width          [rbp - 20] u32 mem->used on entry
  ;   [rbp - 32] buffer_struct *mem
  load_entire_farbfeld_to_memory:
      push    rbp
      mov     rbp, rsp
      sub     rsp, 128

      mov     [rbp - 32], rsi
      mov     eax, [rsi + buffer_struct.used]
      mov     [rbp - 20], eax
      mov     rcx, [rsi + buffer_struct.base]
      add     rcx, rax                            ; rax holds used, zero-extended
      mov     [rbp - 8], rcx                      ; dst = base + used

      ; Just reserve enough for the output, so that it won't be an issue
      add     eax, MB(40)
      mov     [rsi + buffer_struct.used], eax
      call    load_entire_file_to_memory          ; rax = raw file, rbx = size

      mov     rsi, rax
      mov     rax, [rsi + 8]                      ; big-endian width (low), height (high)
      mov     rbx, rax
      shr     rbx, 32
      bswap   eax                                 ; eax = width
      bswap   ebx                                 ; ebx = height
      mov     [rbp - 12], eax
      mov     [rbp - 16], ebx
      add     rsi, 16                             ; rsi -> first 16-bit channel

      movaps  xmm1, [g_move_16_to_32_shift_mask_1]    ; shuffle mask, first pixel
      movaps  xmm2, [g_move_16_to_32_shift_mask_2]    ; shuffle mask, second pixel

      ; rbx = width * height * 8 = bytes of 16-bit RGBA input
      mul     rbx                                 ; eax/ebx were zero-extended above
      shl     rax, 3
      mov     rbx, rax
      cmp     rbx, MB(40)
      ja      load_entire_farbfeld_to_memory_error_too_large

      mov     rdi, [rbp - 8]
      xor     edx, edx                            ; rdx = input byte offset
      xor     ecx, ecx                            ; rcx = output byte offset
      movaps  xmm0, [g_divisor]
      cvtdq2ps xmm0, xmm0                         ; divisor as floats
  load_entire_farbfeld_to_memory_loop_begin:
      movups  xmm4, [rsi + rdx]                   ; 16 bytes = two RGBA pixels
      movaps  xmm5, xmm4
      add     rdx, 16
      pshufb  xmm4, xmm1
      pshufb  xmm5, xmm2
      cvtdq2ps xmm4, xmm4
      cvtdq2ps xmm5, xmm5
      divps   xmm4, xmm0
      divps   xmm5, xmm0
      movups  [rdi + rcx], xmm4
      movups  [rdi + rcx + 16], xmm5
      add     rcx, 32

      cmp     rdx, rbx
      jb      load_entire_farbfeld_to_memory_loop_begin

      ; keep the converted pixels, drop the raw file
      mov     rsi, [rbp - 32]
      mov     eax, [rbp - 20]
      add     eax, ecx
      mov     [rsi + buffer_struct.used], eax

      mov     rax, [rbp - 8]                      ; return the float data
      mov     ebx, [rbp - 12]
      mov     ecx, [rbp - 16]

      leave
      ret
  load_entire_farbfeld_to_memory_error_too_large:
      int3
      ud2
  ; Convert a float image into a newly allocated 8-bit-per-channel copy.
  ; In:  rdi = image_data_struct *data (type must be IMAGE_DATA_F32)
  ;      rsi = struct buffer_struct *mem
  ; Out: rax = image_data_struct *new, allocated from *mem and immediately
  ;      followed by its pixel data (width * height * 4 bytes)
  ; Clobbers: rbx, rcx, rdx, rsi, rdi, r8, xmm0-xmm5.
  ; Traps (int3) when the arena cannot hold the new image.
  ;
  ; The loop turns 64 input bytes (16 floats) into 16 output bytes per
  ; iteration and runs until width * height * 4 output bytes are written.
  ; NOTE(review): sizes that are not a multiple of 16 bytes overrun by up to
  ; one iteration.
  ;
  ; Frame:
  ;   [rbp -  8] output size in bytes (width * height * 4)
  ;   [rbp - 16] source image_data_struct
  ;   [rbp - 24] new image_data_struct
  convert_image_data_f32_to_u8:
      push    rbp
      mov     rbp, rsp
      sub     rsp, 128

      mov     al, [rdi + image_data_struct.type]
      assert al,je,IMAGE_DATA_F32
      mov     eax, [rdi + image_data_struct.width]
      mov     ebx, [rdi + image_data_struct.height]
      mul     ebx
      shl     rax, 2
      mov     [rbp - 8], rax
      mov     [rbp - 16], rdi

      ; allocate header + pixels from the arena
      mov     eax, [rsi + buffer_struct.used]
      mov     rcx, [rsi + buffer_struct.base]
      add     rcx, rax
      mov     [rbp - 24], rcx
      mov     ebx, [rsi + buffer_struct.max]
      add     rax, image_data_struct_size
      add     rax, [rbp - 8]                      ; rax = used after this allocation
      cmp     rax, rbx
      jb      convert_image_data_f32_to_u8_space_is_available
      int3                                        ; Space is NOT available
  convert_image_data_f32_to_u8_space_is_available:
      mov     [rsi + buffer_struct.used], eax

      ; fill in the new header
      mov     rsi, [rbp - 24]
      mov     rbx, rsi
      add     rbx, image_data_struct_size
      mov     [rsi + image_data_struct.data], rbx
      mov     byte [rsi + image_data_struct.type], IMAGE_DATA_U8
      mov     eax, [rdi + image_data_struct.width]
      mov     [rsi + image_data_struct.width], eax
      mov     eax, [rdi + image_data_struct.height]
      mov     [rsi + image_data_struct.height], eax
      mov     rsi, rbx                            ; rsi -> new pixel data
      mov     rdi, [rdi + image_data_struct.data] ; rdi -> source floats

      movaps  xmm0, [g_multiplicant]
      cvtdq2ps xmm0, xmm0                         ; scale factor as floats
      movaps  xmm1, [g_move_32_to_8_shift_mask]   ; dword -> byte shuffle mask

      mov     r8, [rbp - 8]                       ; loop bound: output bytes to write
      xor     edx, edx                            ; rdx = input byte offset
      xor     ecx, ecx                            ; rcx = output byte offset
  convert_image_data_f32_to_u8_loop_begin:
      movups  xmm2, [rdi + rdx + 0]
      movups  xmm3, [rdi + rdx + 16]
      movups  xmm4, [rdi + rdx + 32]
      movups  xmm5, [rdi + rdx + 48]
      add     rdx, 64                             ; 16 floats consumed
      mulps   xmm2, xmm0
      mulps   xmm3, xmm0
      mulps   xmm4, xmm0
      mulps   xmm5, xmm0
      cvttps2dq xmm2, xmm2
      cvttps2dq xmm3, xmm3
      cvttps2dq xmm4, xmm4
      cvttps2dq xmm5, xmm5
      pshufb  xmm2, xmm1
      pshufb  xmm3, xmm1
      pshufb  xmm4, xmm1
      pshufb  xmm5, xmm1
      ; NOTE(review): the byte placement and pixel order depend on
      ; g_move_32_to_8_shift_mask, which is defined elsewhere - confirm that
      ; shifting right is the intended direction.
      psrldq  xmm3, 4
      psrldq  xmm4, 8
      psrldq  xmm5, 12
      por     xmm2, xmm3
      por     xmm2, xmm4
      por     xmm2, xmm5
      movdqu  [rsi + rcx], xmm2                   ; 16 packed 8-bit channels
      add     rcx, 16

      cmp     rcx, r8
      jb      convert_image_data_f32_to_u8_loop_begin

      mov     rax, [rbp - 24]
      leave
      ret
  ; Copy `len` bytes from `src` to `dst`, front to back, one byte at a time.
  ; In:  rdi = u8 *dst, rsi = u8 *src, rdx = u64 len
  ; Out: nothing documented; on return rdi and rsi are advanced by len, rcx = 0.
  ; Assumes the direction flag is clear.
  memcpy:
      mov     rcx, rdx                    ; rcx = repeat count for movsb
      rep movsb
      ret
  ; Program entry point, part 1: flags, stack alignment, keyboard device,
  ; render size and the main memory arena.
  ; Leaves r15 = back buffer size in bytes (width * height * 4) for the code
  ; that follows.
  _start:
      mov     byte [g_running], 1
      mov     byte [g_input_thread_running], 1

      ; reserve 16 bytes and round rsp down to a 16-byte boundary
      sub     rsp, 16
      and     rsp, -16

      ; g_begin_time = 64-bit time-stamp counter; rdtsc returns it in edx:eax
      rdtsc
      shl     rdx, 32
      or      rax, rdx
      mov     [g_begin_time], rax

      mov     rdi, g_keyboard_device_path
      mov     rsi, PLATFORM_OPEN_READ
      call    platform_open
      assert rax,jge,0
      mov     [g_keyboard_device], eax

      ; NOTE(review): render_width / render_height are written as qwords here
      ; and read as dwords later in this file - confirm their declared size.
      mov     rdi, 1280
      mov     rsi, 720
      mov     [render_width], rdi
      mov     [render_height], rsi
      ;call platform_init_visuals

      ; buffer size = width * height * 4; assumed to fit in 32 bits
      mov     rcx, rdi
      mov     rax, rsi
      mul     ecx
      mov     ecx, 4
      mul     ecx
      mov     [g_buffer_size], rax
      mov     r15, rax ; TODO(ian): check if rdi can be changed by the linux standard

      ; the arena starts with the back buffer, rounded up to 16 bytes
      add     rax, 0xF
      and     rax, -16
      mov     [g_memory + buffer_struct.used], eax
      add     rax, MB(80)
      mov     [g_memory + buffer_struct.max], eax
      mov     rdi, rax
      call    platform_alloc
      assert rax,jg,0
      mov     [g_memory + buffer_struct.base], rax
      mov     [g_backbuffer], rax
  349.  
  350.     ; SETUP GRID
  351.     ; GRID MEMORY
  352.     mov eax, [g_grid + grid_struct.num_x]
  353.     mov ebx, [g_grid + grid_struct.num_y]
  354.     mul rbx
  355.     mov ebx, [g_memory + buffer_struct.used]
  356.     mov rdi, [g_memory + buffer_struct.base]
  357.     xor rcx, rcx
  358.     lea rdi, [rdi + rbx]
  359.     mov [g_grid + grid_struct.data_x], rdi
  360.     lea rcx, [rcx + rax*4]
  361.     lea rdi, [rdi + rax*4]
  362.     mov [g_grid + grid_struct.data_y], rdi
  363.     lea rcx, [rcx + rax*4]
  364.     lea rdi, [rdi + rax*4]
  365.     mov [g_grid + grid_struct.data_visual], rdi
  366.     lea rcx, [rcx + rax*4]
  367.     lea rdi, [rdi + rax*4]
  368.     mov [g_grid + grid_struct.data_color], rdi
  369.     lea rcx, [rcx + rax*8]
  370.     lea rcx, [rcx + rax*8]
  371.     lea rdi, [rdi + rax*8]
  372.     lea rdi, [rdi + rax*8]
  373.     mov [g_grid + grid_struct.data_block], rdi
  374.     lea rcx, [rcx + rax*4]
  375.     lea rdi, [rdi + rax*4]
  376.     add ebx, ecx
  377.     mov [g_memory + buffer_struct.used], ebx
  378.     ; GRID SIZE
  379.     xor rdx, rdx
  380.     mov eax, [g_grid + grid_struct.bounds_x]
  381.     mov ebx, [g_grid + grid_struct.num_x]
  382.     div rbx
  383.     xor rdx, rdx
  384.     mov [g_grid + grid_struct.size_x], eax
  385.     mov eax, [g_grid + grid_struct.bounds_y]
  386.     mov ebx, [g_grid + grid_struct.num_y]
  387.     div rbx
  388.     mov [g_grid + grid_struct.size_y], eax
  389.     ; SET GRID MEMORY
  390.     xor rcx, rcx ; Data iterator
  391.     xor rdx, rdx
  392.     mov eax, [g_grid + grid_struct.pos_x]
  393.     mov rdi, [g_grid + grid_struct.data_x]
  394. .GRID_SETUP_LOOP_X:
  395.     lea rsi, [rdi + rcx * 4]
  396.     mov [rsi], eax
  397.     add eax, [g_grid + grid_struct.size_x]
  398.     inc rcx
  399.     inc rdx
  400.     cmp edx, [g_grid + grid_struct.num_x]
  401.     jl .GRID_SETUP_LOOP_X_DONT_BREAK
  402.     xor rdx, rdx
  403.     xor rax, rax
  404. .GRID_SETUP_LOOP_X_DONT_BREAK:
  405.     ;cmp rcx, [g_grid + grid_struct.num_x]
  406.     cmp rcx, [rsp - 8]
  407.     jl .GRID_SETUP_LOOP_X
  408.     xor rcx, rcx
  409.     xor rdx, rdx
  410.     mov eax, [g_grid + grid_struct.pos_y]
  411.     mov rdi, [g_grid + grid_struct.data_y]
  412. .GRID_SETUP_LOOP_Y:
  413.     lea rsi, [rdi + rcx * 4]
  414.     mov [rsi], eax
  415.     inc rcx
  416.     inc rdx
  417.     cmp edx, [g_grid + grid_struct.num_x]
  418.     jl .GRID_SETUP_LOOP_Y_DONT_BREAK
  419.     add eax, [g_grid + grid_struct.size_y]
  420.     xor rdx, rdx
  421. .GRID_SETUP_LOOP_Y_DONT_BREAK:
  422.     cmp rcx, [rsp - 8]
  423.     jl .GRID_SETUP_LOOP_Y
  424.     xor rcx, rcx
  425. .GRID_SETUP_LOOP_COLOR:
  426.     mov edi, 0x00FF00FF
  427.     call byte_color_to_floating_point
  428.     mov rsi, [g_grid + grid_struct.data_color]
  429.     lea rsi, [rsi + rcx*8]
  430.     lea rsi, [rsi + rcx*8]
  431.     movups [rsi], xmm0
  432.     inc rcx
  433.     cmp rcx, [rsp - 8]
  434.     jl .GRID_SETUP_LOOP_COLOR
  435.  
  436.     mov rdi, g_filename
  437.     mov rsi, g_memory
  438.     call load_entire_farbfeld_to_memory
  439.     mov [g_test_image_f32 + image_data_struct.data],        rax
  440.     mov [g_test_image_f32 + image_data_struct.width],       ebx
  441.     mov [g_test_image_f32 + image_data_struct.height],      ecx
  442.     mov byte [g_test_image_f32 + image_data_struct.type],   IMAGE_DATA_F32
  443.  
  444.     mov r10, rax
  445.     mov rax, rbx
  446.     mul rcx
  447.     shl rax, 4
  448.  
  449.     mov r14, rax
  450.     mov rdi, g_f32_filename
  451.     mov rsi, PLATFORM_OPEN_WRITE
  452.     call platform_open
  453.     mov rdx, r14
  454.     mov rdi, rax
  455.     mov rsi, r10
  456.     call write
  457.     call close
  458.  
  459.     mov rdi, g_test_image_f32
  460.     mov rsi, g_memory
  461.     call convert_image_data_f32_to_u8
  462.     mov [g_test_image_u8_ptr], rax
  463.  
  464.     mov rdi, g_u8_filename
  465.     mov rsi, PLATFORM_OPEN_WRITE
  466.     call platform_open
  467.     mov rdi, rax
  468.     mov rbp, [g_test_image_u8_ptr]
  469.     mov rsi, [rbp + image_data_struct.data]
  470.     mov eax, [rbp + image_data_struct.width]
  471.     mov edx, [rbp + image_data_struct.height]
  472.     mul rdx
  473.     shl rax, 2
  474.     mov rdx, rax
  475.     call write
  476.     call close
  477.  
  478.     mov rdi, g_frontbuffer_file
  479.     mov rsi, PLATFORM_OPEN_RW
  480.     call platform_open
  481.     mov r8, rax
  482.     mov rdi, rax
  483.     mov rsi, r15
  484.     call ftruncate
  485.     mov rdi, r15
  486.     ;mov esi, dword [g_platform_visual_info + fbdev]
  487.     mov rsi, r8
  488.     call platform_alloc_file
  489.     mov [g_frontbuffer], rax
  490.     assert rax,jg,0
  491.  
  492.     mov rdi, [g_backbuffer]
  493.     mov rsi, 0x0
  494.     mov rdx, r15
  495.     call memset
  496.     mov rdi, [g_frontbuffer]
  497.     mov rsi, 0x30
  498.     mov rdx, r15
  499.     call memset
  500.  
  501.     ;mov eax, [g_player_rect + rect_struct.x]
  502.     mov dword [rsp - 16], 10 ; x
  503.     mov dword [rsp - 12], 560 ; y
  504.     mov dword [rsp - 8], 64 ; width
  505.     mov dword [rsp - 4], 32 ; height
  506.     movdqa xmm0, [rsp - 16]
  507.     cvtdq2ps xmm0, xmm0
  508.     movdqa [g_player_rect], xmm0
  509.  
  510.     mov eax, [g_ball_start_pos]
  511.     mov dword [rsp - 16], eax
  512.     mov eax, [g_ball_start_pos + 4]
  513.     mov dword [rsp - 12], eax
  514.     mov dword [rsp - 8],  16
  515.     mov dword [rsp - 4],  16
  516.     movups xmm0, [rsp - 16]
  517.     cvtdq2ps xmm0, xmm0
  518.     movaps [g_ball_rect], xmm0
  519.  
  520.     xorps xmm0, xmm0
  521.     xorps xmm1, xmm1
  522.     mov dword [rsp - 8], 0x1
  523.     mov dword [rsp - 4], 0x0
  524.     movlps xmm0, [rsp - 8]
  525.     mov dword [rsp - 8], 60
  526.     mov dword [rsp - 4], 0x0
  527.     movlps xmm1, [rsp - 8]
  528.     divss xmm0, xmm1
  529.     movlps [rsp - 8], xmm0
  530.     mov eax, [rsp - 8]
  531.     mov [g_fixed_delta_time_sec], eax
  532.    
  ; Top of the frame loop: record the frame start time, then drain pending
  ; keyboard events into g_input.
  ;   key down (value != 0): is_down = 0xFF
  ;   key up   (value == 0): was_down = previous is_down, is_down = 0
  ; was_down is cleared at the start of every frame, so it is set only on
  ; the frame in which a release was seen.
  .L0:
      lea     rdi, [g_frame_start_time]
      call    platform_gettime

      mov     byte [g_input + input_struct.move_left + input_action_struct.was_down], 0
      mov     byte [g_input + input_struct.move_right + input_action_struct.was_down], 0
      mov     byte [g_input + input_struct.toggle_editor + input_action_struct.was_down], 0
      mov     byte [g_input + input_struct.toggle_brick + input_action_struct.was_down], 0
      mov     byte [g_input + input_struct.start_round + input_action_struct.was_down], 0

      ; Non-blocking poll on the keyboard. The 8-byte record
      ; { fd, events, revents } sits in a real stack slot: a record below
      ; rsp would be overwritten by the return address pushed by `call`.
      sub     rsp, 16
      mov     eax, [g_keyboard_device]
      mov     [rsp], eax
      mov     word [rsp + 4], POLLIN
      mov     word [rsp + 6], 0
      mov     rdi, rsp
      mov     rsi, 1
      xor     edx, edx                            ; timeout 0: return immediately
      call    poll
      add     rsp, 16
      cmp     rax, 0
      je      .DONE_WITH_INPUT_POLLING
      jl      .poll_error

      ; read up to 64 events into a temporary stack buffer
      sub     rsp, 64 * input_event_struct_size
      mov     edi, [g_keyboard_device]
      mov     rsi, rsp
      mov     rdx, 64 * input_event_struct_size
      call    read
      test    rax, rax
      jle     .INPUT_EVENTS_DONE                  ; error or nothing read
      xor     edx, edx
      mov     ecx, input_event_struct_size
      div     rcx
      mov     r14, rax                            ; r14 = number of whole events
      test    r14, r14
      jz      .INPUT_EVENTS_DONE

      mov     rbp, rsp                            ; rbp -> current event
      xor     ecx, ecx                            ; rcx = event index
  .L1:
      movzx   eax, word [rbp + input_event_struct.type]
      cmp     eax, EV_KEY
      jne     .L2                                 ; EV_SYN and everything else is ignored

      movzx   eax, word [rbp + input_event_struct.code]
      mov     ebx, [rbp + input_event_struct.value]

      ; map the key code to its action; rdi -> input_action_struct
      lea     rdi, [g_input + input_struct.move_left]
      cmp     eax, KEY_LEFT
      je      .KEY_APPLY
      lea     rdi, [g_input + input_struct.move_right]
      cmp     eax, KEY_RIGHT
      je      .KEY_APPLY
      lea     rdi, [g_input + input_struct.toggle_editor]
      cmp     eax, KEY_E
      je      .KEY_APPLY
      lea     rdi, [g_input + input_struct.toggle_brick]
      cmp     eax, KEY_R
      je      .KEY_APPLY
      lea     rdi, [g_input + input_struct.start_round]
      cmp     eax, KEY_SPACE
      je      .KEY_APPLY
      jmp     .L2

  .KEY_APPLY:
      test    ebx, ebx
      jz      .KEY_RELEASED
      mov     byte [rdi + input_action_struct.is_down], 0xFF
      jmp     .L2
  .KEY_RELEASED:
      mov     al, [rdi + input_action_struct.is_down]
      mov     [rdi + input_action_struct.was_down], al
      mov     byte [rdi + input_action_struct.is_down], 0x0
  .L2:
      inc     rcx
      add     rbp, input_event_struct_size
      cmp     rcx, r14
      jb      .L1
  .INPUT_EVENTS_DONE:
      add     rsp, 64 * input_event_struct_size

  .DONE_WITH_INPUT_POLLING:
  660.  
  661.     xor rax, rax
  662.     mov al, [g_input + input_struct.toggle_editor + input_action_struct.is_down]
  663.     not al
  664.     and al, [g_input + input_struct.toggle_editor + input_action_struct.was_down]
  665.     cmp al, 0x0
  666.     je .EDITOR_NO_TOGGLE
  667.     xor rax, rax
  668.     mov al, [g_editor + editor_struct.is_enabled]
  669.     not al
  670.     mov [g_editor + editor_struct.is_enabled], al
  671. .EDITOR_NO_TOGGLE:
  672.  
  673.     mov al, [g_editor + editor_struct.is_enabled]
  674.     cmp al, 0x0
  675.     jne .EDITOR_ENABLED_0
  676.  
  677.     xor rax, rax
  678.     mov al, [g_input + input_struct.start_round + input_action_struct.is_down]
  679.     not al
  680.     and al, [g_input + input_struct.start_round + input_action_struct.was_down]
  681.     and al, 0x1
  682.     cmp al, 0x0
  683.     je .DONT_START_ROUND
  684.     xorps xmm0, xmm0
  685.     mov dword [rsp - 8], 0x4
  686.     mov dword [rsp - 4], -0x4
  687.     movlps xmm0, [rsp - 8]
  688.     cvtdq2ps xmm0, xmm0
  689.     movlps [g_ball_vel], xmm0
  690. .DONT_START_ROUND:
  691.  
  692.     movaps xmm0, [g_player_rect]
  693.     xor rax, rax
  694.     mov byte al, [g_input + input_struct.move_left + input_action_struct.is_down]
  695.     shr rax, 4
  696.     movd xmm1, eax
  697.     cvtdq2ps xmm1, xmm1
  698.     subps xmm0, xmm1
  699.  
  700.     xor rax, rax
  701.     mov byte al, [g_input + input_struct.move_right + input_action_struct.is_down]
  702.     shr rax, 4
  703.     movd xmm1, eax
  704.     cvtdq2ps xmm1, xmm1
  705.     addps xmm0, xmm1
  706.  
  707.     xorps xmm1, xmm1
  708.     movhlps xmm1, xmm0
  709.     movups [rsp - 16], xmm1
  710.     mov dword [rsp - 12], 0x0
  711.     mov dword [rsp -  8], 0x0
  712.     mov dword [rsp -  4], 0x0
  713.     movups xmm1, [rsp - 16]
  714.     movaps xmm2, xmm0
  715.     addps xmm2, xmm1
  716.     movups [rsp - 20], xmm0
  717.     movups [rsp - 16], xmm2
  718.     mov dword [rsp - 12], 0x0
  719.     mov dword [rsp -  8], 0x0
  720.     movups xmm1, [rsp - 20]
  721.     mov dword [rsp - 16], 0x0
  722.     mov eax, [render_width]
  723.     mov [rsp - 12], eax
  724.     mov dword [rsp -  8], 0x0
  725.     mov dword [rsp -  4], 0x0
  726.     movups xmm2, [rsp - 16]
  727.     cvtdq2ps xmm2, xmm2
  728.     cmpps xmm2, xmm1, 0xE
  729.     movups [rsp - 16], xmm2
  730.     cmp dword [rsp - 16], 0x0
  731.     je .PLAYER_BOUNDS_X1_OK
  732.     movups [rsp - 32], xmm0
  733.     mov dword [rsp - 32], 0x0
  734.     movups xmm0, [rsp - 32]
  735. .PLAYER_BOUNDS_X1_OK:
  736.     cmp dword [rsp - 12], 0x0
  737.     jne .PLAYER_BOUNDS_X2_OK
  738.     xorps xmm1, xmm1
  739.     movlhps xmm1, xmm0
  740.     addps xmm1, xmm0
  741.     mov dword [rsp - 16], 0x0
  742.     mov dword [rsp - 12], 0x0
  743.     mov eax, [render_width]
  744.     mov dword [rsp -  8], eax
  745.     mov dword [rsp -  6], 0x0
  746.     movups xmm2, [rsp - 16]
  747.     cvtdq2ps xmm2, xmm2
  748.     subps xmm1, xmm2
  749.     movups [rsp - 16], xmm1
  750.     mov eax, [rsp - 8]
  751.     mov dword [rsp - 16], eax
  752.     mov dword [rsp - 12], 0x0
  753.     mov dword [rsp -  8], 0x0
  754.     mov dword [rsp -  4], 0x0
  755.     movups xmm1, [rsp - 16]
  756.     subps xmm0, xmm1
  757. .PLAYER_BOUNDS_X2_OK:
  758.    
  759.     movaps [g_player_rect], xmm0
  760.  
  761.     xorps xmm0, xmm0
  762.     movlps xmm0, [g_ball_vel]
  763.     movaps xmm1, [g_ball_rect]
  764.     addps xmm1, xmm0
  765.     movaps xmm0, xmm1
  766.     xorps xmm2, xmm2
  767.     movlhps xmm2, xmm0
  768.     addps xmm0, xmm2 ; XMM0 is now in quad form
  769.     mov dword [rsp - 16], 0x0
  770.     mov dword [rsp - 12], 0x0
  771.     mov eax, [render_width]
  772.     mov dword [rsp -  8], eax
  773.     mov eax, [render_height]
  774.     mov dword [rsp -  4], eax
  775.     movups xmm2, [rsp - 16]
  776.     cvtdq2ps xmm2, xmm2
  777.     cmpps xmm2, xmm0, 0xE ; XMM2 > XMM0
  778.     movups [rsp - 16], xmm2
  779.     mov eax, [rsp - 16]
  780.     cmp eax, 0x0
  781.     je .BALL_SCREEN_BOUNDS_X1_OK
  782.     movups xmm1, [g_ball_rect]
  783.     movlps xmm3, [g_ball_vel]
  784.     xorps xmm4, xmm4
  785.     mov dword [rsp - 24], -1
  786.     mov dword [rsp - 20],  1
  787.     movlps xmm4, [rsp - 24]
  788.     cvtdq2ps xmm4, xmm4
  789.     mulps xmm3, xmm4
  790.     movlps [g_ball_vel], xmm3 ; x value of ball velocity has been negated
  791. .BALL_SCREEN_BOUNDS_X1_OK:
  792.     mov eax, [rsp - 12]
  793.     cmp eax, 0x0
  794.     je .BALL_SCREEN_BOUNDS_Y1_OK
  795.     movups xmm1, [g_ball_rect]
  796.     movlps xmm3, [g_ball_vel]
  797.     xorps xmm4, xmm4
  798.     mov dword [rsp - 24],  1
  799.     mov dword [rsp - 20], -1
  800.     movlps xmm4, [rsp - 24]
  801.     cvtdq2ps xmm4, xmm4
  802.     mulps xmm3, xmm4
  803.     movlps [g_ball_vel], xmm3 ; x value of ball velocity has been negated
  804. .BALL_SCREEN_BOUNDS_Y1_OK:
  805.     mov eax, [rsp -  8]
  806.     cmp eax, 0x0
  807.     jne .BALL_SCREEN_BOUNDS_X2_OK
  808.     movups xmm1, [g_ball_rect]
  809.     movlps xmm3, [g_ball_vel]
  810.     xorps xmm4, xmm4
  811.     mov dword [rsp - 24], -1
  812.     mov dword [rsp - 20],  1
  813.     movlps xmm4, [rsp - 24]
  814.     cvtdq2ps xmm4, xmm4
  815.     mulps xmm3, xmm4
  816.     movlps [g_ball_vel], xmm3 ; x value of ball velocity has been negated
  817. .BALL_SCREEN_BOUNDS_X2_OK:
  818.     mov eax, [rsp -  4]
  819.     cmp eax, 0x0
  820.     jne .BALL_SCREEN_BOUNDS_Y2_OK
  821.     movups xmm1, [g_ball_rect]
  822.     movlps xmm3, [g_ball_vel]
  823.     xorps xmm4, xmm4
  824.     mov dword [rsp - 24],  1
  825.     mov dword [rsp - 20], -1
  826.     movlps xmm4, [rsp - 24]
  827.     cvtdq2ps xmm4, xmm4
  828.     mulps xmm3, xmm4
  829.     movlps [g_ball_vel], xmm3 ; x value of ball velocity has been negated
  830. .BALL_SCREEN_BOUNDS_Y2_OK:
  831.    
  832.     movaps [g_ball_rect], xmm1
  833.  
  ; Render phase: background image, player, grid, optional editor overlay,
  ; ball, then copy the back buffer to the front buffer.
  .EDITOR_ENABLED_0:
      ; background: g_buffer_size bytes of the 8-bit test image
      ; NOTE(review): assumes the image holds at least g_buffer_size bytes.
      mov     rdx, [g_buffer_size]
      mov     rdi, [g_backbuffer]
      mov     rax, [g_test_image_u8_ptr]
      mov     rax, [rax + image_data_struct.data]
      mov     rsi, rax
      call    memcpy

      ; player rectangle
      mov     edi, 0x0000FFFF
      call    byte_color_to_floating_point
      movaps  xmm1, xmm0                          ; xmm1 = colour
      movaps  xmm0, [g_player_rect]               ; xmm0 = rect
      mov     ecx, 0x2
      mov     rdx, [g_backbuffer]
      call    render_color_rect

      lea     rdi, [g_grid]
      mov     rsi, [g_backbuffer]
      call    render_grid

      mov     al, [g_editor + editor_struct.is_enabled]
      test    al, al
      jz      .EDITOR_DISABLED_0

      ; editor: a released "right" key moves the selection forward by one,
      ; unless that would reach num_x * num_y
      movzx   eax, byte [g_input + input_struct.move_right + input_action_struct.is_down]
      not     al
      and     al, [g_input + input_struct.move_right + input_action_struct.was_down]
      and     eax, 0x1
      mov     ecx, [g_editor + editor_struct.selected_grid_cell]
      lea     edi, [rcx + rax]                    ; candidate = selection + step
      mov     eax, [g_grid + grid_struct.num_x]
      mov     ebx, [g_grid + grid_struct.num_y]
      mul     ebx
      cmp     edi, eax
      cmovb   ecx, edi
      mov     [g_editor + editor_struct.selected_grid_cell], ecx

      ; editor: a released "left" key moves it back by one, not below zero
      movzx   eax, byte [g_input + input_struct.move_left + input_action_struct.is_down]
      not     al
      and     al, [g_input + input_struct.move_left + input_action_struct.was_down]
      and     eax, 0x1
      mov     ecx, [g_editor + editor_struct.selected_grid_cell]
      mov     edi, ecx
      sub     edi, eax
      test    edi, edi
      cmovns  ecx, edi
      mov     [g_editor + editor_struct.selected_grid_cell], ecx

      ; editor: a released "toggle brick" key flips bit 0 of the selected
      ; cell in data_visual and data_block (all other bits are cleared)
      movzx   eax, byte [g_input + input_struct.toggle_brick + input_action_struct.is_down]
      not     al
      and     al, [g_input + input_struct.toggle_brick + input_action_struct.was_down]
      and     al, 0x1
      jz      .DONT_TOGGLE_BRICK
      mov     edi, [g_editor + editor_struct.selected_grid_cell]
      mov     rsi, [g_grid + grid_struct.data_visual]
      lea     rsi, [rsi + rdi*4]
      mov     ecx, [rsi]
      and     ecx, 0x1
      xor     ecx, 0x1
      mov     [rsi], ecx
      mov     rsi, [g_grid + grid_struct.data_block]
      lea     rsi, [rsi + rdi*4]
      mov     ecx, [rsi]
      and     ecx, 0x1
      xor     ecx, 0x1
      mov     [rsi], ecx

      lea     rdi, [g_grid]
      mov     rsi, [g_backbuffer]
      call    render_grid
  .DONT_TOGGLE_BRICK:

      ; outline around the selected cell
      mov     edi, 0xFF00FFFF
      call    byte_color_to_floating_point
      movaps  xmm1, xmm0
      mov     edi, [g_editor + editor_struct.selected_grid_cell]
      call    grid_rect_from_idx
      mov     ecx, 0x2
      mov     rdx, [g_backbuffer]
      call    render_outline_rect

  .EDITOR_DISABLED_0:

      ; ball
      ; NOTE(review): ecx is not loaded before this render_color_rect call,
      ; unlike the player call above - confirm whether ecx is an argument.
      mov     edi, 0xFF0000FF
      call    byte_color_to_floating_point
      movaps  xmm1, xmm0
      movaps  xmm0, [g_ball_rect]
      mov     rdx, [g_backbuffer]
      call    render_color_rect

      ; present: back buffer -> front buffer
      mov     rdx, [g_buffer_size]
      mov     rsi, [g_backbuffer]
      mov     rdi, [g_frontbuffer]
      call    memcpy
  933.    
  934.     sub rsp, 16
  935.     mov rdi, rsp
  936.     call platform_gettime
  937.     mov rax, [rsp + tv_sec]
  938.     sub rax, [g_frame_start_time + tv_sec]
  939.     mov [rsp + tv_sec], rax
  940.     mov rax, [rsp + tv_nsec]
  941.     sub rax, [g_frame_start_time + tv_nsec]
  942.     mov [rsp + tv_nsec], rax
  943.     add rsp, 16
  944.  
  945.     mov rsi, 16000000 ; 16 ms
  946.     sub rsi, [rsp - 16 + tv_nsec]
  947.     xor rdi, rdi
  948.     call platform_nanosleep
  949.  
  950.     jmp .L0
  951.  
  952.     mov byte [g_running], 0
  953.     call platform_exit
  954. .poll_error:
  955.     int3
  SECTION .bss
      ; Zero-initialised globals.
      ; Structured objects are laid out with istruc/at/iend so that each field
      ; sits at the offset declared by its struc; every istruc below is closed
      ; by a matching iend (which also pads the instance to <struc>_size).

      g_backbuffer:       resq    1       ; pointer handed to the render_* routines
      g_frontbuffer:      resq    1       ; destination of the per-frame memcpy
      g_buffer_size:      resq    1       ; byte count given to that memcpy
      g_begin_time:       resq    1
      g_memory:
          istruc buffer_struct
              at buffer_struct.base,    resq    1
              at buffer_struct.used,    resd    1
              at buffer_struct.max,     resd    1
          iend
      g_test_image_u8_ptr:    resq    1
      g_test_image_f32:
          istruc image_data_struct
              at image_data_struct.data,      resq    1
              at image_data_struct.width,     resd    1
              at image_data_struct.height,    resd    1
              at image_data_struct.type,      resb    1
              at image_data_struct.image_data_struct_pad0,      resb    7
          iend
      g_input:
          istruc input_struct
              at input_struct.move_left,              resb 2
              at input_struct.move_right,             resb 2
              at input_struct.toggle_editor,          resb 2
              at input_struct.toggle_brick,           resb 2
              at input_struct.start_round,            resb 2
          iend

      alignb 8                            ; input_struct is 10 bytes; realign the qword that follows
      g_last_time:        resq    1
      g_keyboard_device:  resd    1
      g_running:          resb    1       ; byte flag
      g_input_thread_running: resb 1

      alignb 8                            ; the two byte flags above leave the offset odd-sized
      g_frame_start_time: resq    2       ; read through the tv_sec / tv_nsec offsets
      g_fixed_delta_time_sec: resd    1
  SECTION .data
      ; Initialised globals.
      ; Alignment padding is filled with zero bytes explicitly (NASM's default
      ; fill for `align` is the NOP opcode, which is meaningless in data).
      ; Every istruc instance is closed with iend so it is padded to the full
      ; struc size and the preprocessor context is popped.

      g_color_array:      db      0x80,0x30,0xFF,0xAA
      g_filename:         db      "test.ff",0
      g_f32_filename:     db      "image.f32",0
      g_u8_filename:     db      "image.u8",0

      align 16, db 0
      ; g_ball_rect is loaded with movaps, so it must stay 16-byte aligned.
      g_ball_rect:        dd    0x0,0x0,0x0,0x0
      ; 16-byte byte-selector masks; each one is exactly 16 bytes, so the whole
      ; run below keeps the 16-byte alignment established above.
      ; NOTE(review): these look like byte-shuffle control masks (0xFF = zero
      ; the lane) - confirm against the code that loads them.
      g_move_16_to_32_shift_mask_1:     dq    0xFFFF0302FFFF0100,0xFFFF0706FFFF0504
      g_move_16_to_32_shift_mask_2:     dq    0xFFFF0B0AFFFF0908,0xFFFF0F0EFFFF0D0C
      ;g_move_32_to_8_shift_mask:        dq    0xFFFFFFFFFFFFFFFF,0x0C080400FFFFFFFF ; RGBA
      g_move_32_to_8_shift_mask:        dq    0xFFFFFFFFFFFFFFFF,0x0C000408FFFFFFFF  ; BGRA (correct)
      ;g_move_32_to_8_shift_mask:        dq    0xFFFFFFFFFFFFFFFF,0x0004080CFFFFFFFF  ; ABGR
      ;g_move_32_to_8_shift_mask:        dq    0xFFFFFFFFFFFFFFFF,0x0804000CFFFFFFFF ; ARGB
      g_temp_128:                       dq    0xFFFFFFFFFFFFFFFF,0xFFFF00000000FFFF
      g_divisor:                        dq    0x0000FFFF0000FFFF,0x0000FFFF0000FFFF ; Max for 16 bit
      g_multiplicant:                   dq    0x000000FF000000FF,0x000000FF000000FF ; Max for  8 bit

      g_frontbuffer_file:               db    "/tmp/nasm_screen",0
      g_keyboard_device_path:           db    "/dev/input/event3",0

      align 8, db 0                     ; the strings above leave the offset unaligned for a qword
      g_input_thread_stack_size:        dq    0x1000000
      g_input_key_left_down:            db    0
      g_input_key_right_down:           db    0

      align 16, db 0
      g_player_rect:
          istruc rect_struct
              at rect_struct.x,   dd      0x0
              at rect_struct.y,   dd      0x0
              at rect_struct.w,   dd      0x0
              at rect_struct.h,   dd      0x0
          iend
      g_grid:
          istruc grid_struct
              at grid_struct.num_x,               dd 0xA
              at grid_struct.num_y,               dd 0x8
              at grid_struct.size_x,              dd 0x0
              at grid_struct.size_y,              dd 0x0
              at grid_struct.pos_x,               dd 0x0
              at grid_struct.pos_y,               dd 0x0
              at grid_struct.bounds_x,            dd 1280
              at grid_struct.bounds_y,            dd 540
              at grid_struct.data_x,              dq 0x0
              at grid_struct.data_y,              dq 0x0
              at grid_struct.data_visual,         dq 0x0
              at grid_struct.data_color,          dq 0x0
              at grid_struct.data_block,          dq 0x0
          iend
      g_editor:
          istruc editor_struct
              at editor_struct.is_enabled,                db 0x0
              ; pad0 is declared as 7 bytes; initialise all of them explicitly
              ; instead of relying on the next `at` to fill the gap.
              at editor_struct.pad0,                      times 7 db 0x0
              at editor_struct.selected_grid_cell,        dd 0x0
          iend
      g_ball_start_pos: dd 100,500
      g_ball_vel: dd 0x0, 0x0
  1038.  
  ; ---------------------------------------------------------------------
  ; Assemble-time constants (equ emits no bytes, so the current section
  ; is irrelevant here).
  ; ---------------------------------------------------------------------

  ; Image payload tags.
  ; NOTE(review): presumably the values stored in image_data_struct.type -
  ; confirm against the loaders.
  IMAGE_DATA_F32  equ 0
  IMAGE_DATA_U8   equ 1

  ; Event types; the values match EV_SYN / EV_KEY in Linux's
  ; input-event-codes.h.
  EV_SYN          equ 0
  EV_KEY          equ 1

  ; Key codes, listed in ascending numeric order; the values match the
  ; identically named KEY_* codes in Linux's input-event-codes.h.
  KEY_E           equ 18
  KEY_R           equ 19
  KEY_SPACE       equ 57
  KEY_UP          equ 103
  KEY_LEFT        equ 105
  KEY_RIGHT       equ 106
  KEY_DOWN        equ 108
  1051.  
  1052. ; vim: set ts=4 sw=4 tw=0 et:
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement