Advertisement
ArBa

dll_2

Oct 20th, 2019
1,747
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ; A 64-bit function that posterizes values in 2^8 values
  2. ;
  3. ; The function has prototype:
  4. ;   void posterize(unsigned short array[], uint64_t length, char levelNo)
  5. ;       array - array of values to posterize
  6. ;       length - length of array
  7. ;       levelNo - number of posterize levels. Cube root of number of colors
  8. ; -----------------------------------------------------------------------------
  9.  
  10. ; Gnu/Linux
  11. ; rdi – 1 argument - array
  12. ; rsi – 2 argument - length
  13. ; rdx – 3 argument - levelNo
  14.  
  15. ; Windows (TODO check if correct, change when working on Windows)
  16. ; rcx - 1 argument
  17. ; rdx - 2 argument
  18. ; r8  - 3 argument
  19.  
  20. ; Register roles inside posterize (all caller-saved under System V AMD64):
  21. ;   rdi  - address of the next unprocessed element
  22. ;   rsi  - number of elements still to process
  23. ;   r9d  - step - 1, used as the AND mask of the modulo operation
  24. ;   r10d - half of the step, added before the modulo so values round
  25. ;   zmm3 - half step broadcast to all 32 word lanes
  26. ;   zmm4 - step - 1 broadcast to all 32 word lanes
  27. ;   k1   - lane mask selecting the elements of the last, partial vector
  28. ;
  29. ; Needs AVX-512F and AVX-512BW. Nothing is returned; the array is changed in place.
  30. 
  31. WORDS_PER_VECTOR    equ 32          ;16-bit lanes in one zmm register
  32. BYTES_PER_VECTOR    equ 64          ;size of one zmm register in bytes
  33. MAX_BYTE_VALUE      equ 255         ;value range that is divided into levels
  34. 
  35.     global  posterize
  36.     section .text
  37. 
  38. 
  39. posterize:
  40.     test    rsi, rsi                ;length is unsigned: only 0 means "empty"
  41.     jz      .done
  42.     movzx   ecx, dl                 ;levelNo is a char, upper bits of rdx are undefined
  43.     test    ecx, ecx
  44.     jz      .done                   ;levelNo == 0 would raise a divide error below
  45. 
  46.     ;step = MAX_BYTE_VALUE / levelNo  (1..255 because levelNo is 1..255)
  47.     mov     eax, MAX_BYTE_VALUE
  48.     xor     edx, edx                ;div uses edx:eax as the dividend
  49.     div     ecx
  50.     mov     r9d, eax
  51.     mov     r10d, eax
  52.     dec     r9d                     ;r9d  = step - 1
  53.     shr     r10d, 1                 ;r10d = step / 2
  54. 
  55.     ;both vectors are loop invariant, so build them once
  56.     vpbroadcastw zmm3, r10d         ;every lane = step / 2
  57.     vpbroadcastw zmm4, r9d          ;every lane = step - 1
  58. 
  59.     cmp     rsi, WORDS_PER_VECTOR
  60.     jb      .tail                   ;fewer than 32 elements in total
  61. 
  62. 
  63.     ;NOTE(review): "value AND (step - 1)" equals "value MOD step" only when
  64.     ;step is a power of two. The original algorithm is kept unchanged here.
  65.     ;vpaddw wraps at 16 bits; assumes inputs are small enough (<= 255) - TODO confirm.
  66. .full_vector:
  67.     vmovdqu16   zmm0, [rdi]         ;load 32 elements
  68.     vpaddw      zmm0, zmm0, zmm3    ;value + step / 2
  69.     vpandq      zmm1, zmm4, zmm0    ;remainder = (value + step / 2) AND (step - 1)
  70.     vpsubw      zmm2, zmm0, zmm1    ;result = (value + step / 2) - remainder
  71.     vmovdqu16   [rdi], zmm2         ;store 32 elements
  72. 
  73.     add     rdi, BYTES_PER_VECTOR   ;the pointer moves in bytes ...
  74.     sub     rsi, WORDS_PER_VECTOR   ;... the counter moves in elements
  75.     cmp     rsi, WORDS_PER_VECTOR
  76.     jae     .full_vector
  77. 
  78.     test    rsi, rsi
  79.     jz      .exit                   ;length was a multiple of 32
  80. 
  81. 
  82. .tail:
  83.     ;here 1 <= rsi <= 31: process the rest without touching memory
  84.     ;behind the end of the array
  85.     mov     ecx, esi
  86.     mov     eax, 1
  87.     shl     eax, cl
  88.     dec     eax                     ;eax = lowest "rsi" bits set
  89.     kmovd   k1, eax
  90. 
  91.     vmovdqu16   zmm0{k1}{z}, [rdi]  ;masked-out lanes are not read and become 0
  92.     vpaddw      zmm0, zmm0, zmm3
  93.     vpandq      zmm1, zmm4, zmm0
  94.     vpsubw      zmm2, zmm0, zmm1
  95.     vmovdqu16   [rdi]{k1}, zmm2     ;masked-out lanes are not written
  96. 
  97. 
  98. .exit:
  99.     vzeroupper                      ;clear upper vector state before returning to the caller
  100. .done:
  101.     ret                             ;end function
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement