Guest User

Untitled

a guest
Aug 14th, 2018
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.76 KB | None | 0 0
  1. Optimizing x64 assembler MUL loop
  /*
   * muladd - multiply-accumulate a multi-limb number by a single limb.
   *
   * Computes r[0..len-1] += a[0..len-1] * b, propagating the carry from limb
   * to limb, and stores the final carry in r[len].
   *
   * r   : destination limbs, least-significant first. Must have room for
   *       len + 1 elements; r[len] is overwritten (not added to).
   * a   : source limbs, least-significant first, len elements.
   * len : number of limbs in a. With len == 0 only r[0] is written (to 0).
   * b   : single-limb multiplier.
   *
   * The ">> 32" assumes `unsigned` is 32 bits wide. The accumulator uses the
   * standard `unsigned long long` (at least 64 bits) rather than the
   * compiler-specific `unsigned __int64`, so the function builds on any
   * C99/C++11 compiler.
   */
  void muladd(unsigned* r, const unsigned* a, unsigned len, unsigned b) {
      /* Running carry. It cannot overflow 64 bits:
       * (2^32-1)^2 + (2^32-1) + (2^32-1) == 2^64 - 1. */
      unsigned long long carry = 0;
      unsigned i;

      for (i = 0; i < len; ++i) {
          carry += (unsigned long long)a[i] * b + r[i];
          r[i] = (unsigned)carry;   /* keep the low 32 bits in place */
          carry >>= 32;             /* high 32 bits feed the next limb */
      }

      r[len] = (unsigned)carry;     /* save the final carry-out */
  }
  13.  
  ; One multiply-accumulate step on a 64-bit limb (QWORD operands, index
  ; scaled by 8): r[i] = low64(a[i]*b + r[i] + carry), r11 = high64(...).
  ; Writes rax, rdx, rsi, r11 and the flags; reads rdi, rbx, r10, r11, r14.
  ; Neither adc can overflow rdx: (2^64-1)^2 + 2*(2^64-1) == 2^128 - 1.
  14. mov rax, rdi ; rdi = b; mul takes its implicit operand from rax
  15. mul QWORD PTR [rbx+r10*8-64] ; rdx:rax = a[i] * b; r10 = i (rbx presumably = a -- confirm)
  16. mov rsi, QWORD PTR [r14+r10*8-64] ; r14 = r; rsi = r[i]
  17. add rax, rsi ; low half += r[i]
  18. adc rdx, 0 ; carry from that add goes into the high half
  19. add rax, r11 ; r11 = carry-in ("of") left by the previous limb
  20. adc rdx, 0 ; carry from that add goes into the high half
  21. mov QWORD PTR [r14+r10*8-64], rax ; r[i] = low 64 bits of the sum
  22. mov r11, rdx ; high 64 bits become the carry-in for the next limb
  23.  
  24. ; this repeats itself 8 times with different offsets
Add Comment
Please, Sign In to add comment