Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Optimizing x64 assembler MUL loop
// Multiply-accumulate a multi-word number by a single word: r += a * b.
//
// For each i in [0, len) the 64-bit sum a[i] * b + r[i] + carry is formed;
// its low 32 bits are stored to r[i] and its high 32 bits become the carry
// into the next word. The final carry is stored (not added) to r[len].
//
// r   - accumulator; read at r[0..len-1] and written at r[0..len], so it
//       must have room for len + 1 elements
// a   - multiplicand words a[0..len-1]
// len - number of words in a
// b   - single-word multiplier
//
// Relies on unsigned being 32 bits wide: the carry is extracted with a
// shift by 32.
void muladd(unsigned* r, const unsigned* a, unsigned len, unsigned b) {
    // Standard C type of at least 64 bits, used instead of the
    // compiler-specific __int64 so the function builds everywhere.
    unsigned long long carry = 0;
    unsigned i;

    for (i = 0; i < len; ++i) {
        // Cannot wrap 64 bits: (2^32-1)^2 + 2*(2^32-1) == 2^64 - 1.
        carry += (unsigned long long)a[i] * b + r[i];
        r[i] = (unsigned)carry;   // keep the low word
        carry >>= 32;             // high word carries into the next step
    }

    r[len] = (unsigned)carry;     // i == len here: store the final carry
}
- mov rax, rdi ; rax = b (rdi holds b); MUL takes one factor implicitly from rax
- mul QWORD PTR [rbx+r10*8-64] ; rdx:rax = a[i] * b (unsigned, high:low); r10 = i; rbx presumably = a (setup not shown)
- mov rsi, QWORD PTR [r14+r10*8-64] ; r14 = r; rsi = r[i], read from the same index/displacement as a[i]
- add rax, rsi ; low half += r[i]; CF is set if the low half wraps
- adc rdx, 0 ; fold that carry into the high half
- add rax, r11 ; low half += carry-in; r11 = of (low part) left by the previous step
- adc rdx, 0 ; fold the second possible carry into the high half
- mov QWORD PTR [r14+r10*8-64], rax ; r[i] = low half of the sum (save result)
- mov r11, rdx ; high half becomes of, the carry-in for the next step
- ; this sequence repeats itself 8 times, each copy with a different displacement (offset)
Add Comment
Please, Sign In to add comment