Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // numvamp/vecglo -- vector routines -- autogenerated -- DO NOT EDIT
- #include <numvamp/numvamp.h>
- void
- xtnxmmadd3(vdigmsk_p dst,vdigmsk_pc s1,vdigmsk_pc s2)
- {
- #if XMMCIN
- long vidx = 0;
- #endif
- zprt(ZPXHOWPYG,"xtnxmmadd3: ENTER\n");
- zprt(ZPXHOWPYG,"xtnxmmadd3: SR1 %s\n",mtxvdmp("sr1",s1,VORDMAX));
- zprt(ZPXHOWPYG,"xtnxmmadd3: SR2 %s\n",mtxvdmp("sr2",s2,VORDMAX));
- __asm__ __volatile__(
- // load our constants once
- " movdqu %[mmnine],%[rgnine]\n" // get the limit
- " movdqu %[mmcout],%[rgcout]\n" // get the carry out
- #if XMMCIN
- " pxor %[rgcin],%[rgcin]\n" // get the carry in
- #endif
- // loop on double quad words
- "1:\n"
- AVL(s1,rgsum) // get s1
- AVL(s2,rgcmp) // get s2
- " paddb %[rgcmp],%[rgsum]\n" // s1 += s2
- #if XMMCIN
- " paddb %[rgcin],%[rgsum]\n" // s1 += cin
- #endif
- // ripple carry loop
- "2:\n"
- " movdqu %[rgsum],%[rgcmp]\n" // put sum in compare reg
- " pcmpgtb %[rgnine],%[rgcmp]\n" // compare: sum gt 9
- " ptest %[rgcmp],%[rgcmp]\n" // did we carry out?
- " jz 3f\n" // no, fly
- " movdqu %[rgnine],%[rgten]\n" // get 9 array
- " paddb %[rgcout],%[rgten]\n" // get 10 array
- " pand %[rgcmp],%[rgten]\n" // get the modulus mask
- " psubb %[rgten],%[rgsum]\n" // apply modulus to sum
- #if XMMCIN
- " movdqu %[rgcmp],%[rgcin]\n" // save carry in for next
- #endif
- " pslldq $1,%[rgcmp]\n" // create mask for carry out
- " pand %[rgcout],%[rgcmp]\n" // convert it to ones
- " paddb %[rgcmp],%[rgsum]\n" // add carry to sum
- " jmp 2b\n" // loop until no more carry
- // we've complete ripple carry
- "3:\n"
- AVS(rgsum,dst) // store final result
- #if XMMCIN
- " add %[vidxinc],%[rgvidx]\n" // increment the index reg
- " cmp %[vidxmax],%[rgvidx]\n" // any more to do?
- " jge 7f\n" // no, fly
- " psrldq $15,%[rgcin]\n" // create mask for carry in
- " pand %[rgcout],%[rgcin]\n" // convert it to ones
- " jmp 1b\n" // do next double quad
- #else
- " jmp 7f\n" // go to the exit
- #endif
- : [rgnine] "=&x" (xmmtmp_0),
- [rgcout] "=&x" (xmmtmp_0),
- [rgcin] "=&x" (xmmtmp_0),
- [rgcmp] "=&x" (xmmtmp_0),
- [rgsum] "=&x" (xmmtmp_0),
- [rgten] "=&x" (xmmtmp_0)
- : [dst] "r" (dst),
- [s1] "r" (s1),
- [s2] "r" (s2),
- #if XMMCIN
- [rgvidx] "r" (vidx),
- [vidxinc] "n" (VORDXMM),
- [vidxmax] "n" (VORDMAX),
- #endif
- [mmnine] "m" (xmm_nine),
- [mmcout] "m" (xmm_cry)
- : /*"memory"*/);
- // our exit
- __asm__ __volatile__(
- "7:\n"
- :
- :
- : );
- zprt(ZPXHOWPYG,"xtnxmmadd3: RTN %s\n",mtxvdmp("dst",dst,VORDMAX));
- zprt(ZPXHOWPYG,"xtnxmmadd3: EXIT\n");
- }
- void
- xtnxmmrgt(vdigmsk_p dst,vdigmsk_pc s1)
- {
- zprt(ZPXHOWPYG,"xtnxmmrgt: ENTER %s\n",mtxvdmp("s1",s1,VORDMAX));
- __asm__ __volatile__(
- " movdqu (%[s1]),%[rgsum]\n" // get s1
- " pslldq $1,%[rgsum]\n" // create carry out
- " movdqu %[rgsum],(%[dst])\n" // store final result
- : [rgsum] "=x" (xmmtmp_0)
- : [dst] "r" (dst),
- [s1] "r" (s1)
- : "memory");
- zprt(ZPXHOWPYG,"xtnxmmrgt: EXIT %s\n",mtxvdmp("dst",dst,VORDMAX));
- }
Advertisement
Add Comment
Please, Sign In to add comment