
vecglo routines

Mar 2nd, 2016
// numvamp/vecglo -- vector routines -- autogenerated -- DO NOT EDIT
#include <numvamp/numvamp.h>

void
xtnxmmadd3(vdigmsk_p dst,vdigmsk_pc s1,vdigmsk_pc s2)
{
#if XMMCIN
long vidx = 0;
#endif

zprt(ZPXHOWPYG,"xtnxmmadd3: ENTER\n");

zprt(ZPXHOWPYG,"xtnxmmadd3: SR1 %s\n",mtxvdmp("sr1",s1,VORDMAX));
zprt(ZPXHOWPYG,"xtnxmmadd3: SR2 %s\n",mtxvdmp("sr2",s2,VORDMAX));

__asm__ __volatile__(
// load our constants once
" movdqu %[mmnine],%[rgnine]\n" // get the limit
" movdqu %[mmcout],%[rgcout]\n" // get the carry out

#if XMMCIN
  22. " pxor %[rgcin],%[rgcin]\n" // get the carry in
#endif

// loop on double quad words
"1:\n"
AVL(s1,rgsum) // get s1
AVL(s2,rgcmp) // get s2
" paddb %[rgcmp],%[rgsum]\n" // s1 += s2

#if XMMCIN
" paddb %[rgcin],%[rgsum]\n" // s1 += cin
#endif

// ripple carry loop
"2:\n"
" movdqu %[rgsum],%[rgcmp]\n" // put sum in compare reg

" pcmpgtb %[rgnine],%[rgcmp]\n" // compare: sum gt 9
" ptest %[rgcmp],%[rgcmp]\n" // did we carry out?
" jz 3f\n" // no, fly

" movdqu %[rgnine],%[rgten]\n" // get 9 array
" paddb %[rgcout],%[rgten]\n" // get 10 array

" pand %[rgcmp],%[rgten]\n" // get the modulus mask
" psubb %[rgten],%[rgsum]\n" // apply modulus to sum

#if XMMCIN
" movdqu %[rgcmp],%[rgcin]\n" // save carry in for next
#endif

" pslldq $1,%[rgcmp]\n" // create mask for carry out
" pand %[rgcout],%[rgcmp]\n" // convert it to ones

" paddb %[rgcmp],%[rgsum]\n" // add carry to sum
" jmp 2b\n" // loop until no more carry

// we've completed the ripple carry
  60. "3:\n"
  61. AVS(rgsum,dst) // store final result
  62.  
  63. #if XMMCIN
  64. " add %[vidxinc],%[rgvidx]\n" // increment the index reg
  65. " cmp %[vidxmax],%[rgvidx]\n" // any more to do?
  66. " jge 7f\n" // no, fly
  67.  
  68. " psrldq $15,%[rgcin]\n" // create mask for carry in
  69. " pand %[rgcout],%[rgcin]\n" // convert it to ones
  70.  
  71. " jmp 1b\n" // do next double quad
  72. #else
  73. " jmp 7f\n" // go to the exit
  74. #endif
  75.  
: [rgnine] "=&x" (xmmtmp_0),
[rgcout] "=&x" (xmmtmp_0),
[rgcin] "=&x" (xmmtmp_0),
[rgcmp] "=&x" (xmmtmp_0),
[rgsum] "=&x" (xmmtmp_0),
#if XMMCIN
[rgvidx] "+r" (vidx), // read/write: the asm increments the index reg
#endif
[rgten] "=&x" (xmmtmp_0)

: [dst] "r" (dst),
[s1] "r" (s1),
[s2] "r" (s2),
#if XMMCIN
[vidxinc] "n" (VORDXMM),
[vidxmax] "n" (VORDMAX),
#endif
[mmnine] "m" (xmm_nine),
[mmcout] "m" (xmm_cry)

: "memory"); // AVS stores through dst, so the clobber must stay active

// our exit
__asm__ __volatile__(
"7:\n"
:
:
: );

zprt(ZPXHOWPYG,"xtnxmmadd3: RTN %s\n",mtxvdmp("dst",dst,VORDMAX));

zprt(ZPXHOWPYG,"xtnxmmadd3: EXIT\n");
}
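
// For reference only: a minimal scalar sketch of the same algorithm, one
// decimal digit per byte, least significant digit first. NDIGITS and the
// raw unsigned char buffers are illustrative assumptions, not the numvamp
// types used above.
enum { NDIGITS = 16 }; // hypothetical: one XMM's worth of digit bytes

static void
scalar_add3(unsigned char *dst,const unsigned char *s1,const unsigned char *s2)
{
unsigned char sum[NDIGITS];
int carried;

for (int i = 0; i < NDIGITS; ++i) // s1 += s2, per digit
sum[i] = s1[i] + s2[i];

do { // ripple carry loop
carried = 0;
for (int i = 0; i < NDIGITS; ++i) {
if (sum[i] > 9) { // compare: sum gt 9
sum[i] -= 10; // apply modulus to sum
if (i + 1 < NDIGITS)
sum[i + 1] += 1; // add carry to next digit
carried = 1; // top-byte carry drops, as pslldq drops it
}
}
} while (carried); // loop until no more carry

for (int i = 0; i < NDIGITS; ++i) // store final result
dst[i] = sum[i];
}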

void
xtnxmmrgt(vdigmsk_p dst,vdigmsk_pc s1)
{

zprt(ZPXHOWPYG,"xtnxmmrgt: ENTER %s\n",mtxvdmp("s1",s1,VORDMAX));

__asm__ __volatile__(
" movdqu (%[s1]),%[rgsum]\n" // get s1
  116. " pslldq $1,%[rgsum]\n" // create carry out
  117. " movdqu %[rgsum],(%[dst])\n" // store final result
  118.  
  119. : [rgsum] "=x" (xmmtmp_0)
  120.  
  121. : [dst] "r" (dst),
  122. [s1] "r" (s1)
  123.  
  124. : "memory");
  125.  
  126. zprt(ZPXHOWPYG,"xtnxmmrgt: EXIT %s\n",mtxvdmp("dst",dst,VORDMAX));
  127. }
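
// Scalar sketch of the byte shift above, reading the semantics off pslldq
// alone (an assumption, not documented in this file): every digit moves up
// one byte, the vacated low digit becomes zero, the top digit falls off --
// a multiply by 10 under LSD-first storage.
static void
scalar_rgt(unsigned char *dst,const unsigned char *s1)
{
for (int i = NDIGITS - 1; i > 0; --i) // shift digits up one byte
dst[i] = s1[i - 1];
dst[0] = 0; // vacated low digit is zero
}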