Advertisement
Guest User

Untitled

a guest
Oct 25th, 2014
235
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.00 KB | None | 0 0
  1. uint64_t div(uint64_t a_lo, uint64_t a_hi, uint64_t b, uint64_t &r)
  2. {
  3. uint64_t p_lo;
  4. uint64_t p_hi;
  5. uint64_t q = 0;
  6.  
  7. auto r_hi = a_hi;
  8. auto r_lo = a_lo;
  9.  
  10. int s = 0;
  11. if(0 == (b >> 63)){
  12.  
  13. // Normalize so quotient estimates are
  14. // no more than 2 in error.
  15.  
  16. // Note: If any bits get shifted out of
  17. // r_hi at this point, the result would
  18. // overflow.
  19.  
  20. s = 63 - bsr(b);
  21. const auto t = 64 - s;
  22.  
  23. b <<= s;
  24. r_hi = (r_hi << s)|(r_lo >> t);
  25. r_lo <<= t;
  26. }
  27.  
  28. const auto b_hi = b >> 32;
  29.  
  30. /*
  31. The first full-by-half division places b
  32. across r_hi and r_lo, making the reduction
  33. step a little complicated.
  34.  
  35. To make this easier, u_hi and u_lo will hold
  36. a shifted image of the remainder.
  37.  
  38. [u_hi|| ][u_lo|| ]
  39. [r_hi|| ][r_lo|| ]
  40. [ b || ]
  41. [p_hi|| ][p_lo|| ]
  42. |
  43. V
  44. [q_hi|| ]
  45. */
  46.  
  47. auto q_hat = r_hi / b_hi;
  48.  
  49. p_lo = mul(b, q_hat, p_hi);
  50.  
  51. const auto u_hi = r_hi >> 32;
  52. const auto u_lo = (r_hi << 32)|(r_lo >> 32);
  53.  
  54. // r -= b*q_hat
  55. //
  56. // At most 2 iterations of this...
  57. while(
  58. (p_hi > u_hi) ||
  59. ((p_hi == u_hi) && (p_lo > u_lo))
  60. )
  61. {
  62. if(p_lo < b){
  63. --p_hi;
  64. }
  65. p_lo -= b;
  66. --q_hat;
  67. }
  68.  
  69. auto w_lo = (p_lo << 32);
  70. auto w_hi = (p_hi << 32)|(p_lo >> 32);
  71.  
  72. if(w_lo > r_lo){
  73. ++w_hi;
  74. }
  75.  
  76. r_lo -= w_lo;
  77. r_hi -= w_hi;
  78.  
  79. q = q_hat << 32;
  80.  
  81. /*
  82. The lower half of the quotient is easier,
  83. as b is now aligned with r_lo.
  84.  
  85. |r_hi][r_lo|| ]
  86. [ b || ]
  87. [p_hi|| ][p_lo|| ]
  88. |
  89. V
  90. [q_hi||q_lo]
  91. */
  92.  
  93. q_hat = ((r_hi << 32)|(r_lo >> 32)) / b_hi;
  94.  
  95. p_lo = mul(b, q_hat, p_hi);
  96.  
  97. // r -= b*q_hat
  98. //
  99. // ...and at most 2 iterations of this.
  100. while(
  101. (p_hi > r_hi) ||
  102. ((p_hi == r_hi) && (p_lo > r_lo))
  103. )
  104. {
  105. if(p_lo < b){
  106. --p_hi;
  107. }
  108. p_lo -= b;
  109. --q_hat;
  110. }
  111.  
  112. r_lo -= p_lo;
  113.  
  114. q |= q_hat;
  115.  
  116. r = r_lo >> s;
  117.  
  118. return q;
  119. }
  120.  
  121. int bsr(uint64_t x)
  122. {
  123. uint64_t y;
  124. uint64_t r;
  125.  
  126. r = (x > 0xFFFFFFFF) << 5; x >>= r;
  127. y = (x > 0xFFFF ) << 4; x >>= y; r |= y;
  128. y = (x > 0xFF ) << 3; x >>= y; r |= y;
  129. y = (x > 0xF ) << 2; x >>= y; r |= y;
  130. y = (x > 0x3 ) << 1; x >>= y; r |= y;
  131.  
  132. return static_cast<int>(r | (x >> 1));
  133. }
  134.  
  135. uint64_t mul(uint64_t a, uint64_t b, uint64_t &y)
  136. {
  137. auto a_lo = a & 0x00000000FFFFFFFF;
  138. auto a_hi = a >> 32;
  139.  
  140. auto b_lo = b & 0x00000000FFFFFFFF;
  141. auto b_hi = b >> 32;
  142.  
  143. auto c0 = a_lo * b_lo;
  144. auto c1 = a_hi * b_lo;
  145. auto c2 = a_hi * b_hi;
  146.  
  147. auto u1 = c1 + (a_lo * b_hi);
  148. if(u1 < _c1){
  149. c2 += 1LL << 32;
  150. }
  151.  
  152. auto u0 = c0 + (u1 << 32);
  153. if(u0 < c0){
  154. ++c2;
  155. }
  156.  
  157. y = c2 + (u1 >> 32);
  158.  
  159. return u0;
  160. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement