Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- uint64_t div(uint64_t a_lo, uint64_t a_hi, uint64_t b, uint64_t &r)
- {
- uint64_t p_lo;
- uint64_t p_hi;
- uint64_t q = 0;
- auto r_hi = a_hi;
- auto r_lo = a_lo;
- int s = 0;
- if(0 == (b >> 63)){
- // Normalize so quotient estimates are
- // no more than 2 in error.
- // Note: If any bits get shifted out of
- // r_hi at this point, the result would
- // overflow.
- s = 63 - bsr(b);
- const auto t = 64 - s;
- b <<= s;
- r_hi = (r_hi << s)|(r_lo >> t);
- r_lo <<= t;
- }
- const auto b_hi = b >> 32;
- /*
- The first full-by-half division places b
- across r_hi and r_lo, making the reduction
- step a little complicated.
- To make this easier, u_hi and u_lo will hold
- a shifted image of the remainder.
- [u_hi|| ][u_lo|| ]
- [r_hi|| ][r_lo|| ]
- [ b || ]
- [p_hi|| ][p_lo|| ]
- |
- V
- [q_hi|| ]
- */
- auto q_hat = r_hi / b_hi;
- p_lo = mul(b, q_hat, p_hi);
- const auto u_hi = r_hi >> 32;
- const auto u_lo = (r_hi << 32)|(r_lo >> 32);
- // r -= b*q_hat
- //
- // At most 2 iterations of this...
- while(
- (p_hi > u_hi) ||
- ((p_hi == u_hi) && (p_lo > u_lo))
- )
- {
- if(p_lo < b){
- --p_hi;
- }
- p_lo -= b;
- --q_hat;
- }
- auto w_lo = (p_lo << 32);
- auto w_hi = (p_hi << 32)|(p_lo >> 32);
- if(w_lo > r_lo){
- ++w_hi;
- }
- r_lo -= w_lo;
- r_hi -= w_hi;
- q = q_hat << 32;
- /*
- The lower half of the quotient is easier,
- as b is now aligned with r_lo.
- |r_hi][r_lo|| ]
- [ b || ]
- [p_hi|| ][p_lo|| ]
- |
- V
- [q_hi||q_lo]
- */
- q_hat = ((r_hi << 32)|(r_lo >> 32)) / b_hi;
- p_lo = mul(b, q_hat, p_hi);
- // r -= b*q_hat
- //
- // ...and at most 2 iterations of this.
- while(
- (p_hi > r_hi) ||
- ((p_hi == r_hi) && (p_lo > r_lo))
- )
- {
- if(p_lo < b){
- --p_hi;
- }
- p_lo -= b;
- --q_hat;
- }
- r_lo -= p_lo;
- q |= q_hat;
- r = r_lo >> s;
- return q;
- }
- int bsr(uint64_t x)
- {
- uint64_t y;
- uint64_t r;
- r = (x > 0xFFFFFFFF) << 5; x >>= r;
- y = (x > 0xFFFF ) << 4; x >>= y; r |= y;
- y = (x > 0xFF ) << 3; x >>= y; r |= y;
- y = (x > 0xF ) << 2; x >>= y; r |= y;
- y = (x > 0x3 ) << 1; x >>= y; r |= y;
- return static_cast<int>(r | (x >> 1));
- }
- uint64_t mul(uint64_t a, uint64_t b, uint64_t &y)
- {
- auto a_lo = a & 0x00000000FFFFFFFF;
- auto a_hi = a >> 32;
- auto b_lo = b & 0x00000000FFFFFFFF;
- auto b_hi = b >> 32;
- auto c0 = a_lo * b_lo;
- auto c1 = a_hi * b_lo;
- auto c2 = a_hi * b_hi;
- auto u1 = c1 + (a_lo * b_hi);
- if(u1 < _c1){
- c2 += 1LL << 32;
- }
- auto u0 = c0 + (u1 << 32);
- if(u0 < c0){
- ++c2;
- }
- y = c2 + (u1 >> 32);
- return u0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement