Advertisement
Guest User

Untitled

a guest
Jun 17th, 2019
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.53 KB | None | 0 0
  1. uint64_t a = xxx, b = yyy;
  2.  
  3. x = a * b;
  4. if (a != 0 && x / a != b) {
  5. // overflow handling
  6. }
  7.  
  8. uint64_t hi(uint64_t x) {
  9. return x >> 32;
  10. }
  11.  
  12. uint64_t lo(uint64_t x) {
  13. return ((1L << 32) - 1) & x;
  14. }
  15.  
  16. void multiply(uint64_t a, uint64_t b) {
  17. // actually uint32_t would do, but the casting is annoying
  18. uint64_t s0, s1, s2, s3;
  19.  
  20. uint64_t x = lo(a) * lo(b);
  21. s0 = lo(x);
  22.  
  23. x = hi(a) * lo(b) + hi(x);
  24. s1 = lo(x);
  25. s2 = hi(x);
  26.  
  27. x = s1 + lo(a) * hi(b);
  28. s1 = lo(x);
  29.  
  30. x = s2 + hi(a) * hi(b) + hi(x);
  31. s2 = lo(x);
  32. s3 = hi(x);
  33.  
  34. uint64_t result = s1 << 32 | s0;
  35. uint64_t carry = s3 << 32 | s2;
  36. }
  37.  
  38. x = s2 + hi(a) * hi(b) + hi(x)
  39.  
  40. x <= (B - 1) + (B - 1)(B - 1) + (B - 1)
  41. <= B*B - 1
  42. < B*B
  43.  
  44. if (b > 0 && a > 18446744073709551615 / b) {
  45. // overflow handling
  46. }; else {
  47. c = a * b;
  48. }
  49.  
  50. 18446744073709551615 == (1<<64)-1
  51.  
  52. // split input numbers into 32-bit digits
  53. uint64_t a0 = a & ((1LL<<32)-1);
  54. uint64_t a1 = a >> 32;
  55. uint64_t b0 = b & ((1LL<<32)-1);
  56. uint64_t b1 = b >> 32;
  57.  
  58.  
  59. // The following 3 lines of code is to calculate the carry of d1
  60. // (d1 - 32-bit second digit of result, and it can be calculated as d1=d11+d12),
  61. // but to avoid overflow.
  62. // Actually rewriting the following 2 lines:
  63. // uint64_t d1 = (a0 * b0 >> 32) + a1 * b0 + a0 * b1;
  64. // uint64_t c1 = d1 >> 32;
  65. uint64_t d11 = a1 * b0 + (a0 * b0 >> 32);
  66. uint64_t d12 = a0 * b1;
  67. uint64_t c1 = (d11 > 18446744073709551615 - d12) ? 1 : 0;
  68.  
  69. uint64_t d2 = a1 * b1 + c1;
  70. uint64_t carry = d2; // needed carry stored here
  71.  
  72. /* Multiply with overflow checking, emulating clang's builtin function
  73. *
  74. * __builtin_umull_overflow
  75. *
  76. * This code benchmarks five possible schemes for doing so.
  77. */
  78.  
  79. #include <stddef.h>
  80. #include <stdio.h>
  81. #include <stdlib.h>
  82. #include <stdint.h>
  83. #include <limits.h>
  84.  
  85. #ifndef BOOL
  86. #define BOOL int
  87. #endif
  88.  
  89. // Option 1, check for overflow a wider type
  90. // - Often fastest and the least code, especially on modern compilers
  91. // - When long is a 64-bit int, requires compiler support for 128-bits
  92. // ints (requires GCC >= 3.0 or Clang)
  93.  
  94. #if LONG_BIT > 32
  95. typedef __uint128_t long_overflow_t ;
  96. #else
  97. typedef uint64_t long_overflow_t;
  98. #endif
  99.  
  100. BOOL
  101. umull_overflow1(unsigned long lhs, unsigned long rhs, unsigned long* result)
  102. {
  103. long_overflow_t prod = (long_overflow_t)lhs * (long_overflow_t)rhs;
  104. *result = (unsigned long) prod;
  105. return (prod >> LONG_BIT) != 0;
  106. }
  107.  
  108. // Option 2, perform long multiplication using a smaller type
  109. // - Sometimes the fastest (e.g., when mulitply on longs is a library
  110. // call).
  111. // - Performs at most three multiplies, and sometimes only performs one.
  112. // - Highly portable code; works no matter how many bits unsigned long is
  113.  
  114. BOOL
  115. umull_overflow2(unsigned long lhs, unsigned long rhs, unsigned long* result)
  116. {
  117. const unsigned long HALFSIZE_MAX = (1ul << LONG_BIT/2) - 1ul;
  118. unsigned long lhs_high = lhs >> LONG_BIT/2;
  119. unsigned long lhs_low = lhs & HALFSIZE_MAX;
  120. unsigned long rhs_high = rhs >> LONG_BIT/2;
  121. unsigned long rhs_low = rhs & HALFSIZE_MAX;
  122.  
  123. unsigned long bot_bits = lhs_low * rhs_low;
  124. if (!(lhs_high || rhs_high)) {
  125. *result = bot_bits;
  126. return 0;
  127. }
  128. BOOL overflowed = lhs_high && rhs_high;
  129. unsigned long mid_bits1 = lhs_low * rhs_high;
  130. unsigned long mid_bits2 = lhs_high * rhs_low;
  131.  
  132. *result = bot_bits + ((mid_bits1+mid_bits2) << LONG_BIT/2);
  133. return overflowed || *result < bot_bits
  134. || (mid_bits1 >> LONG_BIT/2) != 0
  135. || (mid_bits2 >> LONG_BIT/2) != 0;
  136. }
  137.  
  138. // Option 3, perform long multiplication using a smaller type (this code is
  139. // very similar to option 2, but calculates overflow using a different but
  140. // equivalent method).
  141. // - Sometimes the fastest (e.g., when mulitply on longs is a library
  142. // call; clang likes this code).
  143. // - Performs at most three multiplies, and sometimes only performs one.
  144. // - Highly portable code; works no matter how many bits unsigned long is
  145.  
  146. BOOL
  147. umull_overflow3(unsigned long lhs, unsigned long rhs, unsigned long* result)
  148. {
  149. const unsigned long HALFSIZE_MAX = (1ul << LONG_BIT/2) - 1ul;
  150. unsigned long lhs_high = lhs >> LONG_BIT/2;
  151. unsigned long lhs_low = lhs & HALFSIZE_MAX;
  152. unsigned long rhs_high = rhs >> LONG_BIT/2;
  153. unsigned long rhs_low = rhs & HALFSIZE_MAX;
  154.  
  155. unsigned long lowbits = lhs_low * rhs_low;
  156. if (!(lhs_high || rhs_high)) {
  157. *result = lowbits;
  158. return 0;
  159. }
  160. BOOL overflowed = lhs_high && rhs_high;
  161. unsigned long midbits1 = lhs_low * rhs_high;
  162. unsigned long midbits2 = lhs_high * rhs_low;
  163. unsigned long midbits = midbits1 + midbits2;
  164. overflowed = overflowed || midbits < midbits1 || midbits > HALFSIZE_MAX;
  165. unsigned long product = lowbits + (midbits << LONG_BIT/2);
  166. overflowed = overflowed || product < lowbits;
  167.  
  168. *result = product;
  169. return overflowed;
  170. }
  171.  
  172. // Option 4, checks for overflow using division
  173. // - Checks for overflow using division
  174. // - Division is slow, especially if it is a library call
  175.  
  176. BOOL
  177. umull_overflow4(unsigned long lhs, unsigned long rhs, unsigned long* result)
  178. {
  179. *result = lhs * rhs;
  180. return rhs > 0 && (SIZE_MAX / rhs) < lhs;
  181. }
  182.  
  183. // Option 5, checks for overflow using division
  184. // - Checks for overflow using division
  185. // - Avoids division when the numbers are "small enough" to trivially
  186. // rule out overflow
  187. // - Division is slow, especially if it is a library call
  188.  
  189. BOOL
  190. umull_overflow5(unsigned long lhs, unsigned long rhs, unsigned long* result)
  191. {
  192. const unsigned long MUL_NO_OVERFLOW = (1ul << LONG_BIT/2) - 1ul;
  193. *result = lhs * rhs;
  194. return (lhs >= MUL_NO_OVERFLOW || rhs >= MUL_NO_OVERFLOW) &&
  195. rhs > 0 && SIZE_MAX / rhs < lhs;
  196. }
  197.  
  198. #ifndef umull_overflow
  199. #define umull_overflow2
  200. #endif
  201.  
  202. /*
  203. * This benchmark code performs a multiply at all bit sizes,
  204. * essentially assuming that sizes are logarithmically distributed.
  205. */
  206.  
  207. int main()
  208. {
  209. unsigned long i, j, k;
  210. int count = 0;
  211. unsigned long mult;
  212. unsigned long total = 0;
  213.  
  214. for (k = 0; k < 0x40000000 / LONG_BIT / LONG_BIT; ++k)
  215. for (i = 0; i != LONG_MAX; i = i*2+1)
  216. for (j = 0; j != LONG_MAX; j = j*2+1) {
  217. count += umull_overflow(i+k, j+k, &mult);
  218. total += mult;
  219. }
  220. printf("%d overflows (total %lu)n", count, total);
  221. }
  222.  
  223. +------------------+----------+----------+----------+----------+----------+
  224. | | Option 1 | Option 2 | Option 3 | Option 4 | Option 5 |
  225. | | BigInt | LngMult1 | LngMult2 | Div | OptDiv |
  226. +------------------+----------+----------+----------+----------+----------+
  227. | Clang 3.5 i386 | 1.610 | 3.217 | 3.129 | 4.405 | 4.398 |
  228. | GCC 4.9.0 i386 | 1.488 | 3.469 | 5.853 | 4.704 | 4.712 |
  229. | GCC 4.2.1 i386 | 2.842 | 4.022 | 3.629 | 4.160 | 4.696 |
  230. | GCC 4.2.1 PPC32 | 8.227 | 7.756 | 7.242 | 20.632 | 20.481 |
  231. | GCC 3.3 PPC32 | 5.684 | 9.804 | 11.525 | 21.734 | 22.517 |
  232. +------------------+----------+----------+----------+----------+----------+
  233. | Clang 3.5 x86_64 | 1.584 | 2.472 | 2.449 | 9.246 | 7.280 |
  234. | GCC 4.9 x86_64 | 1.414 | 2.623 | 4.327 | 9.047 | 7.538 |
  235. | GCC 4.2.1 x86_64 | 2.143 | 2.618 | 2.750 | 9.510 | 7.389 |
  236. | GCC 4.2.1 PPC64 | 13.178 | 8.994 | 8.567 | 37.504 | 29.851 |
  237. +------------------+----------+----------+----------+----------+----------+
  238.  
  239. x = a * b;
  240. if (a != 0 && x / a != b) {
  241. // overflow handling
  242. }
  243.  
  244. #include <stdint.h>
  245.  
  246. uint64_t mul(uint64_t a, uint64_t b) {
  247. uint32_t ah = a >> 32;
  248. uint32_t al = a; // truncates: now a = al + 2**32 * ah
  249. uint32_t bh = b >> 32;
  250. uint32_t bl = b; // truncates: now b = bl + 2**32 * bh
  251. // a * b = 2**64 * ah * bh + 2**32 * (ah * bl + bh * al) + al * bl
  252. uint64_t partial = (uint64_t) al * (uint64_t) bl;
  253. uint64_t mid1 = (uint64_t) ah * (uint64_t) bl;
  254. uint64_t mid2 = (uint64_t) al * (uint64_t) bh;
  255. uint64_t carry = (uint64_t) ah * (uint64_t) bh;
  256. // add high parts of mid1 and mid2 to carry
  257. // add low parts of mid1 and mid2 to partial, carrying
  258. // any carry bits into carry...
  259. }
  260.  
  261. struct INT32struct {INT16 high, low;};
  262. typedef union
  263. {
  264. struct INT32struct s;
  265. INT32 ll;
  266. } INT32union;
  267.  
  268. INT16 mulFunction(INT16 a, INT16 b)
  269. {
  270. INT32union result.ll = a * b; //32Bits result
  271. if(result.s.high > 0)
  272. Overflow();
  273. return (result.s.low)
  274. }
  275.  
  276. INT32 mulFunction(INT32 a, INT32 b)
  277. {
  278.  
  279. INT32union s_a.ll = abs(a);
  280. INT32union s_b.ll = abs(b); //32Bits result
  281. INT32union result;
  282. if(s_a.s.hi > 0 && s_b.s.hi > 0)
  283. {
  284. Overflow();
  285. }
  286. else if (s_a.s.hi > 0)
  287. {
  288. INT32union res1.ll = s_a.s.hi * s_b.s.lo;
  289. INT32union res2.ll = s_a.s.lo * s_b.s.lo;
  290. if (res1.hi == 0)
  291. {
  292. result.s.lo = res1.s.lo + res2.s.hi;
  293. if (result.s.hi == 0)
  294. {
  295. result.s.ll = result.s.lo << 16 + res2.s.lo;
  296. if ((a.s.hi >> 15) ^ (b.s.hi >> 15) == 1)
  297. {
  298. result.s.ll = -result.s.ll;
  299. }
  300. return result.s.ll
  301. }else
  302. {
  303. Overflow();
  304. }
  305. }else
  306. {
  307. Overflow();
  308. }
  309. }else if (s_b.s.hi > 0)
  310. {
  311.  
  312. //Same code changing a with b
  313.  
  314. }else
  315. {
  316. return (s_a.lo * s_b.lo);
  317. }
  318. }
  319.  
  320. func hex128 (_ hi: UInt64, _ lo: UInt64) -> String
  321. {
  322. var s: String = String(format: "%08X", hi >> 32)
  323. + String(format: "%08X", hi & 0xFFFFFFFF)
  324. + String(format: "%08X", lo >> 32)
  325. + String(format: "%08X", lo & 0xFFFFFFFF)
  326. return (s)
  327. }
  328.  
  329. func mul64to128 (_ multiplier: UInt64, _ multiplicand : UInt64)
  330. -> (result_hi: UInt64, result_lo: UInt64)
  331. {
  332. let x: UInt64 = multiplier
  333. let x_lo: UInt64 = (x & 0xffffffff)
  334. let x_hi: UInt64 = x >> 32
  335.  
  336. let y: UInt64 = multiplicand
  337. let y_lo: UInt64 = (y & 0xffffffff)
  338. let y_hi: UInt64 = y >> 32
  339.  
  340. let mul_lo: UInt64 = (x_lo * y_lo)
  341. let mul_hi: UInt64 = (x_hi * y_lo) + (mul_lo >> 32)
  342. let mul_carry: UInt64 = (x_lo * y_hi) + (mul_hi & 0xffffffff)
  343. let result_hi: UInt64 = (x_hi * y_hi) + (mul_hi >> 32) + (mul_carry >> 32)
  344. let result_lo: UInt64 = (mul_carry << 32) + (mul_lo & 0xffffffff)
  345.  
  346. return (result_hi, result_lo)
  347. }
  348.  
  349. var c: UInt64 = 0
  350. var d: UInt64 = 0
  351.  
  352. (c, d) = mul64to128(0x1234567890123456, 0x9876543210987654)
  353. // 0AD77D742CE3C72E45FD10D81D28D038 is the result of the above example
  354. print(hex128(c, d))
  355.  
  356. (c, d) = mul64to128(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF)
  357. // FFFFFFFFFFFFFFFE0000000000000001 is the result of the above example
  358. print(hex128(c, d))
  359.  
  360. #include <stdlib.h>
  361. #include <stdio.h>
  362.  
  363. int might_be_mul_oflow(unsigned long a, unsigned long b)
  364. {
  365. if (!a || !b)
  366. return 0;
  367.  
  368. a = a | (a >> 1) | (a >> 2) | (a >> 4) | (a >> 8) | (a >> 16) | (a >> 32);
  369. b = b | (b >> 1) | (b >> 2) | (b >> 4) | (b >> 8) | (b >> 16) | (b >> 32);
  370.  
  371. for (;;) {
  372. unsigned long na = a << 1;
  373. if (na <= a)
  374. break;
  375. a = na;
  376. }
  377.  
  378. return (a & b) ? 1 : 0;
  379. }
  380.  
  381. int main(int argc, char **argv)
  382. {
  383. unsigned long a, b;
  384. char *endptr;
  385.  
  386. if (argc < 3) {
  387. printf("supply two unsigned long integers in C formn");
  388. return EXIT_FAILURE;
  389. }
  390.  
  391. a = strtoul(argv[1], &endptr, 0);
  392.  
  393. if (*endptr != 0) {
  394. printf("%s is garbagen", argv[1]);
  395. return EXIT_FAILURE;
  396. }
  397.  
  398. b = strtoul(argv[2], &endptr, 0);
  399.  
  400. if (*endptr != 0) {
  401. printf("%s is garbagen", argv[2]);
  402. return EXIT_FAILURE;
  403. }
  404.  
  405. if (might_be_mul_oflow(a, b))
  406. printf("might be multiplication overflown");
  407.  
  408. {
  409. unsigned long c = a * b;
  410. printf("%lu * %lu = %lun", a, b, c);
  411. if (a != 0 && c / a != b)
  412. printf("confirmed multiplication overflown");
  413. }
  414.  
  415. return 0;
  416. }
  417.  
  418. might_be_mul_oflow
  419.  
  420. int might_be_mul_oflow(unsigned long a, unsigned long b)
  421. {
  422. if (!a || !b)
  423. return 0;
  424.  
  425. {
  426. unsigned long arng = ULONG_MAX >> 1;
  427. unsigned long brng = 1;
  428.  
  429. while (arng != 0) {
  430. if (a <= arng && b <= brng)
  431. return 0;
  432. arng >>= 1;
  433. brng <<= 1;
  434. brng |= 1;
  435. }
  436.  
  437. return 1;
  438. }
  439. }
  440.  
  441. int64_t safemult(int64_t a, int64_t b) {
  442. double dx;
  443.  
  444. dx = (double)a * (double)b;
  445.  
  446. if ( fabs(dx) < (double)9007199254740992 )
  447. return (int64_t)dx;
  448.  
  449. if ( (double)INT64_MAX < fabs(dx) )
  450. return INT64_MAX;
  451.  
  452. return a*b;
  453. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement