Guest User

Untitled

a guest
Feb 21st, 2018
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.98 KB | None | 0 0
  1. --- mp_word.d Sat Feb 11 16:48:58 2017
  2. +++ mp_word.d Thu Feb 8 20:20:23 2018
  3. @@ -215,111 +215,6 @@
  4. */
  5. word word8_add3(ref word[8] z, const ref word[8] x, const ref word[8] y, word carry)
  6. {
  7. - version(D_InlineAsm_X86_64) {
  8. -
  9. - word* _z = z.ptr;
  10. - clearMem(_z, z.length);
  11. - word* _x = cast(word*)x.ptr;
  12. - word* _y = cast(word*)y.ptr;
  13. - word* _carry = &carry;
  14. - asm pure nothrow @nogc {
  15. -
  16. - mov RBX,_x;
  17. - mov RSI,_y;
  18. - mov RDI,_z;
  19. - mov RCX,_carry;
  20. - xor RAX,RAX;
  21. - sub RAX,[RCX]; //force CF=1 iff *carry==1
  22. - mov RAX,[RBX];
  23. - adc RAX,[RSI];
  24. - mov [RDI],RAX;
  25. -
  26. - mov RAX,[RBX+8];
  27. - adc RAX,[RSI+8];
  28. - mov [RDI+8],RAX;
  29. -
  30. - mov RAX,[RBX+16];
  31. - adc RAX,[RSI+16];
  32. - mov [RDI+16],RAX;
  33. -
  34. - mov RAX,[RBX+24];
  35. - adc RAX,[RSI+24];
  36. - mov [RDI+24],RAX;
  37. -
  38. - mov RAX,[RBX+32];
  39. - adc RAX,[RSI+32];
  40. - mov [RDI+32],RAX;
  41. -
  42. - mov RAX,[RBX+40];
  43. - adc RAX,[RSI+40];
  44. - mov [RDI+40],RAX;
  45. -
  46. - mov RAX,[RBX+48];
  47. - adc RAX,[RSI+48];
  48. - mov [RDI+48],RAX;
  49. -
  50. - mov RAX,[RBX+56];
  51. - adc RAX,[RSI+56];
  52. - mov [RDI+56],RAX;
  53. -
  54. - sbb RAX,RAX;
  55. - neg RAX;
  56. - mov carry, RAX;
  57. - }
  58. - return carry;
  59. - } else version (D_InlineAsm_X86) {
  60. - word* _z = z.ptr;
  61. - clearMem(_z, z.length);
  62. - word* _x = cast(word*)x.ptr;
  63. - word* _y = cast(word*)y.ptr;
  64. - word* _carry = &carry;
  65. - asm pure nothrow @nogc {
  66. -
  67. - mov EBX,_x;
  68. - mov ESI,_y;
  69. - mov EDI,_z;
  70. - mov ECX,_carry;
  71. - xor EAX,EAX;
  72. - sub EAX,[ECX]; //force CF=1 iff *carry==1
  73. - mov EAX,[EBX];
  74. - adc EAX,[ESI];
  75. - mov [EDI],EAX;
  76. -
  77. - mov EAX,[EBX+4];
  78. - adc EAX,[ESI+4];
  79. - mov [EDI+4],EAX;
  80. -
  81. - mov EAX,[EBX+8];
  82. - adc EAX,[ESI+8];
  83. - mov [EDI+8],EAX;
  84. -
  85. - mov EAX,[EBX+12];
  86. - adc EAX,[ESI+12];
  87. - mov [EDI+12],EAX;
  88. -
  89. - mov EAX,[EBX+16];
  90. - adc EAX,[ESI+16];
  91. - mov [EDI+16],EAX;
  92. -
  93. - mov EAX,[EBX+20];
  94. - adc EAX,[ESI+20];
  95. - mov [EDI+20],EAX;
  96. -
  97. - mov EAX,[EBX+24];
  98. - adc EAX,[ESI+24];
  99. - mov [EDI+24],EAX;
  100. -
  101. - mov EAX,[EBX+28];
  102. - adc EAX,[ESI+28];
  103. - mov [EDI+28],EAX;
  104. -
  105. - sbb EAX,EAX;
  106. - neg EAX;
  107. - mov carry, EAX;
  108. - }
  109. - return carry;
  110. - }
  111. - else {
  112. z[0] = word_add(x[0], y[0], &carry);
  113. z[1] = word_add(x[1], y[1], &carry);
  114. z[2] = word_add(x[2], y[2], &carry);
  115. @@ -329,7 +224,6 @@
  116. z[6] = word_add(x[6], y[6], &carry);
  117. z[7] = word_add(x[7], y[7], &carry);
  118. return carry;
  119. - }
  120. }
  121.  
  122. /*
  123. @@ -349,96 +243,6 @@
  124. */
  125. word word8_sub2(ref word[8] x, const ref word[8] y, word carry)
  126. {
  127. - version(D_InlineAsm_X86_64) {
  128. - word* _x = x.ptr;
  129. - word[8] ret;
  130. - word* _z = ret.ptr;
  131. - word* _y = cast(word*)y.ptr;
  132. - word* _carry = &carry;
  133. - asm pure nothrow @nogc {
  134. - mov RBX,_x;
  135. - mov RSI,_y;
  136. - mov RDI, _z;
  137. - mov RCX,_carry;
  138. - xor RAX,RAX;
  139. - sub RAX,[RCX]; //force CF=1 iff *carry==1
  140. - mov RAX,[RBX];
  141. - sbb RAX,[RSI];
  142. - mov [RDI],RAX;
  143. - mov RAX,[RBX+8];
  144. - sbb RAX,[RSI+8];
  145. - mov [RDI+8],RAX;
  146. - mov RAX,[RBX+16];
  147. - sbb RAX,[RSI+16];
  148. - mov [RDI+16],RAX;
  149. - mov RAX,[RBX+24];
  150. - sbb RAX,[RSI+24];
  151. - mov [RDI+24],RAX;
  152. - mov RAX,[RBX+32];
  153. - sbb RAX,[RSI+32];
  154. - mov [RDI+32],RAX;
  155. - mov RAX,[RBX+40];
  156. - sbb RAX,[RSI+40];
  157. - mov [RDI+40],RAX;
  158. - mov RAX,[RBX+48];
  159. - sbb RAX,[RSI+48];
  160. - mov [RDI+48],RAX;
  161. - mov RAX,[RBX+56];
  162. - sbb RAX,[RSI+56];
  163. - mov [RDI+56],RAX;
  164. - sbb RAX,RAX;
  165. - neg RAX;
  166. - mov carry, RAX;
  167. - }
  168. - x[0 .. 8] = ret[0 .. 8];
  169. - return carry;
  170. -
  171. - }
  172. - else version (D_InlineAsm_X86) {
  173. - word* _x = x.ptr;
  174. - word* _y = cast(word*)y.ptr;
  175. - word[8] ret;
  176. - word* _z = ret.ptr;
  177. - word* _carry = &carry;
  178. - asm pure nothrow @nogc {
  179. - mov EBX,_x;
  180. - mov EDI,_z;
  181. - mov ESI,_y;
  182. - mov ECX,_carry;
  183. - xor EAX,EAX;
  184. - sub EAX,[ECX]; //force CF=1 iff *carry==1
  185. - mov EAX,[EBX];
  186. - sbb EAX,[ESI];
  187. - mov [EDI],EAX;
  188. - mov EAX,[EBX+4];
  189. - sbb EAX,[ESI+4];
  190. - mov [EDI+4],EAX;
  191. - mov EAX,[EBX+8];
  192. - sbb EAX,[ESI+8];
  193. - mov [EDI+8],EAX;
  194. - mov EAX,[EBX+12];
  195. - sbb EAX,[ESI+12];
  196. - mov [EDI+12],EAX;
  197. - mov EAX,[EBX+16];
  198. - sbb EAX,[ESI+16];
  199. - mov [EDI+16],EAX;
  200. - mov EAX,[EBX+20];
  201. - sbb EAX,[ESI+20];
  202. - mov [EDI+20],EAX;
  203. - mov EAX,[EBX+24];
  204. - sbb EAX,[ESI+24];
  205. - mov [EDI+24],EAX;
  206. - mov EAX,[EBX+28];
  207. - sbb EAX,[ESI+28];
  208. - mov [EDI+28],EAX;
  209. - sbb EAX,EAX;
  210. - neg EAX;
  211. - mov carry, EAX;
  212. - }
  213. - x[0 .. 8] = ret[0 .. 8];
  214. - return carry;
  215. -
  216. - } else {
  217. x[0] = word_sub(x[0], y[0], &carry);
  218. x[1] = word_sub(x[1], y[1], &carry);
  219. x[2] = word_sub(x[2], y[2], &carry);
  220. @@ -448,7 +252,6 @@
  221. x[6] = word_sub(x[6], y[6], &carry);
  222. x[7] = word_sub(x[7], y[7], &carry);
  223. return carry;
  224. - }
  225. }
  226.  
  227. /*
  228. @@ -472,93 +275,6 @@
  229. */
  230. word word8_sub3(ref word[8] z, const ref word[8] x, const ref word[8] y, word carry)
  231. {
  232. - version(D_InlineAsm_X86_64) {
  233. - word* _z = z.ptr;
  234. - clearMem(_z, z.length);
  235. -
  236. - word* _x = cast(word*)x.ptr;
  237. - word* _y = cast(word*)y.ptr;
  238. - word* _carry = &carry;
  239. - asm pure nothrow @nogc {
  240. - mov RBX,_x;
  241. - mov RSI,_y;
  242. - mov RCX,_carry;
  243. - xor RAX,RAX;
  244. - sub RAX,[RCX]; //force CF=1 iff *carry==1
  245. - mov RDI,_z;
  246. - mov RAX,[RBX];
  247. - sbb RAX,[RSI];
  248. - mov [RDI],RAX;
  249. - mov RAX,[RBX+8];
  250. - sbb RAX,[RSI+8];
  251. - mov [RDI+8],RAX;
  252. - mov RAX,[RBX+16];
  253. - sbb RAX,[RSI+16];
  254. - mov [RDI+16],RAX;
  255. - mov RAX,[RBX+24];
  256. - sbb RAX,[RSI+24];
  257. - mov [RDI+24],RAX;
  258. - mov RAX,[RBX+32];
  259. - sbb RAX,[RSI+32];
  260. - mov [RDI+32],RAX;
  261. - mov RAX,[RBX+40];
  262. - sbb RAX,[RSI+40];
  263. - mov [RDI+40],RAX;
  264. - mov RAX,[RBX+48];
  265. - sbb RAX,[RSI+48];
  266. - mov [RDI+48],RAX;
  267. - mov RAX,[RBX+56];
  268. - sbb RAX,[RSI+56];
  269. - mov [RDI+56],RAX;
  270. - sbb RAX,RAX;
  271. - neg RAX;
  272. - mov carry, RAX;
  273. - }
  274. - return carry;
  275. - } else version (D_InlineAsm_X86) {
  276. -
  277. - word* _z = z.ptr;
  278. - word* _x = cast(word*)x.ptr;
  279. - word* _y = cast(word*)y.ptr;
  280. - word* _carry = &carry;
  281. - asm {
  282. - mov EBX,_x;
  283. - mov ESI,_y;
  284. - mov ECX,_carry;
  285. - xor EAX,EAX;
  286. - sub EAX,[ECX]; //force CF=1 iff *carry==1
  287. - mov EDI,_z;
  288. - mov EAX,[EBX];
  289. - sbb EAX,[ESI];
  290. - mov [EDI],EAX;
  291. - mov EAX,[EBX+4];
  292. - sbb EAX,[ESI+4];
  293. - mov [EDI+4],EAX;
  294. - mov EAX,[EBX+8];
  295. - sbb EAX,[ESI+8];
  296. - mov [EDI+8],EAX;
  297. - mov EAX,[EBX+12];
  298. - sbb EAX,[ESI+12];
  299. - mov [EDI+12],EAX;
  300. - mov EAX,[EBX+16];
  301. - sbb EAX,[ESI+16];
  302. - mov [EDI+16],EAX;
  303. - mov EAX,[EBX+20];
  304. - sbb EAX,[ESI+20];
  305. - mov [EDI+20],EAX;
  306. - mov EAX,[EBX+24];
  307. - sbb EAX,[ESI+24];
  308. - mov [EDI+24],EAX;
  309. - mov EAX,[EBX+28];
  310. - sbb EAX,[ESI+28];
  311. - mov [EDI+28],EAX;
  312. - sbb EAX,EAX;
  313. - neg EAX;
  314. - mov carry, EAX;
  315. - }
  316. - return carry;
  317. - }
  318. - else {
  319. z[0] = word_sub(x[0], y[0], &carry);
  320. z[1] = word_sub(x[1], y[1], &carry);
  321. z[2] = word_sub(x[2], y[2], &carry);
  322. @@ -568,7 +284,6 @@
  323. z[6] = word_sub(x[6], y[6], &carry);
  324. z[7] = word_sub(x[7], y[7], &carry);
  325. return carry;
  326. - }
  327. }
  328.  
  329. /*
  330. @@ -576,85 +291,6 @@
  331. */
  332. word word8_linmul2(ref word[8] x, word y, word carry)
  333. {
  334. - version(D_InlineAsm_X86_64) {
  335. - word* _x = x.ptr;
  336. - word[8] ret;
  337. - word* _z = ret.ptr;
  338. - word* _carry = &carry;
  339. - asm pure nothrow @nogc {
  340. - mov RSI, _x;
  341. - mov RDI, _z;
  342. - mov RDX, _carry;
  343. - mov RCX, [RDX];
  344. -
  345. - mov RAX, [RSI];
  346. - mov RBX, y;
  347. - mul RBX;
  348. - add RAX, RCX;
  349. - adc RDX, 0;
  350. - mov RCX, RDX;
  351. - mov [RDI], RAX;
  352. -
  353. - mov RAX, [RSI+8];
  354. - mov RBX, y;
  355. - mul RBX;
  356. - add RAX, RCX;
  357. - adc RDX, 0;
  358. - mov RCX, RDX;
  359. - mov [RDI+8], RAX;
  360. -
  361. - mov RAX, [RSI+16];
  362. - mov RBX, y;
  363. - mul RBX;
  364. - add RAX, RCX;
  365. - adc RDX, 0;
  366. - mov RCX, RDX;
  367. - mov [RDI+16], RAX;
  368. -
  369. - mov RAX, [RSI+24];
  370. - mov RBX, y;
  371. - mul RBX;
  372. - add RAX, RCX;
  373. - adc RDX, 0;
  374. - mov RCX, RDX;
  375. - mov [RDI+24], RAX;
  376. -
  377. - mov RAX, [RSI+32];
  378. - mov RBX, y;
  379. - mul RBX;
  380. - add RAX, RCX;
  381. - adc RDX, 0;
  382. - mov RCX, RDX;
  383. - mov [RDI+32], RAX;
  384. -
  385. - mov RAX, [RSI+40];
  386. - mov RBX, y;
  387. - mul RBX;
  388. - add RAX, RCX;
  389. - adc RDX, 0;
  390. - mov RCX, RDX;
  391. - mov [RDI+40], RAX;
  392. -
  393. - mov RAX, [RSI+48];
  394. - mov RBX, y;
  395. - mul RBX;
  396. - add RAX, RCX;
  397. - adc RDX, 0;
  398. - mov RCX, RDX;
  399. - mov [RDI+48], RAX;
  400. -
  401. - mov RAX, [RSI+56];
  402. - mov RBX, y;
  403. - mul RBX;
  404. - add RAX, RCX;
  405. - adc RDX, 0;
  406. - mov carry, RDX;
  407. - mov [RDI+56], RAX;
  408. - }
  409. - x[0 .. 8] = ret[0 .. 8];
  410. - return carry;
  411. - }
  412. - else {
  413. x[0] = word_madd2(x[0], y, &carry);
  414. x[1] = word_madd2(x[1], y, &carry);
  415. x[2] = word_madd2(x[2], y, &carry);
  416. @@ -664,7 +300,6 @@
  417. x[6] = word_madd2(x[6], y, &carry);
  418. x[7] = word_madd2(x[7], y, &carry);
  419. return carry;
  420. - }
  421. }
  422.  
  423. /*
  424. @@ -672,85 +307,6 @@
  425. */
  426. word word8_linmul3(ref word[8] z, const ref word[8] x, word y, word carry)
  427. {
  428. -
  429. - version(D_InlineAsm_X86_64) {
  430. - word* _x = cast(word*)x.ptr;
  431. - word* _z = z.ptr;
  432. - word* _carry = &carry;
  433. - clearMem(_z, z.length);
  434. - asm pure nothrow @nogc {
  435. - mov RSI, _x;
  436. - mov RDI, _z;
  437. - mov RDX, _carry;
  438. - mov RCX, [RDX];
  439. -
  440. - mov RAX, [RSI];
  441. - mov RBX, y;
  442. - mul RBX;
  443. - add RAX, RCX;
  444. - adc RDX, 0;
  445. - mov RCX, RDX;
  446. - mov [RDI], RAX;
  447. -
  448. - mov RAX, [RSI+8];
  449. - mov RBX, y;
  450. - mul RBX;
  451. - add RAX, RCX;
  452. - adc RDX, 0;
  453. - mov RCX, RDX;
  454. - mov [RDI+8], RAX;
  455. -
  456. - mov RAX, [RSI+16];
  457. - mov RBX, y;
  458. - mul RBX;
  459. - add RAX, RCX;
  460. - adc RDX, 0;
  461. - mov RCX, RDX;
  462. - mov [RDI+16], RAX;
  463. -
  464. - mov RAX, [RSI+24];
  465. - mov RBX, y;
  466. - mul RBX;
  467. - add RAX, RCX;
  468. - adc RDX, 0;
  469. - mov RCX, RDX;
  470. - mov [RDI+24], RAX;
  471. -
  472. - mov RAX, [RSI+32];
  473. - mov RBX, y;
  474. - mul RBX;
  475. - add RAX, RCX;
  476. - adc RDX, 0;
  477. - mov RCX, RDX;
  478. - mov [RDI+32], RAX;
  479. -
  480. - mov RAX, [RSI+40];
  481. - mov RBX, y;
  482. - mul RBX;
  483. - add RAX, RCX;
  484. - adc RDX, 0;
  485. - mov RCX, RDX;
  486. - mov [RDI+40], RAX;
  487. -
  488. - mov RAX, [RSI+48];
  489. - mov RBX, y;
  490. - mul RBX;
  491. - add RAX, RCX;
  492. - adc RDX, 0;
  493. - mov RCX, RDX;
  494. - mov [RDI+48], RAX;
  495. -
  496. - mov RAX, [RSI+56];
  497. - mov RBX, y;
  498. - mul RBX;
  499. - add RAX, RCX;
  500. - adc RDX, 0;
  501. - mov carry, RDX;
  502. - mov [RDI+56], RAX;
  503. - }
  504. - return carry;
  505. - }
  506. - else {
  507. z[0] = word_madd2(x[0], y, &carry);
  508. z[1] = word_madd2(x[1], y, &carry);
  509. z[2] = word_madd2(x[2], y, &carry);
  510. @@ -760,7 +316,6 @@
  511. z[6] = word_madd2(x[6], y, &carry);
  512. z[7] = word_madd2(x[7], y, &carry);
  513. return carry;
  514. - }
  515. }
  516.  
  517. /*
  518. @@ -768,109 +323,6 @@
  519. */
  520. word word8_madd3(ref word[8] z, const ref word[8] x, word y, word carry)
  521. {
  522. - version(D_InlineAsm_X86_64) {
  523. - word* _x = cast(word*)x.ptr;
  524. - word* _z = z.ptr;
  525. - word* _carry = &carry;
  526. - word[8] ret; word* _z1 = ret.ptr;
  527. - asm pure nothrow @nogc {
  528. - mov R8, _x;
  529. - mov RSI, _z;
  530. - mov R10, y;
  531. - mov RDI, _z1;
  532. - mov RDX, _carry;
  533. - mov RCX, [RDX];
  534. -
  535. - mov RAX, [R8];
  536. - mov RBX, R10;
  537. - mul RBX;
  538. - add RAX, [RSI];
  539. - adc RDX, 0;
  540. - add RAX, RCX;
  541. - adc RDX, 0;
  542. - mov RCX, RDX;
  543. - mov [RDI], RAX;
  544. - add R8, 8;
  545. -
  546. - mov RAX, [R8];
  547. - mov RBX, R10;
  548. - mul RBX;
  549. - add RAX, [RSI+8];
  550. - adc RDX, 0;
  551. - add RAX, RCX;
  552. - adc RDX, 0;
  553. - mov RCX, RDX;
  554. - mov [RDI+8], RAX;
  555. - add R8, 8;
  556. -
  557. - mov RAX, [R8];
  558. - mov RBX, R10;
  559. - mul RBX;
  560. - add RAX, [RSI+16];
  561. - adc RDX, 0;
  562. - add RAX, RCX;
  563. - adc RDX, 0;
  564. - mov RCX, RDX;
  565. - mov [RDI+16], RAX;
  566. - add R8, 8;
  567. -
  568. - mov RAX, [R8];
  569. - mov RBX, R10;
  570. - mul RBX;
  571. - add RAX, [RSI+24];
  572. - adc RDX, 0;
  573. - add RAX, RCX;
  574. - adc RDX, 0;
  575. - mov RCX, RDX;
  576. - mov [RDI+24], RAX;
  577. - add R8, 8;
  578. -
  579. - mov RAX, [R8];
  580. - mov RBX, R10;
  581. - mul RBX;
  582. - add RAX, [RSI+32];
  583. - adc RDX, 0;
  584. - add RAX, RCX;
  585. - adc RDX, 0;
  586. - mov RCX, RDX;
  587. - mov [RDI+32], RAX;
  588. - add R8, 8;
  589. -
  590. - mov RAX, [R8];
  591. - mov RBX, R10;
  592. - mul RBX;
  593. - add RAX, [RSI+40];
  594. - adc RDX, 0;
  595. - add RAX, RCX;
  596. - adc RDX, 0;
  597. - mov RCX, RDX;
  598. - mov [RDI+40], RAX;
  599. - add R8, 8;
  600. -
  601. - mov RAX, [R8];
  602. - mov RBX, R10;
  603. - mul RBX;
  604. - add RAX, [RSI+48];
  605. - adc RDX, 0;
  606. - add RAX, RCX;
  607. - adc RDX, 0;
  608. - mov RCX, RDX;
  609. - mov [EDI+48], RAX;
  610. - add R8, 8;
  611. -
  612. - mov RAX, [R8];
  613. - mov RBX, R10;
  614. - mul RBX;
  615. - add RAX, [RSI+56];
  616. - adc RDX, 0;
  617. - add RAX, RCX;
  618. - adc RDX, 0;
  619. - mov carry, RDX;
  620. - mov [RDI+56], RAX;
  621. - }
  622. - z[0 .. 8] = ret[0..8];
  623. - return carry;
  624. - } else {
  625. z[0] = word_madd3(x[0], y, z[0], &carry);
  626. z[1] = word_madd3(x[1], y, z[1], &carry);
  627. z[2] = word_madd3(x[2], y, z[2], &carry);
  628. @@ -880,7 +332,6 @@
  629. z[6] = word_madd3(x[6], y, z[6], &carry);
  630. z[7] = word_madd3(x[7], y, z[7], &carry);
  631. return carry;
  632. - }
  633. }
  634.  
  635. /*
Add Comment
Please, Sign In to add comment