xiahanlu
Untitled
Jun 9th, 2018
  1. ;**************************************************************************************************
  2. ;CSurfaceCopy32.asm - simple VRAM copy routine [RGB32 or RGB24]
  3. ;same as memcpy, but with pitch offsets and a height [number of Y scanlines]
  4. ;
  5. ;Strategy:
  6. ; Small copies [width <= 31 pixels]
  7. ; - trivial: dispatch to an unrolled per-width routine, nothing else to care about
  8. ; Source/target aligned, or sharing the same misalignment, count aligned
  9. ; - simple: correct the shared offset if needed, then do an aligned copy
  10. ; Source/target aligned, or sharing the same misalignment, count unaligned
  11. ; - simple: rep movsb it
  12. ; [
  13. ; Under certain conditions rep movsd/movsb
  14. ; (and rep stosd/stosb) copy as fast as movq/movaps in MMX/SSE:
  15. ;
  16. ; % whole cache lines are moved at once
  17. ; % source and destination addresses are 8-byte aligned
  18. ; % increment is forward (direction flag cleared)
  19. ; % counter (ECX) is greater than or equal to 64
  20. ; % the numeric difference between EDI and ESI is at least 32
  21. ; % source and destination memory are write-back or write-combining
  22. ; ]
  23. ;
  24. ; - Misaligned addresses can always be reduced to the case where the \
  25. ; target is aligned and only the source is misaligned:
  26. ; read two adjacent aligned source blocks and use the PALIGNR instruction \
  27. ; [http://www.felixcloutier.com/x86/PALIGNR.html] \
  28. ; to stitch the source data into alignment for the aligned write; \
  29. ; reads past a buffer boundary may raise #GP(0), so please take care - moecmks
  30. ;**************************************************************************************************
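;
; For reference, a minimal C sketch of the intended behaviour. The prototype below is
; inferred from the stack layout documented inside CSurfaceCopy32 and is an assumption,
; not part of the original source:
;
;   #include <string.h>
;   /* copy a width x height block of 32-bit pixels between two pitched surfaces */
;   void CSurfaceCopy32_ref (void *dst, int dstX, int dstY, int dstPitch,
;                            const void *src, int srcX, int srcY, int srcPitch,
;                            int width, int height)
;   {
;     unsigned char *d = (unsigned char *) dst + dstY * dstPitch + dstX * 4;
;     const unsigned char *s = (const unsigned char *) src + srcY * srcPitch + srcX * 4;
;     for (int y = 0; y < height; y++, d += dstPitch, s += srcPitch)
;       memcpy (d, s, (size_t) width * 4);
;   }
;
; (the assembly below additionally rejects NULL pointers and zero width/height)
;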
  31.  
  32. .386
  33. .mmx
  34. .xmm
  35. .model flat, c
  36.  
  37. ;; esp save slot [a spare dword in the TEB, chosen per target below]
  38. ifdef __X86_32_
  39. STF_SAVE equ 0700h
  40. elseifdef __X86_WOW64_
  41. STF_SAVE equ 01ACh
  42. elseifdef __X86_64_
  43. STF_SAVE equ 02D0h
  44. else
  45. STF_SAVE equ 01ACh
  46. endif
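;; NOTE: the values above are raw fs:[...] offsets used as a one-dword scratch slot for
;; saving esp; they are assumed to land in otherwise unused TEB space and should be
;; re-checked for the target Windows version before reuse.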
  47. ;; shorthand for code alignment
  48. ALIGN_Z equ align 16
  49.  
  50. .code
  51. CSurfaceCopy32 proc C
  52. option prologue:none, epilogue:none
  53.  
  54. ;; esp + 4 <- target pointer
  55. ;; esp + 8 <- target x
  56. ;; esp + 12<- target y
  57. ;; esp + 16<- target pitch
  58. ;; esp + 20<- source pointer
  59. ;; esp + 24<- source x
  60. ;; esp + 28<- source y
  61. ;; esp + 32<- source pitch
  62. ;; esp + 36<- copy's width
  63. ;; esp + 40<- copy's height
  64.  
  65. push esi
  66. push edi
  67. push ebx
  68. push ebp ;; save context register
  69. ; save esp in the TEB scratch slot (esp is reused as a data register below)
  70. assume fs:nothing
  71. mov fs:[STF_SAVE], esp
  72. cld
  73.  
  74. ;; reloc target start address
  75. mov edi, [esp+4 + 16] ;; edi <- target pointer
  76. mov ebx, [esp+8 + 16] ;; ebx <- target x
  77. mov ecx, [esp+12+ 16] ;; ecx <- target y
  78. mov edx, [esp+16+ 16] ;; edx <- target pitch
  79.  
  80. test edi, edi ;; null pointer ?
  81. jz V_EXIT
  82.  
  83. shl ebx, 2
  84. add edi, ebx
  85. imul ecx, edx
  86. add edi, ecx ;; edi <- target start pointer[save]
  87.  
  88. ;; reloc source start address
  89. mov esi, [esp+20+ 16] ;; esi <- source pointer
  90. mov ebx, [esp+24+ 16] ;; ebx <- source x
  91. mov ecx, [esp+28+ 16] ;; ecx <- source y
  92. mov ebp, [esp+32+ 16] ;; ebp <- source pitch
  93.  
  94. test esi, esi ;; null pointer ?
  95. jz V_EXIT
  96.  
  97. shl ebx, 2
  98. add esi, ebx
  99. imul ecx, ebp
  100. add esi, ecx ;; esi <- source start pointer[save]
  101.  
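;; at this point:
;;   edi = target + target_y * target_pitch + target_x * 4
;;   esi = source + source_y * source_pitch + source_x * 4   (RGB32: 4 bytes per pixel)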
  102. ;; get width/height, pitch RVA.
  103. mov ecx, [esp+36 +16] ;; ecx <- width
  104. mov ebx, [esp+40 +16] ;; ebx <- height
  105.  
  106. test ecx, ecx ;; width is ZERO ?
  107. jz V_EXIT
  108.  
  109. test ebx, ebx ;; height is ZERO ?
  110. jz V_EXIT
  111.  
  112. lea eax, [ecx*4] ;; eax <- row byte count [width * 4]
  113. nop ;; spare
  114. test edx, 15 ;; target pitch 16-byte aligned ?
  115. jne unAlignPitchWind
  116. test ebp, 15 ;; source pitch 16-byte aligned ?
  117. jne unAlignPitchWind
  118. sub edx, eax ;; edx <- target pitch RVA [pitch minus row bytes]
  119. sub ebp, eax ;; ebp <- source pitch RVA
  120. mov esp, ecx ;; esp <- width [per-row counter reload]
  121.  
  122. ;; register dispatch
  123. ;;
  124. ;; eax <- none
  125. ;; ebx <- height
  126. ;; ecx <- width
  127. ;; edx <- target pitch RVA
  128. ;; ebp <- source pitch RVA
  129. ;; edi <- target pointer
  130. ;; esi <- source pointer
  131. ;; esp <- width
  132.  
  133. ; small width ? [31 pixels or fewer]
  134. ;---------------------------------------------------------------------------------------------
  135. cmp ecx, 31
  136. ja @F
  137. jmp dword ptr smallCopyRoutine[ecx*4]
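;; widths of 1..31 pixels jump straight into an unrolled per-width routine via the
;; smallCopyRoutine table near the end of the file; index 0 is never taken because a
;; zero width already exited above, so entry 0 (sCR1F) is only a placeholder.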
  138.  
  139. ; check align
  140. ; ------------------------------------------------------------------------------
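;; dispatch key built below: eax = (target & 0Ch) | ((source & 0Ch) >> 2)
;;   bits 2..3 = target dword offset within 16 bytes, bits 0..1 = source dword offset,
;;   selecting one of the 16 cases cAR_SxDy in chkAlignRoutine
;;   (x = source byte offset, y = target byte offset).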
  141. ALIGN_Z
  142. @@:
  143. mov eax, edi
  144. mov esp, esi
  145. and eax, 1100b
  146. and esp, 1100b
  147. shr esp, 2 ;; DDSS
  148. or eax, esp
  149. mov esp, ecx
  150. jmp dword ptr chkAlignRoutine[eax*4]
  151. ; pitch not 16-byte aligned: small-width check, then fall back to the MMX loop
  152. ;---------------------------------------------------------------------------------------------
  153. unAlignPitchWind:
  154. sub edx, eax ;; edx <- target pitch RVA
  155. sub ebp, eax ;; ebp <- source pitch RVA
  156. mov esp, ecx
  157. cmp ecx, 31
  158. ja @F
  159. jmp dword ptr smallCopyRoutine[ecx*4]
  160.  
  161. ; check align
  162. ; ------------------------------------------------------------------------------
  163. ALIGN_Z
  164. @@:
  165. mov esp, ecx
  166. jmp cAR_SXXX
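;; with an unaligned pitch, 16-byte alignment cannot survive the row-to-row step, so
;; this path always uses the 8-byte MMX loop (cAR_SXXX); movq tolerates misaligned
;; addresses, unlike movdqa.
;; naming used below: "_dirty" loops handle widths that are not a multiple of the
;; vector width (the leftover dwords are copied by a scalar tail each row),
;; "_pure" loops handle exact multiples.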
  167. ALIGN_Z
  168. cAR_S0D0: ;; aligned 16 (TODO:SSE Unwind!!!)
  169. test ecx, 15
  170. jz cAR_A16_pure
  171. sub ecx, 16
  172. sub esp, 16
  173. ALIGN_Z
  174. cAR_A16_dirty:
  175. movdqa xmm0, [esi+000h]
  176. movdqa xmm1, [esi+010h]
  177. movdqa xmm2, [esi+020h]
  178. movdqa xmm3, [esi+030h]
  179.  
  180. ;; maybe use movntdqa/prefetch ??
  181. movdqa [edi+000h], xmm0
  182. movdqa [edi+010h], xmm1
  183. movdqa [edi+020h], xmm2
  184. movdqa [edi+030h], xmm3
  185.  
  186. add esi, 64
  187. add edi, 64
  188. sub ecx, 16
  189. jg cAR_A16_dirty
  190. add ecx, 16
  191. @@:
  192. mov eax, [esi]
  193. lea esi, [esi+4]
  194. mov [edi], eax
  195. dec ecx
  196. lea edi, [edi+4]
  197. jnz @B
  198. add esi, ebp
  199. add edi, edx
  200. mov ecx, esp
  201. dec ebx
  202. jnz cAR_A16_dirty
  203. jmp V_EXIT
  204.  
  205. ALIGN_Z
  206. cAR_A16_pure:
  207. movdqa xmm0, [esi+000h]
  208. movdqa xmm1, [esi+010h]
  209. movdqa xmm2, [esi+020h]
  210. movdqa xmm3, [esi+030h]
  211.  
  212. movdqa [edi+000h], xmm0
  213. movdqa [edi+010h], xmm1
  214. movdqa [edi+020h], xmm2
  215. movdqa [edi+030h], xmm3
  216.  
  217. add esi, 64
  218. add edi, 64
  219. sub ecx, 16
  220. jnz cAR_A16_pure
  221. add esi, ebp
  222. add edi, edx
  223. mov ecx, esp
  224. dec ebx
  225. jnz cAR_A16_pure
  226. jmp V_EXIT
  227.  
  228. ALIGN_Z
  229. cAR_S0D8: ;; aligned 8
  230. cAR_S8D0: ;; aligned 8
  231. cAR_S8D8: ;; aligned 8
  232. cAR_SXXX:
  233. test ecx, 7
  234. jz cAR_A8_pure
  235. sub ecx, 8
  236. sub esp, 8
  237. ALIGN_Z
  238. cAR_A8_dirty:
  239. movq mm0, [esi+000h]
  240. movq mm1, [esi+008h]
  241. movq mm2, [esi+010h]
  242. movq mm3, [esi+018h]
  243.  
  244. movq [edi+000h], mm0
  245. movq [edi+008h], mm1
  246. movq [edi+010h], mm2
  247. movq [edi+018h], mm3
  248.  
  249. add esi, 32
  250. add edi, 32
  251. sub ecx, 8
  252. jg cAR_A8_dirty
  253. add ecx, 8
  254. @@:
  255. mov eax, [esi]
  256. lea esi, [esi+4]
  257. mov [edi], eax
  258. dec ecx
  259. lea edi, [edi+4]
  260. jnz @B
  261. add esi, ebp
  262. add edi, edx
  263. mov ecx, esp
  264. dec ebx
  265. jnz cAR_A8_dirty
  266. emms
  267. jmp V_EXIT
  268.  
  269. ALIGN_Z
  270. cAR_A8_pure:
  271. movq mm0, [esi+000h]
  272. movq mm1, [esi+008h]
  273. movq mm2, [esi+010h]
  274. movq mm3, [esi+018h]
  275.  
  276. movq [edi+000h], mm0
  277. movq [edi+008h], mm1
  278. movq [edi+010h], mm2
  279. movq [edi+018h], mm3
  280.  
  281. add esi, 32
  282. add edi, 32
  283. sub ecx, 8
  284. jnz cAR_A8_pure
  285. add esi, ebp
  286. add edi, edx
  287. mov ecx, esp
  288. dec ebx
  289. jnz cAR_A8_pure
  290. emms
  291. jmp V_EXIT
  292.  
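;; source and target are each offset by 4 or 0Ch from 16-byte alignment here, so after
;; skipping one leading dword both pointers become 8-byte aligned: advance by 4,
;; run the MMX loop on width-1 pixels, and copy the skipped first dword of each row
;; at the row tail (via the negative eax index) before stepping to the next row.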
  293. ALIGN_Z
  294. cAR_S4D4: ;; offset aligned one
  295. cAR_SCDC: ;; offset aligned one
  296. cAR_S4DC: ;; offset aligned one
  297. cAR_SCD4: ;; offset aligned one
  298. lea eax, [esp]
  299. neg eax
  300. sub esp, 1
  301. test esp, 7
  302. lea esi, [esi+4]
  303. lea edi, [edi+4]
  304. lea edx, [edx+4]
  305. lea ebp, [ebp+4]
  306. mov ecx, esp
  307. jz cAR_A8cc_pure
  308. sub esp, 8
  309. mov ecx, esp
  310. ALIGN_Z
  311. cAR_A8cc_dirty:
  312. movq mm0, [esi+000h]
  313. movq mm1, [esi+008h]
  314. movq mm2, [esi+010h]
  315. movq mm3, [esi+018h]
  316.  
  317. movq [edi+000h], mm0
  318. movq [edi+008h], mm1
  319. movq [edi+010h], mm2
  320. movq [edi+018h], mm3
  321.  
  322. add esi, 32
  323. add edi, 32
  324. sub ecx, 8
  325. jg cAR_A8cc_dirty
  326. add ecx, 8
  327. @@:
  328. mov eax, [esi]
  329. lea esi, [esi+4]
  330. mov [edi], eax
  331. dec ecx
  332. lea edi, [edi+4]
  333. jnz @B
  334. lea eax, [esp+8+1]
  335. neg eax
  336. mov ecx, [esi+eax*4]
  337. add esi, ebp
  338. mov [edi+eax*4], ecx
  339. mov ecx, esp
  340. add edi, edx
  341. mov eax, eax ;; spare
  342. dec ebx
  343. jnz cAR_A8cc_dirty
  344. emms
  345. jmp V_EXIT
  346.  
  347. ALIGN_Z
  348. cAR_A8cc_pure:
  349. movq mm0, [esi+000h]
  350. movq mm1, [esi+008h]
  351. movq mm2, [esi+010h]
  352. movq mm3, [esi+018h]
  353.  
  354. movq [edi+000h], mm0
  355. movq [edi+008h], mm1
  356. movq [edi+010h], mm2
  357. movq [edi+018h], mm3
  358.  
  359. add esi, 32
  360. add edi, 32
  361. sub ecx, 8
  362. jnz cAR_A8cc_pure
  363. mov ecx, [esi+eax*4]
  364. add esi, ebp
  365. mov [edi+eax*4], ecx
  366. mov ecx, esp
  367. add edi, edx
  368. mov eax, eax
  369. dec ebx
  370. jnz cAR_A8cc_pure
  371. emms
  372. jmp V_EXIT
  373.  
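;; SYS-4 / SYS-C cases: after peeling the leading 0..3 dwords of each row (those are
;; copied at the row tail via a negative index), the target is 16-byte aligned while the
;; source is still offset by 4 bytes (SYS-4, palignr imm 4) or 12 bytes (SYS-C, imm 12).
;; The loops therefore read 16-byte aligned blocks starting just below the true source
;; position ([esi-04H] / [esi-0CH]) and use PALIGNR to stitch two consecutive blocks
;; back into source byte order, so the stores can stay movdqa. Note the first aligned
;; load reaches up to 12 bytes before the nominal source start, which assumes those
;; bytes are readable (see the boundary warning in the header).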
  374. ALIGN_Z
  375. cAR_S4D0: ;; ========================= target aligned 16- SYS-4 level %
  376. movdqa xmm0, [esi-04H]
  377. test ecx, 15
  378. jz cAR_S4D0_pure
  379. sub ecx, 16
  380. sub esp, 16
  381. ALIGN_Z
  382. cAR_S4D0_dirty:
  383. movdqa xmm1, [esi-04H+010h]
  384. movdqa xmm2, [esi-04H+020h]
  385. movdqa xmm3, [esi-04H+030h]
  386. movdqa xmm4, [esi-04H+040h]
  387.  
  388. movdqa xmm7, xmm4
  389. palignr xmm4, xmm3, 4 ;; fit, <-xmm4 <-target[3]
  390. palignr xmm3, xmm2, 4 ;; fit, <-xmm3 <-target[2]
  391. palignr xmm2, xmm1, 4 ;; fit, <-xmm2 <-target[1]
  392. palignr xmm1, xmm0, 4 ;; fit, <-xmm1 <-target[0]
  393. movdqa xmm0, xmm7
  394.  
  395. ;; write back buffer
  396. movdqa [edi+000h], xmm1
  397. movdqa [edi+010h], xmm2
  398. movdqa [edi+020h], xmm3
  399. movdqa [edi+030h], xmm4
  400.  
  401. add esi, 64
  402. add edi, 64
  403. sub ecx, 16
  404. jg cAR_S4D0_dirty
  405. add ecx, 16
  406. @@:
  407. mov eax, [esi]
  408. lea esi, [esi+4]
  409. mov [edi], eax
  410. dec ecx
  411. lea edi, [edi+4]
  412. jnz @B
  413. add esi, ebp
  414. add edi, edx
  415. mov ecx, esp
  416. dec ebx
  417. movdqa xmm0, [esi-04H]
  418. jnz cAR_S4D0_dirty
  419. jmp V_EXIT
  420.  
  421. ;--------------------------------------------------------------------
  422. ALIGN_Z
  423. cAR_S4D0_pure:
  424. movdqa xmm1, [esi-04H+010h]
  425. movdqa xmm2, [esi-04H+020h]
  426. movdqa xmm3, [esi-04H+030h]
  427. movdqa xmm4, [esi-04H+040h]
  428.  
  429. movdqa xmm7, xmm4
  430. palignr xmm4, xmm3, 4 ;; fit, <-xmm4 <-target[3]
  431. palignr xmm3, xmm2, 4 ;; fit, <-xmm3 <-target[2]
  432. palignr xmm2, xmm1, 4 ;; fit, <-xmm2 <-target[1]
  433. palignr xmm1, xmm0, 4 ;; fit, <-xmm1 <-target[0]
  434. movdqa xmm0, xmm7
  435.  
  436. ;; write back buffer
  437. movdqa [edi+000h], xmm1
  438. movdqa [edi+010h], xmm2
  439. movdqa [edi+020h], xmm3
  440. movdqa [edi+030h], xmm4
  441.  
  442. add esi, 64
  443. add edi, 64
  444. sub ecx, 16
  445. jnz cAR_S4D0_pure
  446. add esi, ebp
  447. add edi, edx
  448. mov ecx, esp
  449. dec ebx
  450. movdqa xmm0, [esi-04H]
  451. jnz cAR_S4D0_pure
  452. jmp V_EXIT
  453.  
  454. ALIGN_Z
  455. cAR_S0DC: ;; unaligned ================================== SYS-4 level 1 BUG
  456.  
  457. movdqa xmm0, [esi]
  458. lea eax, [esp]
  459. neg eax
  460. sub esp, 1
  461. test esp, 15
  462. lea esi, [esi+4]
  463. lea edi, [edi+4]
  464. lea edx, [edx+4]
  465. lea ebp, [ebp+4]
  466. mov ecx, esp
  467. jz cAR_S0DC_pure
  468. sub esp, 16
  469. mov ecx, esp
  470. ALIGN_Z
  471. cAR_S0DC_dirty:
  472. movdqa xmm1, [esi-04H+010h]
  473. movdqa xmm2, [esi-04H+020h]
  474. movdqa xmm3, [esi-04H+030h]
  475. movdqa xmm4, [esi-04H+040h]
  476.  
  477. movdqa xmm7, xmm4
  478. palignr xmm4, xmm3, 4 ;; fit, <-xmm4 <-target[3]
  479. palignr xmm3, xmm2, 4 ;; fit, <-xmm3 <-target[2]
  480. palignr xmm2, xmm1, 4 ;; fit, <-xmm2 <-target[1]
  481. palignr xmm1, xmm0, 4 ;; fit, <-xmm1 <-target[0]
  482. movdqa xmm0, xmm7
  483.  
  484. ;; write back buffer
  485. movdqa [edi+000h], xmm1
  486. movdqa [edi+010h], xmm2
  487. movdqa [edi+020h], xmm3
  488. movdqa [edi+030h], xmm4
  489.  
  490. add esi, 64
  491. add edi, 64
  492. sub ecx, 16
  493. jg cAR_S0DC_dirty
  494. add ecx, 16
  495. @@:
  496. mov eax, [esi]
  497. lea esi, [esi+4]
  498. mov [edi], eax
  499. dec ecx
  500. lea edi, [edi+4]
  501. jnz @B
  502. lea eax, [esp+16+1]
  503. neg eax
  504. mov ecx, [esi+eax*4]
  505. add esi, ebp
  506. mov [edi+eax*4], ecx
  507. mov ecx, esp
  508. add edi, edx
  509. mov eax, eax ;; spare
  510. dec ebx
  511. movdqa xmm0, [esi-04H]
  512. jnz cAR_S0DC_dirty
  513. jmp V_EXIT
  514.  
  515. cAR_S0DC_pure:
  516. movdqa xmm1, [esi-04H+010h]
  517. movdqa xmm2, [esi-04H+020h]
  518. movdqa xmm3, [esi-04H+030h]
  519. movdqa xmm4, [esi-04H+040h]
  520.  
  521. movdqa xmm7, xmm4
  522. palignr xmm4, xmm3, 4 ;; fit, <-xmm4 <-target[3]
  523. palignr xmm3, xmm2, 4 ;; fit, <-xmm3 <-target[2]
  524. palignr xmm2, xmm1, 4 ;; fit, <-xmm2 <-target[1]
  525. palignr xmm1, xmm0, 4 ;; fit, <-xmm1 <-target[0]
  526. movdqa xmm0, xmm7
  527.  
  528. ;; write back buffer
  529. movdqa [edi+000h], xmm1
  530. movdqa [edi+010h], xmm2
  531. movdqa [edi+020h], xmm3
  532. movdqa [edi+030h], xmm4
  533.  
  534. add esi, 64
  535. add edi, 64
  536. sub ecx, 16
  537. jnz cAR_S0DC_pure
  538. mov ecx, [esi+eax*4]
  539. add esi, ebp
  540. mov [edi+eax*4], ecx
  541. mov ecx, esp
  542. add edi, edx
  543. mov eax, eax ;; spare
  544. dec ebx
  545. movdqa xmm0, [esi-04H]
  546. jnz cAR_S0DC_pure
  547. jmp V_EXIT
  548.  
  549. cAR_SCD8: ;; unaligned ================================== SYS-4 level 2
  550.  
  551. movdqa xmm0, [esi+4h]
  552. lea eax, [esp]
  553. neg eax
  554. sub esp, 2
  555. test esp, 15
  556. lea esi, [esi+8]
  557. lea edi, [edi+8]
  558. lea edx, [edx+8]
  559. lea ebp, [ebp+8]
  560. mov ecx, esp
  561. jz cAR_SCD8_pure
  562. sub esp, 16
  563. mov ecx, esp
  564. ALIGN_Z
  565. cAR_SCD8_dirty:
  566. movdqa xmm1, [esi-04H+010h]
  567. movdqa xmm2, [esi-04H+020h]
  568. movdqa xmm3, [esi-04H+030h]
  569. movdqa xmm4, [esi-04H+040h]
  570.  
  571. movdqa xmm7, xmm4
  572. palignr xmm4, xmm3, 4 ;; fit, <-xmm4 <-target[3]
  573. palignr xmm3, xmm2, 4 ;; fit, <-xmm3 <-target[2]
  574. palignr xmm2, xmm1, 4 ;; fit, <-xmm2 <-target[1]
  575. palignr xmm1, xmm0, 4 ;; fit, <-xmm1 <-target[0]
  576. movdqa xmm0, xmm7
  577.  
  578. ;; write back buffer
  579. movdqa [edi+000h], xmm1
  580. movdqa [edi+010h], xmm2
  581. movdqa [edi+020h], xmm3
  582. movdqa [edi+030h], xmm4
  583.  
  584. add esi, 64
  585. add edi, 64
  586. sub ecx, 16
  587. jg cAR_SCD8_dirty
  588. add ecx, 16
  589. @@:
  590. mov eax, [esi]
  591. lea esi, [esi+4]
  592. mov [edi], eax
  593. dec ecx
  594. lea edi, [edi+4]
  595. jnz @B
  596. lea eax, [esp+16+2]
  597. neg eax
  598. mov ecx, [esi+eax*4]
  599. mov [edi+eax*4], ecx
  600. mov ecx, [esi+eax*4+4]
  601. mov [edi+eax*4+4], ecx
  602. add esi, ebp
  603. mov ecx, esp
  604. add edi, edx
  605. mov eax, eax ;; spare
  606. dec ebx
  607. movdqa xmm0, [esi-04H]
  608. jnz cAR_SCD8_dirty
  609. jmp V_EXIT
  610.  
  611. cAR_SCD8_pure:
  612. movdqa xmm1, [esi-04H+010h]
  613. movdqa xmm2, [esi-04H+020h]
  614. movdqa xmm3, [esi-04H+030h]
  615. movdqa xmm4, [esi-04H+040h]
  616.  
  617. movdqa xmm7, xmm4
  618. palignr xmm4, xmm3, 4 ;; fit, <-xmm4 <-target[3]
  619. palignr xmm3, xmm2, 4 ;; fit, <-xmm3 <-target[2]
  620. palignr xmm2, xmm1, 4 ;; fit, <-xmm2 <-target[1]
  621. palignr xmm1, xmm0, 4 ;; fit, <-xmm1 <-target[0]
  622. movdqa xmm0, xmm7
  623.  
  624. ;; write back buffer
  625. movdqa [edi+000h], xmm1
  626. movdqa [edi+010h], xmm2
  627. movdqa [edi+020h], xmm3
  628. movdqa [edi+030h], xmm4
  629.  
  630. add esi, 64
  631. add edi, 64
  632. sub ecx, 16
  633. jnz cAR_SCD8_pure
  634. mov ecx, [esi+eax*4]
  635. mov [edi+eax*4], ecx
  636. mov ecx, [esi+eax*4+4]
  637. mov [edi+eax*4+4], ecx
  638. add esi, ebp
  639. mov ecx, esp
  640. add edi, edx
  641. mov eax, eax ;; spare
  642. dec ebx
  643. movdqa xmm0, [esi-04H]
  644. jnz cAR_SCD8_pure
  645. jmp V_EXIT
  646.  
  647.  
  648. cAR_S8D4: ;; unaligned ================================== SYS-4 level 3
  649. movdqa xmm0, [esi+8h]
  650. lea eax, [esp]
  651. neg eax
  652. sub esp, 3
  653. test esp, 15
  654. lea esi, [esi+12]
  655. lea edi, [edi+12]
  656. lea edx, [edx+12]
  657. lea ebp, [ebp+12]
  658. mov ecx, esp
  659. jz cAR_S8D4_pure
  660. sub esp, 16
  661. mov ecx, esp
  662. ALIGN_Z
  663. cAR_S8D4_dirty:
  664. movdqa xmm1, [esi-04H+010h]
  665. movdqa xmm2, [esi-04H+020h]
  666. movdqa xmm3, [esi-04H+030h]
  667. movdqa xmm4, [esi-04H+040h]
  668.  
  669. movdqa xmm7, xmm4
  670. palignr xmm4, xmm3, 4 ;; fit, <-xmm4 <-target[3]
  671. palignr xmm3, xmm2, 4 ;; fit, <-xmm3 <-target[2]
  672. palignr xmm2, xmm1, 4 ;; fit, <-xmm2 <-target[1]
  673. palignr xmm1, xmm0, 4 ;; fit, <-xmm1 <-target[0]
  674. movdqa xmm0, xmm7
  675.  
  676. ;; write back buffer
  677. movdqa [edi+000h], xmm1
  678. movdqa [edi+010h], xmm2
  679. movdqa [edi+020h], xmm3
  680. movdqa [edi+030h], xmm4
  681.  
  682. add esi, 64
  683. add edi, 64
  684. sub ecx, 16
  685. jg cAR_S8D4_dirty
  686. add ecx, 16
  687. @@:
  688. mov eax, [esi]
  689. lea esi, [esi+4]
  690. mov [edi], eax
  691. dec ecx
  692. lea edi, [edi+4]
  693. jnz @B
  694. lea eax, [esp+16+3]
  695. neg eax
  696. mov ecx, [esi+eax*4]
  697. mov [edi+eax*4], ecx
  698. mov ecx, [esi+eax*4+4]
  699. mov [edi+eax*4+4], ecx
  700. mov ecx, [esi+eax*4+8]
  701. mov [edi+eax*4+8], ecx
  702. add esi, ebp
  703. mov ecx, esp
  704. add edi, edx
  705. mov eax, eax ;; spare
  706. dec ebx
  707. movdqa xmm0, [esi-04H]
  708. jnz cAR_S8D4_dirty
  709. jmp V_EXIT
  710.  
  711. cAR_S8D4_pure:
  712. movdqa xmm1, [esi-04H+010h]
  713. movdqa xmm2, [esi-04H+020h]
  714. movdqa xmm3, [esi-04H+030h]
  715. movdqa xmm4, [esi-04H+040h]
  716.  
  717. movdqa xmm7, xmm4
  718. palignr xmm4, xmm3, 4 ;; fit, <-xmm4 <-target[3]
  719. palignr xmm3, xmm2, 4 ;; fit, <-xmm3 <-target[2]
  720. palignr xmm2, xmm1, 4 ;; fit, <-xmm2 <-target[1]
  721. palignr xmm1, xmm0, 4 ;; fit, <-xmm1 <-target[0]
  722. movdqa xmm0, xmm7
  723.  
  724. ;; write back buffer
  725. movdqa [edi+000h], xmm1
  726. movdqa [edi+010h], xmm2
  727. movdqa [edi+020h], xmm3
  728. movdqa [edi+030h], xmm4
  729.  
  730. add esi, 64
  731. add edi, 64
  732. sub ecx, 16
  733. jnz cAR_S8D4_pure
  734. mov ecx, [esi+eax*4]
  735. mov [edi+eax*4], ecx
  736. mov ecx, [esi+eax*4+4]
  737. mov [edi+eax*4+4], ecx
  738. mov ecx, [esi+eax*4+8]
  739. mov [edi+eax*4+8], ecx
  740. add esi, ebp
  741. mov ecx, esp
  742. add edi, edx
  743. mov eax, eax ;; spare
  744. dec ebx
  745. movdqa xmm0, [esi-04H]
  746. jnz cAR_S8D4_pure
  747. jmp V_EXIT
  748.  
  749.  
  750. cAR_SCD0: ;; target aligned 16 ========================== SYS-C level %
  751. movdqa xmm0, [esi-0CH]
  752. test ecx, 15
  753. jz cAR_SCD0_pure
  754. sub ecx, 16
  755. mov esp, ecx
  756. ALIGN_Z
  757. cAR_SCD0_dirty:
  758. movdqa xmm1, [esi-0CH+010h]
  759. movdqa xmm2, [esi-0CH+020h]
  760. movdqa xmm3, [esi-0CH+030h]
  761. movdqa xmm4, [esi-0CH+040h]
  762.  
  763. movdqa xmm7, xmm4
  764. palignr xmm4, xmm3, 12 ;; fit, <-xmm4 <-target[3]
  765. palignr xmm3, xmm2, 12 ;; fit, <-xmm3 <-target[2]
  766. palignr xmm2, xmm1, 12 ;; fit, <-xmm2 <-target[1]
  767. palignr xmm1, xmm0, 12 ;; fit, <-xmm1 <-target[0]
  768. movdqa xmm0, xmm7
  769.  
  770. ;; write back buffer
  771. movdqa [edi+000h], xmm1
  772. movdqa [edi+010h], xmm2
  773. movdqa [edi+020h], xmm3
  774. movdqa [edi+030h], xmm4
  775.  
  776. add esi, 64
  777. add edi, 64
  778. sub ecx, 16
  779. jg cAR_SCD0_dirty
  780. add ecx, 16
  781. @@:
  782. mov eax, [esi]
  783. lea esi, [esi+4]
  784. mov [edi], eax
  785. dec ecx
  786. lea edi, [edi+4]
  787. jnz @B
  788. add esi, ebp
  789. add edi, edx
  790. mov ecx, esp
  791. dec ebx
  792. movdqa xmm0, [esi-0CH]
  793. jnz cAR_SCD0_dirty
  794. jmp V_EXIT
  795. ;--------------------------------------------------------------------
  796. ALIGN_Z
  797. cAR_SCD0_pure:
  798. movdqa xmm1, [esi-0CH+010h]
  799. movdqa xmm2, [esi-0CH+020h]
  800. movdqa xmm3, [esi-0CH+030h]
  801. movdqa xmm4, [esi-0CH+040h]
  802.  
  803. movdqa xmm7, xmm4
  804. palignr xmm4, xmm3, 12 ;; fit, <-xmm4 <-target[3]
  805. palignr xmm3, xmm2, 12 ;; fit, <-xmm3 <-target[2]
  806. palignr xmm2, xmm1, 12 ;; fit, <-xmm2 <-target[1]
  807. palignr xmm1, xmm0, 12 ;; fit, <-xmm1 <-target[0]
  808. movdqa xmm0, xmm7
  809.  
  810. ;; write back buffer
  811. movdqa [edi+000h], xmm1
  812. movdqa [edi+010h], xmm2
  813. movdqa [edi+020h], xmm3
  814. movdqa [edi+030h], xmm4
  815.  
  816. add esi, 64
  817. add edi, 64
  818. sub ecx, 16
  819. jnz cAR_SCD0_pure
  820. add esi, ebp
  821. add edi, edx
  822. mov ecx, esp
  823. dec ebx
  824. movdqa xmm0, [esi-0CH]
  825. jnz cAR_SCD0_pure
  826. jmp V_EXIT
  827.  
  828.  
  829. cAR_S8DC: ;; unaligned ================================== SYS-C level 1 BUG
  830. movdqa xmm0, [esi-08h]
  831. lea eax, [esp]
  832. neg eax
  833. sub esp, 1
  834. test esp, 15
  835. lea esi, [esi+4]
  836. lea edi, [edi+4]
  837. lea edx, [edx+4]
  838. lea ebp, [ebp+4]
  839. mov ecx, esp
  840. jz cAR_S8DC_pure
  841. sub esp, 16
  842. mov ecx, esp
  843. ALIGN_Z
  844. cAR_S8DC_dirty:
  845. movdqa xmm1, [esi-0CH+010h]
  846. movdqa xmm2, [esi-0CH+020h]
  847. movdqa xmm3, [esi-0CH+030h]
  848. movdqa xmm4, [esi-0CH+040h]
  849.  
  850. movdqa xmm7, xmm4
  851. palignr xmm4, xmm3, 12 ;; fit, <-xmm4 <-target[3]
  852. palignr xmm3, xmm2, 12 ;; fit, <-xmm3 <-target[2]
  853. palignr xmm2, xmm1, 12 ;; fit, <-xmm2 <-target[1]
  854. palignr xmm1, xmm0, 12 ;; fit, <-xmm1 <-target[0]
  855. movdqa xmm0, xmm7
  856.  
  857. ;; write back buffer
  858. movdqa [edi+000h], xmm1
  859. movdqa [edi+010h], xmm2
  860. movdqa [edi+020h], xmm3
  861. movdqa [edi+030h], xmm4
  862.  
  863. add esi, 64
  864. add edi, 64
  865. sub ecx, 16
  866. jg cAR_S8DC_dirty
  867. add ecx, 16
  868. @@:
  869. mov eax, [esi]
  870. lea esi, [esi+4]
  871. mov [edi], eax
  872. dec ecx
  873. lea edi, [edi+4]
  874. jnz @B
  875. lea eax, [esp+16+1]
  876. neg eax
  877. mov ecx, [esi+eax*4]
  878. add esi, ebp
  879. mov [edi+eax*4], ecx
  880. mov ecx, esp
  881. add edi, edx
  882. mov eax, eax ;; spare
  883. dec ebx
  884. movdqa xmm0, [esi-0CH]
  885. jnz cAR_S8DC_dirty
  886. jmp V_EXIT
  887.  
  888. cAR_S8DC_pure:
  889. movdqa xmm1, [esi-0CH+010h]
  890. movdqa xmm2, [esi-0CH+020h]
  891. movdqa xmm3, [esi-0CH+030h]
  892. movdqa xmm4, [esi-0CH+040h]
  893.  
  894. movdqa xmm7, xmm4
  895. palignr xmm4, xmm3, 12 ;; fit, <-xmm4 <-target[3]
  896. palignr xmm3, xmm2, 12 ;; fit, <-xmm3 <-target[2]
  897. palignr xmm2, xmm1, 12 ;; fit, <-xmm2 <-target[1]
  898. palignr xmm1, xmm0, 12 ;; fit, <-xmm1 <-target[0]
  899. movdqa xmm0, xmm7
  900.  
  901. ;; write back buffer
  902. movdqa [edi+000h], xmm1
  903. movdqa [edi+010h], xmm2
  904. movdqa [edi+020h], xmm3
  905. movdqa [edi+030h], xmm4
  906.  
  907. add esi, 64
  908. add edi, 64
  909. sub ecx, 16
  910. jnz cAR_S8DC_pure
  911. mov ecx, [esi+eax*4]
  912. add esi, ebp
  913. mov [edi+eax*4], ecx
  914. mov ecx, esp
  915. add edi, edx
  916. mov eax, eax ;; spare
  917. dec ebx
  918. movdqa xmm0, [esi-0CH]
  919. jnz cAR_S8DC_pure
  920. jmp V_EXIT
  921.  
  922. cAR_S4D8: ;; unaligned ================================== SYS-C level 2
  923.  
  924. movdqa xmm0, [esi-04H]
  925. lea eax, [esp]
  926. neg eax
  927. sub esp, 2
  928. test esp, 15
  929. lea esi, [esi+8]
  930. lea edi, [edi+8]
  931. lea ebp, [ebp+8]
  932. lea edx, [edx+8]
  933. mov ecx, esp
  934. jz cAR_S4D8_pure
  935. sub esp, 16
  936. mov ecx, esp
  937. ALIGN_Z
  938. cAR_S4D8_dirty:
  939. movdqa xmm1, [esi-0CH+010h]
  940. movdqa xmm2, [esi-0CH+020h]
  941. movdqa xmm3, [esi-0CH+030h]
  942. movdqa xmm4, [esi-0CH+040h]
  943.  
  944. movdqa xmm7, xmm4
  945. palignr xmm4, xmm3, 12 ;; fit, <-xmm4 <-target[3]
  946. palignr xmm3, xmm2, 12 ;; fit, <-xmm3 <-target[2]
  947. palignr xmm2, xmm1, 12 ;; fit, <-xmm2 <-target[1]
  948. palignr xmm1, xmm0, 12 ;; fit, <-xmm1 <-target[0]
  949. movdqa xmm0, xmm7
  950.  
  951. ;; write back buffer
  952. movdqa [edi+000h], xmm1
  953. movdqa [edi+010h], xmm2
  954. movdqa [edi+020h], xmm3
  955. movdqa [edi+030h], xmm4
  956.  
  957. add esi, 64
  958. add edi, 64
  959. sub ecx, 16
  960. jg cAR_S4D8_dirty
  961. add ecx, 16
  962. @@:
  963. mov eax, [esi]
  964. lea esi, [esi+4]
  965. mov [edi], eax
  966. dec ecx
  967. lea edi, [edi+4]
  968. jnz @B
  969. lea eax, [esp+16+2]
  970. neg eax
  971. mov ecx, [esi+eax*4]
  972. mov [edi+eax*4], ecx
  973. mov ecx, [esi+eax*4+4]
  974. mov [edi+eax*4+4], ecx
  975. add esi, ebp
  976. mov ecx, esp
  977. add edi, edx
  978. dec ebx
  979. movdqa xmm0, [esi-0CH]
  980. jnz cAR_S4D8_dirty
  981. jmp V_EXIT
  982.  
  983. cAR_S4D8_pure:
  984. movdqa xmm1, [esi-0CH+010h]
  985. movdqa xmm2, [esi-0CH+020h]
  986. movdqa xmm3, [esi-0CH+030h]
  987. movdqa xmm4, [esi-0CH+040h]
  988.  
  989. movdqa xmm7, xmm4
  990. palignr xmm4, xmm3, 12 ;; fit, <-xmm4 <-target[3]
  991. palignr xmm3, xmm2, 12 ;; fit, <-xmm3 <-target[2]
  992. palignr xmm2, xmm1, 12 ;; fit, <-xmm2 <-target[1]
  993. palignr xmm1, xmm0, 12 ;; fit, <-xmm1 <-target[0]
  994. movdqa xmm0, xmm7
  995.  
  996. ;; write back buffer
  997. movdqa [edi+000h], xmm1
  998. movdqa [edi+010h], xmm2
  999. movdqa [edi+020h], xmm3
  1000. movdqa [edi+030h], xmm4
  1001.  
  1002. add esi, 64
  1003. add edi, 64
  1004. sub ecx, 16
  1005. jnz cAR_S4D8_pure
  1006. mov ecx, [esi+eax*4]
  1007. mov [edi+eax*4], ecx
  1008. mov ecx, [esi+eax*4+4]
  1009. mov [edi+eax*4+4], ecx
  1010. add esi, ebp
  1011. mov ecx, esp
  1012. add edi, edx
  1013. dec ebx
  1014. movdqa xmm0, [esi-0CH]
  1015. jnz cAR_S4D8_pure
  1016. jmp V_EXIT
  1017.  
  1018. cAR_S0D4: ;; unaligned ================================== SYS-C level 3
  1019.  
  1020. movdqa xmm0, [esi]
  1021. lea eax, [esp]
  1022. neg eax
  1023. sub esp, 3
  1024. test esp, 15
  1025. lea esi, [esi+12]
  1026. lea edi, [edi+12]
  1027. lea ebp, [ebp+12]
  1028. lea edx, [edx+12]
  1029. mov ecx, esp
  1030. jz cAR_S0D4_pure
  1031. sub esp, 16
  1032. mov ecx, esp
  1033. ALIGN_Z
  1034. cAR_S0D4_dirty:
  1035. movdqa xmm1, [esi-0CH+010h]
  1036. movdqa xmm2, [esi-0CH+020h]
  1037. movdqa xmm3, [esi-0CH+030h]
  1038. movdqa xmm4, [esi-0CH+040h]
  1039.  
  1040. movdqa xmm7, xmm4
  1041. palignr xmm4, xmm3, 12 ;; fit, <-xmm4 <-target[3]
  1042. palignr xmm3, xmm2, 12 ;; fit, <-xmm3 <-target[2]
  1043. palignr xmm2, xmm1, 12 ;; fit, <-xmm2 <-target[1]
  1044. palignr xmm1, xmm0, 12 ;; fit, <-xmm1 <-target[0]
  1045. movdqa xmm0, xmm7
  1046.  
  1047. ;; write back buffer
  1048. movdqa [edi+000h], xmm1
  1049. movdqa [edi+010h], xmm2
  1050. movdqa [edi+020h], xmm3
  1051. movdqa [edi+030h], xmm4
  1052.  
  1053. add esi, 64
  1054. add edi, 64
  1055. sub ecx, 16
  1056. jg cAR_S0D4_dirty
  1057. add ecx, 16
  1058. @@:
  1059. mov eax, [esi]
  1060. lea esi, [esi+4]
  1061. mov [edi], eax
  1062. dec ecx
  1063. lea edi, [edi+4]
  1064. jnz @B
  1065. lea eax, [esp+16+3]
  1066. neg eax
  1067. mov ecx, [esi+eax*4]
  1068. mov [edi+eax*4], ecx
  1069. mov ecx, [esi+eax*4+4]
  1070. mov [edi+eax*4+4], ecx
  1071. mov ecx, [esi+eax*4+8]
  1072. mov [edi+eax*4+8], ecx
  1073. add esi, ebp
  1074. mov ecx, esp
  1075. add edi, edx
  1076. dec ebx
  1077. movdqa xmm0, [esi-0CH]
  1078. jnz cAR_S0D4_dirty
  1079. jmp V_EXIT
  1080.  
  1081. cAR_S0D4_pure:
  1082. movdqa xmm1, [esi-0CH+010h]
  1083. movdqa xmm2, [esi-0CH+020h]
  1084. movdqa xmm3, [esi-0CH+030h]
  1085. movdqa xmm4, [esi-0CH+040h]
  1086.  
  1087. movdqa xmm7, xmm4
  1088. palignr xmm4, xmm3, 12 ;; fit, <-xmm4 <-target[3]
  1089. palignr xmm3, xmm2, 12 ;; fit, <-xmm3 <-target[2]
  1090. palignr xmm2, xmm1, 12 ;; fit, <-xmm2 <-target[1]
  1091. palignr xmm1, xmm0, 12 ;; fit, <-xmm1 <-target[0]
  1092. movdqa xmm0, xmm7
  1093.  
  1094. ;; write back buffer
  1095. movdqa [edi+000h], xmm1
  1096. movdqa [edi+010h], xmm2
  1097. movdqa [edi+020h], xmm3
  1098. movdqa [edi+030h], xmm4
  1099.  
  1100. add esi, 64
  1101. add edi, 64
  1102. sub ecx, 16
  1103. jnz cAR_S0D4_pure
  1104. mov ecx, [esi+eax*4]
  1105. mov [edi+eax*4], ecx
  1106. mov ecx, [esi+eax*4+4]
  1107. mov [edi+eax*4+4], ecx
  1108. mov ecx, [esi+eax*4+8]
  1109. mov [edi+eax*4+8], ecx
  1110. add esi, ebp
  1111. mov ecx, esp
  1112. add edi, edx
  1113. dec ebx
  1114. movdqa xmm0, [esi-0CH]
  1115. jnz cAR_S0D4_pure
  1116. jmp V_EXIT
  1117.  
  1118. ALIGN_Z
  1119. V_EXIT:
  1120. mov esp, fs:[STF_SAVE]
  1121. pop ebp
  1122. pop ebx
  1123. pop edi
  1124. pop esi
  1125. ret
  1126.  
  1127. ALIGN_Z
  1128. ;---------------------------------------------------------------------------------------------
  1129. chkAlignRoutine dd cAR_S0D0, cAR_S4D0, cAR_S8D0, cAR_SCD0 ;; 00xx
  1130. dd cAR_S0D4, cAR_S4D4, cAR_S8D4, cAR_SCD4 ;; 01xx
  1131. dd cAR_S0D8, cAR_S4D8, cAR_S8D8, cAR_SCD8 ;; 10xx
  1132. dd cAR_S0DC, cAR_S4DC, cAR_S8DC, cAR_SCDC ;; 11xx
  1133. chkUnAlignRoutine dd cAR_S0D0, cAR_S4D0, cAR_S8D0, cAR_SCD0 ;; 00xx
  1134. dd cAR_S0D4, cAR_S4D4, cAR_S8D4, cAR_SCD4 ;; 01xx
  1135. dd cAR_S0D8, cAR_S4D8, cAR_S8D8, cAR_SCD8 ;; 10xx
  1136. dd cAR_S0DC, cAR_S4DC, cAR_S8DC, cAR_SCDC ;; 11xx
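;; chkUnAlignRoutine appears to be an unused duplicate of chkAlignRoutine: the
;; unaligned-pitch path above jumps directly to cAR_SXXX and never indexes this table.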
  1137. ;---------------------------------------------------------------------------------------------
  1138. ; smallCopyRoutine MACRO
  1139. ;---------------------------------------------------------------------------------------------
  1140. sCR_start macro s
  1141. ALIGN_Z
  1142. sCR&s&:
  1143.  
  1144. endm
  1145.  
  1146. sCR_end macro s
  1147. lea esi, [esi+ebp+0&s&h*4]
  1148. lea edi, [edi+edx+0&s&h*4]
  1149. dec ebx
  1150. jnz sCR&s&
  1151. jmp V_EXIT
  1152.  
  1153. endm
  1154. ; ----------------------------------------------------
  1155. ; step
  1156. ; ----------------------------------------------------
  1157. sCRc1 macro s
  1158. mov eax, [esi+s]
  1159. mov [edi+s], eax
  1160.  
  1161. endm
  1162.  
  1163. sCRc2 macro s
  1164. mov eax, [esi+s]
  1165. mov ecx, [esi+s+4]
  1166. mov [edi+s], eax
  1167. mov [edi+s+4],ecx
  1168. endm
  1169.  
  1170. sCRc4 macro s
  1171. sCRc2 s
  1172. sCRc2 s + 08h
  1173. endm
  1174.  
  1175. sCRc8 macro s
  1176. sCRc4 s
  1177. sCRc4 s + 10h
  1178. endm
  1179.  
  1180. sCRc16 macro s
  1181. sCRc8 s
  1182. sCRc8 s + 20h
  1183. endm
  1184.  
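;; the macros above generate the 31 small-width routines listed in smallCopyRoutine:
;; sCR_start/sCR_end wrap one row copy in a loop over the height, and sCRc1/2/4/8/16
;; emit unrolled dword moves at increasing offsets. sCR_end advances both pointers by
;; pitch RVA + width*4 bytes, i.e. exactly one full pitch per row.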
  1185. sCR_start 01
  1186. sCRc1 00h
  1187. sCR_end 01 ;; ----------------- width 1
  1188.  
  1189. sCR_start 02
  1190. sCRc2 00h
  1191. sCR_end 02 ;; ----------------- width 2
  1192.  
  1193. sCR_start 03
  1194. sCRc2 00h
  1195. sCRc1 08h
  1196. sCR_end 03 ;; ----------------- width 3
  1197.  
  1198. sCR_start 04
  1199. sCRc4 00h
  1200. sCR_end 04 ;; ----------------- width 4
  1201.  
  1202. sCR_start 05
  1203. sCRc4 00h
  1204. sCRc1 10h
  1205. sCR_end 05 ;; ----------------- width 5
  1206.  
  1207. sCR_start 06
  1208. sCRc4 00h
  1209. sCRc2 10h
  1210. sCR_end 06 ;; ----------------- width 6
  1211.  
  1212. sCR_start 07
  1213. sCRc4 00h
  1214. sCRc2 10h
  1215. sCRc1 18h
  1216. sCR_end 07 ;; ----------------- width 7
  1217.  
  1218. sCR_start 08
  1219. sCRc8 00h
  1220. sCR_end 08 ;; ----------------- width 8
  1221.  
  1222. sCR_start 09
  1223. sCRc8 00h
  1224. sCRc1 20h
  1225. sCR_end 09 ;; ----------------- width 9
  1226.  
  1227. sCR_start 0A
  1228. sCRc8 00h
  1229. sCRc2 20h
  1230. sCR_end 0A ;; ----------------- width 10
  1231.  
  1232. sCR_start 0B
  1233. sCRc8 00h
  1234. sCRc2 20h
  1235. sCRc1 28h
  1236. sCR_end 0B ;; ----------------- width 11
  1237.  
  1238. sCR_start 0C
  1239. sCRc8 00h
  1240. sCRc4 20h
  1241. sCR_end 0C ;; ----------------- width 12
  1242.  
  1243. sCR_start 0D
  1244. sCRc8 00h
  1245. sCRc4 20h
  1246. sCRc1 30h
  1247. sCR_end 0D ;; ----------------- width 13
  1248.  
  1249. sCR_start 0E
  1250. sCRc8 00h
  1251. sCRc4 20h
  1252. sCRc2 30h
  1253. sCR_end 0E ;; ----------------- width 14
  1254.  
  1255. sCR_start 0F
  1256. sCRc8 00h
  1257. sCRc4 20h
  1258. sCRc2 30h
  1259. sCRc1 38h
  1260. sCR_end 0F ;; ----------------- width 15
  1261.  
  1262. sCR_start 10
  1263. sCRc16 00h
  1264. sCR_end 10 ;; ----------------- width 16
  1265.  
  1266. sCR_start 11
  1267. sCRc16 00h
  1268. sCRc1 40h
  1269. sCR_end 11 ;; ----------------- width 17
  1270.  
  1271. sCR_start 12
  1272. sCRc16 00h
  1273. sCRc2 40h
  1274. sCR_end 12 ;; ----------------- width 18
  1275.  
  1276. sCR_start 13
  1277. sCRc16 00h
  1278. sCRc2 40h
  1279. sCRc1 48h
  1280. sCR_end 13 ;; ----------------- width 19
  1281.  
  1282. sCR_start 14
  1283. sCRc16 00h
  1284. sCRc4 40h
  1285. sCR_end 14 ;; ----------------- width 20
  1286.  
  1287. sCR_start 15
  1288. sCRc16 00h
  1289. sCRc4 40h
  1290. sCRc1 50h
  1291. sCR_end 15 ;; ----------------- width 21
  1292.  
  1293. sCR_start 16
  1294. sCRc16 00h
  1295. sCRc4 40h
  1296. sCRc2 50h
  1297. sCR_end 16 ;; ----------------- width 22
  1298.  
  1299. sCR_start 17
  1300. sCRc16 00h
  1301. sCRc4 40h
  1302. sCRc2 50h
  1303. sCRc1 58h
  1304. sCR_end 17 ;; ----------------- width 23
  1305.  
  1306. sCR_start 18
  1307. sCRc16 00h
  1308. sCRc8 40h
  1309. sCR_end 18 ;; ----------------- width 24
  1310.  
  1311. sCR_start 19
  1312. sCRc16 00h
  1313. sCRc8 40h
  1314. sCRc1 60h
  1315. sCR_end 19 ;; ----------------- width 25
  1316.  
  1317. sCR_start 1A
  1318. sCRc16 00h
  1319. sCRc8 40h
  1320. sCRc2 60h
  1321. sCR_end 1A ;; ----------------- width 26
  1322.  
  1323. sCR_start 1B
  1324. sCRc16 00h
  1325. sCRc8 40h
  1326. sCRc2 60h
  1327. sCRc1 68h
  1328. sCR_end 1B ;; ----------------- width 27
  1329.  
  1330. sCR_start 1C
  1331. sCRc16 00h
  1332. sCRc8 40h
  1333. sCRc4 60h
  1334. sCR_end 1C ;; ----------------- width 28
  1335.  
  1336. sCR_start 1D
  1337. sCRc16 00h
  1338. sCRc8 40h
  1339. sCRc4 60h
  1340. sCRc1 70h
  1341. sCR_end 1D ;; ----------------- width 29
  1342.  
  1343. sCR_start 1E
  1344. sCRc16 00h
  1345. sCRc8 40h
  1346. sCRc4 60h
  1347. sCRc2 70h
  1348. sCR_end 1E ;; ----------------- width 30
  1349.  
  1350. sCR_start 1F
  1351. sCRc16 00h
  1352. sCRc8 40h
  1353. sCRc4 60h
  1354. sCRc2 70h
  1355. sCRc1 78h
  1356. sCR_end 1F ;; ----------------- width 31
  1357.  
  1358. ALIGN_Z
  1359.  
  1360. ;---------------------------------------------------------------------------------------------
  1361. smallCopyRoutine dd sCR1F, sCR01, sCR02, sCR03, sCR04, sCR05, sCR06, sCR07
  1362. dd sCR08, sCR09, sCR0A, sCR0B, sCR0C, sCR0D, sCR0E, sCR0F
  1363. dd sCR10, sCR11, sCR12, sCR13, sCR14, sCR15, sCR16, sCR17
  1364. dd sCR18, sCR19, sCR1A, sCR1B, sCR1C, sCR1D, sCR1E, sCR1F
  1365.  
  1366. CSurfaceCopy32 endp
  1367. end