Guest User

Untitled

a guest
Feb 22nd, 2016
94
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.97 KB | None | 0 0
  // ---------------------------------------------------------------------
  // Loop body: processes 8 RGBA pixels (32 bytes) per iteration.
  //   - reads 32 bytes at `colors` and 32 bytes at `reference_colors`
  //   - transposes both into per-channel byte planes
  //   - derives per-pixel weights from the two alpha planes
  //   - blends r/g/b with pmaddubsw, shifts right by 5, re-interleaves
  //   - stores 8 alpha bytes to `pixel_alpha` and 32 bytes back to `colors`
  // AT&T operand order: source first, destination last.
  // Lowercase lanes (r0, g0, ...) come from `colors`, uppercase lanes
  // (R0, G0, ...) from `reference_colors`.
  // NOTE(review): every operand name here (colors, rgba_a_x4_a, r_x8, ...)
  // is defined outside this excerpt. Several names are read without being
  // written under that name in this loop (r_x8, b_x8, ab_x8, rb_x8, ga_x8,
  // rgba_x4_a, rgba_x4_b), so they presumably alias other names -- confirm
  // against the definitions before changing anything.
  // NOTE(review): movaps/movdqa and the punpck* memory operands require
  // 16-byte aligned addresses -- assumes colors/reference_colors and the
  // const_* tables are aligned; TODO confirm.
  // ---------------------------------------------------------------------
  1. 0:
  2. // rgba_a_x4_a = { r0g0b0a0 r1g1b1a1 r2g2b2a2 r3g3b3a3 }
  3. // rgba_a_x4_b = { r0g0b0a0 r1g1b1a1 r2g2b2a2 r3g3b3a3 }
  4. // rgba_b_x4_a = { R0G0B0A0 R1G1B1A1 R2G2B2A2 R3G3B3A3 }
  5. // rgba_b_x4_b = { R0G0B0A0 R1G1B1A1 R2G2B2A2 R3G3B3A3 }
  // Pixels 0-3 are loaded twice on purpose: the _a copy feeds the low-half
  // unpack below and the _b copy feeds the high-half unpack.
  6. movaps 0(colors), rgba_a_x4_a
  7. movaps 0(colors), rgba_a_x4_b
  8. movaps 0(reference_colors), rgba_b_x4_a
  9. movaps 0(reference_colors), rgba_b_x4_b
  10.  
  // Transpose pass 1: interleave bytes of pixels 0-3 with pixels 4-7
  // (read directly from memory at offset 16).
  11. // rgba_a_x4_a = { r0r4g0g4 b0b4a0a4 r1r5g1g5 b1b5a1a5 }
  12. // rgba_a_x4_b = { r2r6g2g6 b2b6a2a6 r3r7g3g7 b3b7a3a7 }
  13. // rgba_b_x4_a = { R0R4G0G4 B0B4A0A4 R1R5G1G5 B1B5A1A5 }
  14. // rgba_b_x4_b = { R2R6G2G6 B2B6A2A6 R3R7G3G7 B3B7A3A7 }
  15. punpcklbw 16(colors), rgba_a_x4_a
  16. punpckhbw 16(colors), rgba_a_x4_b
  17. punpcklbw 16(reference_colors), rgba_b_x4_a
  18. punpckhbw 16(reference_colors), rgba_b_x4_b
  19.  
  // Keep a copy: the next pass needs both the low and the high half.
  20. movdqa rgba_a_x4_a, rgba_a2_x4_a
  21. movdqa rgba_b_x4_a, rgba_b2_x4_a
  22.  
  // Transpose pass 2: even pixels end up in *_x4_a, odd pixels in *2_x4_a.
  23. // rgba_a_x4_a = { r0r2r4r6 g0g2g4g6 b0b2b4b6 a0a2a4a6 }
  24. // rgba_a2_x4_a = { r1r3r5r7 g1g3g5g7 b1b3b5b7 a1a3a5a7 }
  25. // rgba_b_x4_a = { R0R2R4R6 G0G2G4G6 B0B2B4B6 A0A2A4A6 }
  26. // rgba_b2_x4_a = { R1R3R5R7 G1G3G5G7 B1B3B5B7 A1A3A5A7 }
  27. punpcklbw rgba_a_x4_b, rgba_a_x4_a
  28. punpckhbw rgba_a_x4_b, rgba_a2_x4_a
  29. punpcklbw rgba_b_x4_b, rgba_b_x4_a
  30. punpckhbw rgba_b_x4_b, rgba_b2_x4_a
  31.  
  // Keep a copy again for the final transpose pass.
  32. movdqa rgba_a_x4_a, rgba_a3_x4_a
  33. movdqa rgba_b_x4_a, rgba_b3_x4_a
  34.  
  // Transpose pass 3: fully planar, r|g in *_x4_a and b|a in *3_x4_a.
  35. // rgba_a_x4_a = { r0r1r2r3 r4r5r6r7 g0g1g2g3 g4g5g6g7 }
  36. // rgba_a3_x4_a = { b0b1b2b3 b4b5b6b7 a0a1a2a3 a4a5a6a7 }
  37. // rgba_b_x4_a = { R0R1R2R3 R4R5R6R7 G0G1G2G3 G4G5G6G7 }
  38. // rgba_b3_x4_a = { B0B1B2B3 B4B5B6B7 A0A1A2A3 A4A5A6A7 }
  39. punpcklbw rgba_a2_x4_a, rgba_a_x4_a
  40. punpckhbw rgba_a2_x4_a, rgba_a3_x4_a
  41. punpcklbw rgba_b2_x4_a, rgba_b_x4_a
  42. punpckhbw rgba_b2_x4_a, rgba_b3_x4_a
  43.  
  // g_x8 and aa_x8 start as copies of the r|g and b|a planes of `colors`.
  // ai_x8 is preloaded with the constant used for "31 - a" below
  // (presumably 16 bytes of 0x1F -- the constant's definition is not shown).
  44. movdqa rgba_a_x4_a, g_x8
  45. movdqa rgba_a3_x4_a, aa_x8
  46. movaps (const_u8_0x1F_value), ai_x8
  47.  
  // Pair each `colors` channel byte with its `reference_colors` byte.
  // NOTE(review): for the layouts below to hold, r_x8 must already contain
  // the r|g plane and b_x8 the b|a plane -- presumably aliases of
  // rgba_a_x4_a and rgba_a3_x4_a; confirm.
  48. // r_x8 = { r0R0r1R1 r2R2r3R3 r4R4r5R5 r6R6r7R7 }
  49. // g_x8 = { g0G0g1G1 g2G2g3G3 g4G4g5G5 g6G6g7G7 }
  50. // b_x8 = { b0B0b1B1 b2B2b3B3 b4B4b5B5 b6B6b7B7 }
  51. punpcklbw rgba_b_x4_a, r_x8
  52. punpckhbw rgba_b_x4_a, g_x8
  53. punpcklbw rgba_b3_x4_a, b_x8
  54.  
  // Byte-shift right by 8 so the alpha bytes land in the low 8 bytes
  // (the high 8 bytes become zero).
  // NOTE(review): ab_x8 is never written under that name before this point;
  // the layout below only holds if it aliases rgba_b3_x4_a -- confirm.
  55. // aa_x8 = { a0a1a2a3 a4a5a6a7 -------- -------- }
  56. // ab_x8 = { A0A1A2A3 A4A5A6A7 -------- -------- }
  57. psrldq $8, aa_x8
  58. psrldq $8, ab_x8
  59.  
  // e_x8 = copy of the reference alpha; the unmodified `colors` alpha
  // (8 bytes) is written out to pixel_alpha.
  60. movdqa ab_x8, e_x8
  61. movq aa_x8, 0(pixel_alpha)
  62.  
  63. // e = ab & 0x80 (save edge pass)
  // a2_x8 = copy of aa, taken before aa is overwritten by the max below.
  64. movdqa aa_x8, a2_x8
  65. pand (const_u8_0x80_value), e_x8
  66. // ab &= 0x1F (clear edge pass)
  67. pand (const_u8_0x1F_value), ab_x8
  68.  
  69. // ai = 31 - a
  70. psubb aa_x8, ai_x8
  71. // aa = max(aa, ab)   (unsigned, per byte)
  72. pmaxub ab_x8, aa_x8
  73.  
  74. // combine edge pass into alpha result
  75. por e_x8, aa_x8
  76.  
  77. // mask: ab = (ab == 0) ? 0xFF : 0x00
  78. pcmpeqb (const_u8_0x00_value), ab_x8
  79.  
  80. // where mask is set (reference alpha was 0): a2 = 0xFF
  81. por ab_x8, a2_x8
  82. // ab = ~mask & ai: ai where reference alpha was non-zero, else 0
  83. pandn ai_x8, ab_x8
  84. // a2 &= 0x1F: 0x1F where mask was set, otherwise original alpha & 0x1F
  85. pand (const_u8_0x1F_value), a2_x8
  86.  
  87. // a2 = a2 + 1
  // NOTE(review): paddd adds 32-bit lanes, not bytes. It matches a per-byte
  // add only while no byte overflows, which holds after the 0x1F mask above
  // if the constant is 0x01 in every byte -- confirm.
  88. paddd (const_u8_0x01_value), a2_x8
  89.  
  // Interleave the two weights so they line up with the {c, C} byte pairs
  // in r_x8/g_x8/b_x8 (I = the ab_x8 lanes computed above).
  90. // a2_x8 = { a0I0a1I1 a2I2a3I3 a4I4a5I5 a6I6a7I7 }
  91. punpcklbw ab_x8, a2_x8
  92.  
  // pmaddubsw: destination bytes are unsigned (the channel values), source
  // bytes are signed (the weights); adjacent products are summed into one
  // signed-saturated 16-bit lane, i.e. lane i = c_i * a2_i + C_i * I_i.
  93. // r_x8 = u16{ r0 r1 r2 r3 r4 r5 r6 r7 }
  94. // g_x8 = u16{ g0 g1 g2 g3 g4 g5 g6 g7 }
  95. // b_x8 = u16{ b0 b1 b2 b3 b4 b5 b6 b7 }
  96. pmaddubsw a2_x8, r_x8
  97. pmaddubsw a2_x8, g_x8
  98. pmaddubsw a2_x8, b_x8
  99.  
  100. // shift results (logical >> 5 on each 16-bit lane)
  101. psrlw $5, r_x8
  102. psrlw $5, g_x8
  103. psrlw $5, b_x8
  104.  
  // Narrow back to bytes with unsigned saturation.
  // NOTE(review): the comments switch to rb_x8 / ga_x8 while the
  // instructions write r_x8 / g_x8 -- presumably the same registers; confirm.
  105. // rb_x8 = { r0r1r2r3 r4r5r6r7 b0b1b2b3 b4b5b6b7 }
  106. // ga_x8 = { g0g1g2g3 g4g5g6g7 g0g1g2g3 g4g5g6g7 }
  107. packuswb b_x8, r_x8
  108. packuswb g_x8, g_x8
  109.  
  // Replace the duplicated high half of g with the combined alpha (aa_x8).
  110. // ga_x8 = { g0g1g2g3 g4g5g6g7 a0a1a2a3 a4a5a6a7 }
  111. punpcklqdq aa_x8, g_x8
  112. movdqa rb_x8, rb2_x8
  113.  
  114. // rb_x8 = { r0g0r1g1 r2g2r3g3 r4g4r5g5 r6g6r7g7 }
  115. // rb2_x8 = { b0a0b1a1 b2a2b3a3 b4a4b5a5 b6a6b7a7 }
  116. punpcklbw ga_x8, rb_x8
  117. punpckhbw ga_x8, rb2_x8
  118.  
  119. movdqa rb_x8, rb3_x8
  120.  
  // Interleave 16-bit {r,g} with {b,a} pairs to rebuild packed RGBA pixels.
  121. // rb_x8 = { r0g0b0a0 r1g1b1a1 r2g2b2a2 r3g3b3a3 }
  122. // rb3_x8 = { r4g4b4a4 r5g5b5a5 r6g6b6a6 r7g7b7a7 }
  123. punpcklwd rb2_x8, rb_x8
  124. punpckhwd rb2_x8, rb3_x8
  125.  
  // Write the 8 pixels back over the input.
  // NOTE(review): the stores name rgba_x4_a / rgba_x4_b, but the results
  // were just built in rb_x8 / rb3_x8 -- presumably aliases; confirm.
  126. movaps rgba_x4_a, 0(colors)
  127. movaps rgba_x4_b, 16(colors)
  128.  
  // Advance both pixel pointers by 32 bytes and the alpha pointer by 8,
  // then loop while the remaining count in `pixels` is still > 0 (signed).
  129. add $32, colors
  130. add $32, reference_colors
  131. add $8, pixel_alpha
  132. sub $8, pixels
  133. jg 0b
Advertisement
Add Comment
Please, Sign In to add comment