Ameisen

Untitled

Jun 18th, 2022
1,269
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 2.72 KB | None | 0 0
  /// <summary>
  /// SSE2 pass over a span of 32-bit pixels, four pixels per iteration. For every pixel the
  /// top byte (mask <c>0xFF000000</c>, treated here as alpha) is multiplied into each of the
  /// three lower bytes as <c>(channel * alpha + 255) &gt;&gt; 8</c>; the top byte itself is
  /// written back unchanged.
  /// </summary>
  /// <param name="data">
  /// Pixels to rewrite in place. Any trailing pixels that do not fill a whole 128-bit register
  /// are handed to <c>ProcessTextureScalar</c>.
  /// </param>
  [MethodImpl(Runtime.MethodImpl.Inline)]
  private static unsafe void ProcessTextureSse2(Span<Color8> data) {
      // One 128-bit register is expected to hold exactly four Color8 values.
      const uint RegisterElements = 4;
      RegisterElements.AssertEqual((uint)(sizeof(Vector128<uint>) / sizeof(Color8)));

      // Values that never change between iterations.
      Vector128<byte> zeroBytes = Vector128<byte>.Zero;
      Vector128<uint> topByteMask = Vector128.Create(0xFF000000U);
      Vector128<uint> lowBytesMask = Vector128.Create(0x00FFFFFFU);
      Vector128<ushort> bias = Vector128.Create((ushort)0x00FFU);

      uint index = 0;
      fixed (Color8* pinned = data) {
          uint* words = (uint*)pinned;

          // Keep going while a full group of four pixels is still available.
          while (index + (RegisterElements - 1U) < data.Length) {
              uint* cursor = words + index;

              Vector128<uint> source = Sse2.LoadVector128(cursor);
              Vector128<uint> sourceAlpha = Sse2.And(source, topByteMask);

              // Zero-extend every byte to 16 bits: pixels 0-1 land in the low half, 2-3 in the high half.
              Vector128<byte> sourceBytes = source.AsByte();
              Vector128<ushort> wideLo = Sse2.UnpackLow(sourceBytes, zeroBytes).AsUInt16();
              Vector128<ushort> wideHi = Sse2.UnpackHigh(sourceBytes, zeroBytes).AsUInt16();

              // Build a per-pixel multiplier: the pixel's alpha repeated in all four of its 16-bit lanes.
              Vector128<ushort> factorLo;
              Vector128<ushort> factorHi;
              if (Ssse3.IsSupported) {
                  // Byte 6 / byte 14 hold the widened alpha of the first / second pixel in each half;
                  // an index of 0xFF makes the shuffle emit zero for the upper byte of every lane.
                  Vector128<byte> broadcastAlpha = Vector128.Create(6, 0xFF, 6, 0xFF, 6, 0xFF, 6, 0xFF, 14, 0xFF, 14, 0xFF, 14, 0xFF, 14, 0xFF);

                  factorLo = Ssse3.Shuffle(wideLo.AsByte(), broadcastAlpha).AsUInt16();
                  factorHi = Ssse3.Shuffle(wideHi.AsByte(), broadcastAlpha).AsUInt16();
              }
              else {
                  // SSE2-only route: widen the isolated alpha bytes, then smear each one downwards
                  // across its pixel, first by 16 bits and then by 32 bits.
                  Vector128<byte> alphaBytes = sourceAlpha.AsByte();
                  Vector128<uint> spreadLo = Sse2.UnpackLow(alphaBytes, zeroBytes).AsUInt32();
                  Vector128<uint> spreadHi = Sse2.UnpackHigh(alphaBytes, zeroBytes).AsUInt32();

                  spreadLo = Sse2.Or(spreadLo, Sse2.ShiftRightLogical(spreadLo, 16));
                  spreadHi = Sse2.Or(spreadHi, Sse2.ShiftRightLogical(spreadHi, 16));

                  Vector128<ulong> quadLo = spreadLo.AsUInt64();
                  Vector128<ulong> quadHi = spreadHi.AsUInt64();
                  factorLo = Sse2.Or(quadLo, Sse2.ShiftRightLogical(quadLo, 32)).AsUInt16();
                  factorHi = Sse2.Or(quadHi, Sse2.ShiftRightLogical(quadHi, 32)).AsUInt16();
              }

              // (channel * alpha + 255) >> 8, computed per 16-bit lane.
              Vector128<ushort> scaledLo = Sse2.ShiftRightLogical(Sse2.Add(Sse2.MultiplyLow(wideLo, factorLo), bias), 8);
              Vector128<ushort> scaledHi = Sse2.ShiftRightLogical(Sse2.Add(Sse2.MultiplyLow(wideHi, factorHi), bias), 8);

              // Narrow back to bytes, discard the scaled top byte and put the original one back.
              Vector128<uint> result = Sse2.PackUnsignedSaturate(scaledLo.AsInt16(), scaledHi.AsInt16()).AsUInt32();
              result = Sse2.Or(Sse2.And(result, lowBytesMask), sourceAlpha);

              Sse2.Store(cursor, result);

              index += RegisterElements;
          }
      }

      // Leftover pixels (length not a multiple of four) are unlikely, but are still processed by the scalar routine.
      if (index < data.Length) {
          ProcessTextureScalar(data.SliceUnsafe(index));
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment