Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/// <summary>
/// Multiplies the three low byte channels of every 32-bit element in <paramref name="data"/> by that
/// element's top byte (the byte selected by the <c>0xFF000000</c> alpha mask), in place, processing
/// four elements per iteration with SSE2 (and SSSE3 for the alpha broadcast when available).
/// Each channel becomes <c>(channel * alpha + 255) &gt;&gt; 8</c>; the alpha byte itself is preserved.
/// </summary>
/// <param name="data">The elements to modify in place. Any trailing elements that do not fill a whole
/// 128-bit register are forwarded to <c>ProcessTextureScalar</c>.</param>
[MethodImpl(Runtime.MethodImpl.Inline)]
private static unsafe void ProcessTextureSse2(Span<Color8> data) {
    const uint RegisterElements = 4;
    // Presumably a sanity check that exactly four Color8 values fit in one 128-bit register.
    RegisterElements.AssertEqual((uint)(sizeof(Vector128<uint>) / sizeof(Color8)));

    // Loop-invariant constants, built once instead of on every iteration.
    Vector128<uint> alphaMask = Vector128.Create(0xFF000000U);
    Vector128<uint> colorMask = Vector128.Create(0x00FFFFFFU);
    Vector128<ushort> addend = Vector128.Create((ushort)0x00FFU);
    // Byte 6 / byte 14 are the low bytes of 16-bit lanes 3 and 7 (the alpha lanes after unpacking);
    // an index of 0xFF makes the shuffle write zero, so each 16-bit lane ends up holding the alpha value.
    Vector128<byte> alphaShuffle = Vector128.Create(6, 0xFF, 6, 0xFF, 6, 0xFF, 6, 0xFF, 14, 0xFF, 14, 0xFF, 14, 0xFF, 14, 0xFF);

    uint offset;
    fixed (Color8* dataPtr8 = data) {
        uint* dataPtr = (uint*)dataPtr8;
        for (offset = 0; offset + (RegisterElements - 1U) < data.Length; offset += RegisterElements) {
            Vector128<uint> rawColor = Sse2.LoadVector128(dataPtr + offset);
            // Original alpha bytes, kept aside so they can be merged back unchanged at the end.
            Vector128<uint> alpha = Sse2.And(rawColor, alphaMask);

            // Zero-extend every byte to 16 bits: 'lo' holds elements 0-1, 'hi' holds elements 2-3.
            Vector128<ushort> lo = Sse2.UnpackLow(rawColor.AsByte(), Vector128<byte>.Zero).AsUInt16();
            Vector128<ushort> hi = Sse2.UnpackHigh(rawColor.AsByte(), Vector128<byte>.Zero).AsUInt16();

            // Broadcast each element's alpha across all four of its 16-bit lanes.
            Vector128<uint> alphaLo, alphaHi;
            if (Ssse3.IsSupported) {
                alphaLo = Ssse3.Shuffle(lo.AsByte(), alphaShuffle).AsUInt32();
                alphaHi = Ssse3.Shuffle(hi.AsByte(), alphaShuffle).AsUInt32();
            }
            else {
                // SSE2-only fallback: after unpacking, alpha sits in the topmost 16-bit lane of each
                // 64-bit half. Smear it downwards by 16 bits, then by 32 bits, to fill all four lanes.
                alphaLo = Sse2.UnpackLow(alpha.AsByte(), Vector128<byte>.Zero).AsUInt32();
                alphaHi = Sse2.UnpackHigh(alpha.AsByte(), Vector128<byte>.Zero).AsUInt32();
                Vector128<uint> alphaLo16 = Sse2.ShiftRightLogical(alphaLo, 16);
                Vector128<uint> alphaHi16 = Sse2.ShiftRightLogical(alphaHi, 16);
                alphaLo = Sse2.Or(alphaLo, alphaLo16);
                alphaHi = Sse2.Or(alphaHi, alphaHi16);
                Vector128<ulong> alphaLo32 = Sse2.ShiftRightLogical(alphaLo.AsUInt64(), 32);
                Vector128<ulong> alphaHi32 = Sse2.ShiftRightLogical(alphaHi.AsUInt64(), 32);
                alphaLo = Sse2.Or(alphaLo.AsUInt64(), alphaLo32).AsUInt32();
                alphaHi = Sse2.Or(alphaHi.AsUInt64(), alphaHi32).AsUInt32();
            }

            // channel * alpha is at most 255 * 255, and adding 255 still fits in 16 bits,
            // so the low 16 bits of the product are the full result.
            Vector128<ushort> prodLo = Sse2.MultiplyLow(lo, alphaLo.AsUInt16());
            Vector128<ushort> prodHi = Sse2.MultiplyLow(hi, alphaHi.AsUInt16());
            Vector128<ushort> sumLo = Sse2.Add(prodLo, addend);
            Vector128<ushort> sumHi = Sse2.Add(prodHi, addend);
            Vector128<ushort> shiftLo = Sse2.ShiftRightLogical(sumLo, 8);
            Vector128<ushort> shiftHi = Sse2.ShiftRightLogical(sumHi, 8);

            // Narrow back to bytes; every value is <= 255 here, so saturation never triggers.
            Vector128<uint> packed = Sse2.PackUnsignedSaturate(shiftLo.AsInt16(), shiftHi.AsInt16()).AsUInt32();
            // Discard the (alpha * alpha) result in the alpha byte and restore the original alpha.
            packed = Sse2.And(packed, colorMask);
            packed = Sse2.Or(packed, alpha);
            Sse2.Store(dataPtr + offset, packed);
        }
    }

    // This is unlikely to happen, but handle when there are still elements left (the texture size isn't aligned to 4)
    if (offset < data.Length) {
        ProcessTextureScalar(data.SliceUnsafe(offset));
    }
}
Advertisement
Add Comment
Please, Sign In to add comment