Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/// <summary>
/// Scrambles the accumulator in place using SSE2, one 128-bit vector (two 64-bit lanes) at a time.
/// For every vector the steps are: <c>acc ^= acc &gt;&gt; 47</c>, then <c>acc ^= secret</c>, then the
/// lane is replaced by <c>lo + (hi &lt;&lt; 32)</c>, where <c>lo</c> and <c>hi</c> are 32x32-bit
/// products of the keyed vector and of its <c>ShuffleDataKey</c>-shuffled copy with <c>Prime32.Prime0</c>.
/// </summary>
/// <remarks>
/// NOTE(review): this looks like the XXH3 accumulator scramble step - confirm against the rest of the file.
/// <c>UnrollCount</c> selects how many vectors are processed per straight-line group (4, 2 or 1).
/// </remarks>
/// <param name="accumulator">Accumulator lanes; read and then overwritten.</param>
/// <param name="secret">Secret bytes, read at the same byte offsets as the accumulator.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void ScrambleAccumulatorSse2(ulong* accumulator, byte* secret) {
    var primeVec = Vector128.Create(Prime32.Prime0);

    if (UnrollCount > 2) {
        // Four vectors, fully straight-line: covers bytes 0..63 of both buffers.
        // NOTE(review): unlike the looped branches this one never reads StripeLength,
        // so it assumes the stripe is exactly 64 bytes - confirm.
        var acc0 = Sse2.LoadVector128(accumulator + 0);
        var acc1 = Sse2.LoadVector128(accumulator + 2);
        var acc2 = Sse2.LoadVector128(accumulator + 4);
        var acc3 = Sse2.LoadVector128(accumulator + 6);

        // acc ^ (acc >> 47)
        var mixed0 = Sse2.Xor(acc0, Sse2.ShiftRightLogical(acc0, 47));
        var mixed1 = Sse2.Xor(acc1, Sse2.ShiftRightLogical(acc1, 47));
        var mixed2 = Sse2.Xor(acc2, Sse2.ShiftRightLogical(acc2, 47));
        var mixed3 = Sse2.Xor(acc3, Sse2.ShiftRightLogical(acc3, 47));

        // ... ^ secret
        var keyed0 = Sse2.Xor(mixed0, Sse2.LoadVector128((ulong*)(secret + 0)));
        var keyed1 = Sse2.Xor(mixed1, Sse2.LoadVector128((ulong*)(secret + 16)));
        var keyed2 = Sse2.Xor(mixed2, Sse2.LoadVector128((ulong*)(secret + 32)));
        var keyed3 = Sse2.Xor(mixed3, Sse2.LoadVector128((ulong*)(secret + 48)));

        // Shuffled copy whose low 32-bit halves feed the "hi" multiply.
        // (ShuffleDataKey is defined elsewhere; presumably it moves each lane's upper half down.)
        var swapped0 = Sse2.Shuffle(keyed0.AsUInt32(), ShuffleDataKey);
        var swapped1 = Sse2.Shuffle(keyed1.AsUInt32(), ShuffleDataKey);
        var swapped2 = Sse2.Shuffle(keyed2.AsUInt32(), ShuffleDataKey);
        var swapped3 = Sse2.Shuffle(keyed3.AsUInt32(), ShuffleDataKey);

        // 32x32 -> 64-bit products with the prime.
        var lo0 = Sse2.Multiply(keyed0.AsUInt32(), primeVec);
        var lo1 = Sse2.Multiply(keyed1.AsUInt32(), primeVec);
        var lo2 = Sse2.Multiply(keyed2.AsUInt32(), primeVec);
        var lo3 = Sse2.Multiply(keyed3.AsUInt32(), primeVec);
        var hi0 = Sse2.Multiply(swapped0, primeVec);
        var hi1 = Sse2.Multiply(swapped1, primeVec);
        var hi2 = Sse2.Multiply(swapped2, primeVec);
        var hi3 = Sse2.Multiply(swapped3, primeVec);

        // lo + (hi << 32)
        var result0 = Sse2.Add(lo0, Sse2.ShiftLeftLogical(hi0, 32));
        var result1 = Sse2.Add(lo1, Sse2.ShiftLeftLogical(hi1, 32));
        var result2 = Sse2.Add(lo2, Sse2.ShiftLeftLogical(hi2, 32));
        var result3 = Sse2.Add(lo3, Sse2.ShiftLeftLogical(hi3, 32));

        // All loads are done before the first store.
        Sse2.Store(accumulator + 0, result0);
        Sse2.Store(accumulator + 2, result1);
        Sse2.Store(accumulator + 4, result2);
        Sse2.Store(accumulator + 6, result3);
    }
    else if (UnrollCount == 2) {
        // Two vectors (32 bytes) per iteration.
        for (uint offset = 0; offset < StripeLength; offset += 32) {
            ulong* accPair = (ulong*)((byte*)accumulator + offset);
            ulong* keyPair = (ulong*)(secret + offset);

            var acc0 = Sse2.LoadVector128(accPair + 0);
            var acc1 = Sse2.LoadVector128(accPair + 2);

            var mixed0 = Sse2.Xor(acc0, Sse2.ShiftRightLogical(acc0, 47));
            var mixed1 = Sse2.Xor(acc1, Sse2.ShiftRightLogical(acc1, 47));

            var keyed0 = Sse2.Xor(mixed0, Sse2.LoadVector128(keyPair + 0));
            var keyed1 = Sse2.Xor(mixed1, Sse2.LoadVector128(keyPair + 2));

            var swapped0 = Sse2.Shuffle(keyed0.AsUInt32(), ShuffleDataKey);
            var swapped1 = Sse2.Shuffle(keyed1.AsUInt32(), ShuffleDataKey);

            var lo0 = Sse2.Multiply(keyed0.AsUInt32(), primeVec);
            var lo1 = Sse2.Multiply(keyed1.AsUInt32(), primeVec);
            var hi0 = Sse2.Multiply(swapped0, primeVec);
            var hi1 = Sse2.Multiply(swapped1, primeVec);

            var result0 = Sse2.Add(lo0, Sse2.ShiftLeftLogical(hi0, 32));
            var result1 = Sse2.Add(lo1, Sse2.ShiftLeftLogical(hi1, 32));

            Sse2.Store(accPair + 0, result0);
            Sse2.Store(accPair + 2, result1);
        }
    }
    else {
        // One vector (16 bytes) per iteration.
        for (uint offset = 0; offset < StripeLength; offset += 16) {
            ulong* accLane = (ulong*)((byte*)accumulator + offset);

            var acc = Sse2.LoadVector128(accLane);
            var mixed = Sse2.Xor(acc, Sse2.ShiftRightLogical(acc, 47));
            var keyed = Sse2.Xor(mixed, Sse2.LoadVector128((ulong*)(secret + offset)));

            var swapped = Sse2.Shuffle(keyed.AsUInt32(), ShuffleDataKey);
            var lo = Sse2.Multiply(keyed.AsUInt32(), primeVec);
            var hi = Sse2.Multiply(swapped, primeVec);

            Sse2.Store(accLane, Sse2.Add(lo, Sse2.ShiftLeftLogical(hi, 32)));
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement