Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- --- phatk121016.cl 2013-06-07 09:38:40.000000000 +0100
- +++ phatk121016-modified.cl 2013-06-07 10:41:05.000000000 +0100
- @@ -57,27 +57,12 @@
- // SHA-256 Ch function, but provides it in exactly one instruction. If
- // detected, use it for Ch. Otherwise, construct Ch out of simpler logical
- // primitives.
- -
- - #ifdef BFI_INT
- - // Well, slight problem... It turns out BFI_INT isn't actually exposed to
- - // OpenCL (or CAL IL for that matter) in any way. However, there is
- - // a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
- - // amd_bytealign, takes the same inputs, and provides the same output.
- - // We can use that as a placeholder for BFI_INT and have the application
- - // patch it after compilation.
- - // This is the BFI_INT function
- - #define Ch(x, y, z) amd_bytealign(x,y,z)
- - // Ma can also be implemented in terms of BFI_INT...
- - #define Ma(z, x, y) amd_bytealign(z^x,y,x)
- - #else // BFI_INT
- - // Later SDKs optimise this to BFI INT without patching and GCN
- - // actually fails if manually patched with BFI_INT
- -
- - #define Ch(x, y, z) bitselect((u)z, (u)y, (u)x)
- + // We have an SDK which automatically optimizes to BFI INT, so let's do this
- + #define Ch(x, y, z) bitselect(z, y, x)
- #define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x)
- #define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y)
- - #endif
- +
- #else // BITALIGN
- #define Ch(x, y, z) (z ^ (x & (y ^ z)))
- #define Ma(x, y, z) ((x & z) | (y & (x | z)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement