Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdint.h>
- #include <stdlib.h>
- #include <time.h>
- #include <tmmintrin.h>
// Parse a string of base-4 digit characters ('0'..'3'), least-significant
// digit first, into a 32-bit value: digit i contributes (s[i] - '0') << (2*i).
//
// Parsing stops at the NUL terminator or after 16 digits, whichever comes
// first, because 16 two-bit digits are all a uint32_t can hold. Characters
// are not validated.
uint32_t str2bin(const char *s) {
    uint32_t val = 0;
    for (int i = 0; i < 16 && s[i]; i++) {
        // Shift as unsigned: an int shift would overflow into the sign bit
        // for the top digit, which is undefined behaviour.
        val += (uint32_t)(s[i] - '0') << (2 * i);
    }
    return val;
}
// Write val as exactly 16 base-4 digit characters, least-significant digit
// first, followed by a NUL terminator (s must have room for 17 bytes).
// Returns the number of digits written, which is always 16.
size_t bin2str(uint32_t val, char *s) {
    size_t count = 0;
    while (count < 16) {
        // Emit the lowest two bits as one digit, then drop them.
        s[count++] = (char)('0' + (val & 3));
        val >>= 2;
    }
    s[count] = '\0';
    return count;
}
// SSE2 counterpart of str2bin: converts 16 base-4 digit characters
// (least-significant digit first) into a 32-bit value.
// Always loads 16 bytes from s; no terminator check or digit validation.
uint32_t str2bin_sse(const char *s) {
    const __m128i ascii_zero = _mm_set1_epi8('0');
    __m128i digits = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)s), ascii_zero);

    // Move bit 0 (resp. bit 1) of every byte into that byte's top bit,
    // which is the bit _mm_movemask_epi8 collects.
    __m128i low_bit = _mm_slli_epi16(digits, 7);
    __m128i high_bit = _mm_slli_epi16(digits, 6);

    // Interleave so the mask alternates (bit 0, bit 1) for each digit:
    // digits 0..7 form the low 16 result bits, digits 8..15 the high 16.
    uint32_t low_half = (uint32_t)_mm_movemask_epi8(_mm_unpacklo_epi8(low_bit, high_bit));
    uint32_t high_half = (uint32_t)_mm_movemask_epi8(_mm_unpackhi_epi8(low_bit, high_bit));
    return (high_half << 16) + low_half;
}
// SSSE3 counterpart of bin2str (multiply-high variant): writes val as 16
// base-4 digit characters, least-significant digit first, followed by a NUL
// terminator. s must have room for 17 bytes. Returns 16, the digit count.
#if defined(__GNUC__) && !defined(__SSSE3__)
__attribute__((target("ssse3"))) // allow building without -mssse3
#endif
size_t bin2str_sse_a(uint32_t val, char *s) {
    __m128i reg = _mm_cvtsi32_si128((int)val);
    // Put source byte k into the high byte of 16-bit lanes 2k and 2k+1
    // (index -1 zeroes the low byte), i.e. each lane holds byte << 8.
    __m128i bytes = _mm_shuffle_epi8(reg, _mm_setr_epi8(-1, 0, -1, 0, -1, 1, -1, 1, -1, 2, -1, 2, -1, 3, -1, 3));
    // mulhi keeps the top 16 bits of the product, so it acts as a per-lane
    // variable shift: lanes become (byte, byte >> 4) ...
    __m128i even = _mm_mulhi_epu16(bytes, _mm_set1_epi32(0x00100100)); //epi16: 1<<8, 1<<4 x4 times
    // ... and (byte << 6, byte << 2), which lines bits 2-3 / 6-7 up with bits 8-9.
    __m128i odd = _mm_mulhi_epu16(bytes, _mm_set1_epi32(0x04004000)); //epi16: 1<<14, 1<<10 x4 times
    even = _mm_and_si128(even, _mm_set1_epi16(0x0003)); // digits 0 and 2 of the byte, in the lanes' low bytes
    odd = _mm_and_si128(odd, _mm_set1_epi16(0x0300));   // digits 1 and 3 of the byte, in the lanes' high bytes
    __m128i res = _mm_xor_si128(even, odd);             // disjoint bits, so xor merges them
    res = _mm_add_epi8(res, _mm_set1_epi8('0'));
    _mm_storeu_si128((__m128i *)s, res);
    // Terminate like bin2str does, so the result is a valid C string.
    s[16] = '\0';
    return 16;
}
// SSSE3 counterpart of bin2str (multiply-add variant): writes val as 16
// base-4 digit characters, least-significant digit first, followed by a NUL
// terminator. s must have room for 17 bytes. Returns 16, the digit count.
#if defined(__GNUC__) && !defined(__SSSE3__)
__attribute__((target("ssse3"))) // allow building without -mssse3
#endif
size_t bin2str_sse_b(uint32_t val, char *s) {
    __m128i reg = _mm_cvtsi32_si128((int)val);
    // Replicate each of the four source bytes into four consecutive bytes.
    __m128i bytes = _mm_shuffle_epi8(reg, _mm_setr_epi8(0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3));
    // Keep a different two-bit field in each of the four copies.
    __m128i twobits = _mm_and_si128(bytes, _mm_set1_epi32((int)0xC0300C03u)); //epi8: 3<<0, 3<<2, 3<<4, 3<<6 x4 times
    // Pairwise multiply-add: fields at bits 2-3 / 6-7 are multiplied by 64
    // and land at bits 8-9 / 12-13 of each 16-bit lane.
    twobits = _mm_maddubs_epi16(twobits, _mm_set1_epi16(0x4001)); //epi8: 1<<0, 1<<6 x8 times
    // Second multiply-add moves the upper lane up by 12 bits, leaving one
    // two-bit digit at the bottom of every byte of the 32-bit result.
    __m128i res = _mm_madd_epi16(twobits, _mm_set1_epi32(0x10000001)); //epi16: 1<<0, 1<<12 x4 times
    res = _mm_add_epi8(res, _mm_set1_epi8('0'));
    _mm_storeu_si128((__m128i *)s, res);
    // Terminate like bin2str does, so the result is a valid C string.
    s[16] = '\0';
    return 16;
}
// Round-trip self-test: encodes 70 random 32-bit values with bin2str_sse_b,
// decodes them with str2bin_sse, and prints one line per value with the
// original, the digit string, the decoded value, and "!" on a mismatch.
int main(void) {
    srand((unsigned)time(NULL));
    for (int i = 0; i < 70; i++) {
        // RAND_MAX is only guaranteed to be at least 32767 (15 bits), so
        // several rand() results are stitched together to fill 32 bits.
        uint32_t num = (uint32_t)rand();
        num <<= 15;
        num ^= (uint32_t)rand();
        num <<= 15;
        num ^= (uint32_t)rand();
        num <<= 15;
        num ^= (uint32_t)rand();
        // Zero-filled so the 16 digits are NUL-terminated for %s even when
        // the encoder stores only the 16 digit bytes.
        char str[20] = {0};
        bin2str_sse_b(num, str);
        uint32_t val = str2bin_sse(str);
        printf("%08X %s %08X %s\n", (unsigned)num, str, (unsigned)val, (num == val ? "" : "!"));
    }
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement