Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #define _POSIX_C_SOURCE 200809L
- #define LOG_ENABLE
- #include "types.h"
- #include "log.h"
- #include "utils.h"
- #include "profiler.h"
- /*
- === Compiled ===
- clang -O3 -fno-inline-functions
- === Output ===
- test: contains_color_basic()
- End_pixel: 0b01010110, End_color: 0b00000010
- test: contains_color_lut()
- End_pixel: 0b01010110, End_color: 0b00000010
- test: contains_color_simd_shift()
- End_pixel: 0b01010110, End_color: 0b00000010
- test: contains_color_simd_mul()
- End_pixel: 0b01010110, End_color: 0b00000010
- test: contains_color_paul_shift()
- End_pixel: 0b01010110, End_color: 0b00000010
- test: contains_color_paul_mul()
- End_pixel: 0b01010110, End_color: 0b00000010
- === Profiling Results ===
- Total Time: 3265.87 ms
- test: contains_color_basic(): 1 hits, 248.45 ms ( 7.61%) with children, 248.45 ms ( 7.61%) no children
- test: contains_color_lut(): 1 hits, 650.16 ms (19.91%) with children, 650.16 ms (19.91%) no children
- test: contains_color_simd_shift(): 1 hits, 630.71 ms (19.31%) with children, 630.71 ms (19.31%) no children
- test: contains_color_simd_mul(): 1 hits, 600.76 ms (18.40%) with children, 600.76 ms (18.40%) no children
- test: contains_color_paul_shift(): 1 hits, 613.90 ms (18.80%) with children, 613.90 ms (18.80%) no children
- test: contains_color_paul_mul(): 1 hits, 521.83 ms (15.98%) with children, 521.83 ms (15.98%) no children
- === Disasm ===
- 00000000000016c0 <contains_color_basic>:
- 16c0: 89 f9 mov ecx,edi
- 16c2: 83 e1 03 and ecx,0x3
- 16c5: b8 01 00 00 00 mov eax,0x1
- 16ca: 39 f1 cmp ecx,esi
- 16cc: 74 22 je 16f0 <contains_color_basic+0x30>
- 16ce: 89 f9 mov ecx,edi
- 16d0: c1 e9 02 shr ecx,0x2
- 16d3: 83 e1 03 and ecx,0x3
- 16d6: 39 f1 cmp ecx,esi
- 16d8: 74 16 je 16f0 <contains_color_basic+0x30>
- 16da: 89 f9 mov ecx,edi
- 16dc: c1 e9 04 shr ecx,0x4
- 16df: 83 e1 03 and ecx,0x3
- 16e2: 39 f1 cmp ecx,esi
- 16e4: 74 0a je 16f0 <contains_color_basic+0x30>
- 16e6: c1 ef 06 shr edi,0x6
- 16e9: 31 c0 xor eax,eax
- 16eb: 39 f7 cmp edi,esi
- 16ed: 0f 94 c0 sete al
- 16f0: c3 ret
- 16f1: 66 2e 0f 1f 84 00 00 cs nop WORD PTR [rax+rax*1+0x0]
- 16f8: 00 00 00
- 16fb: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
- 0000000000001700 <init_lut>:
- 1700: 41 56 push r14
- 1702: 53 push rbx
- 1703: 50 push rax
- 1704: 31 db xor ebx,ebx
- 1706: 4c 8d 35 c3 49 00 00 lea r14,[rip+0x49c3] # 60d0 <matches_lut>
- 170d: 0f 1f 00 nop DWORD PTR [rax]
- 1710: 0f b6 fb movzx edi,bl
- 1713: 31 f6 xor esi,esi
- 1715: e8 a6 ff ff ff call 16c0 <contains_color_basic>
- 171a: 42 88 04 33 mov BYTE PTR [rbx+r14*1],al
- 171e: 48 83 c3 01 add rbx,0x1
- 1722: 48 81 fb 00 01 00 00 cmp rbx,0x100
- 1729: 75 e5 jne 1710 <init_lut+0x10>
- 172b: 48 83 c4 08 add rsp,0x8
- 172f: 5b pop rbx
- 1730: 41 5e pop r14
- 1732: c3 ret
- 1733: 66 2e 0f 1f 84 00 00 cs nop WORD PTR [rax+rax*1+0x0]
- 173a: 00 00 00
- 173d: 0f 1f 00 nop DWORD PTR [rax]
- 0000000000001740 <contains_color_lut>:
- 1740: 89 f0 mov eax,esi
- 1742: 48 8d 0d 3f 29 00 00 lea rcx,[rip+0x293f] # 4088 <color_dup_lut>
- 1749: 40 32 3c 08 xor dil,BYTE PTR [rax+rcx*1]
- 174d: 40 0f b6 c7 movzx eax,dil
- 1751: 48 8d 0d 78 49 00 00 lea rcx,[rip+0x4978] # 60d0 <matches_lut>
- 1758: 0f b6 04 08 movzx eax,BYTE PTR [rax+rcx*1]
- 175c: c3 ret
- 175d: 0f 1f 00 nop DWORD PTR [rax]
- 0000000000001760 <contains_color_simd_shift>:
- 1760: 8d 04 b5 00 00 00 00 lea eax,[rsi*4+0x0]
- 1767: 40 08 f0 or al,sil
- 176a: 89 f1 mov ecx,esi
- 176c: c0 e1 04 shl cl,0x4
- 176f: 40 c0 e6 06 shl sil,0x6
- 1773: 40 08 ce or sil,cl
- 1776: 40 08 c6 or sil,al
- 1779: 40 30 fe xor sil,dil
- 177c: 89 f1 mov ecx,esi
- 177e: d0 e9 shr cl,1
- 1780: 40 08 f1 or cl,sil
- 1783: f6 d1 not cl
- 1785: 31 c0 xor eax,eax
- 1787: f6 c1 55 test cl,0x55
- 178a: 0f 95 c0 setne al
- 178d: c3 ret
- 178e: 66 90 xchg ax,ax
- 0000000000001790 <contains_color_simd_mul>:
- 1790: 6b c6 55 imul eax,esi,0x55
- 1793: 31 f8 xor eax,edi
- 1795: 89 c1 mov ecx,eax
- 1797: d0 e9 shr cl,1
- 1799: 08 c1 or cl,al
- 179b: f6 d1 not cl
- 179d: 31 c0 xor eax,eax
- 179f: f6 c1 55 test cl,0x55
- 17a2: 0f 95 c0 setne al
- 17a5: c3 ret
- 17a6: 66 2e 0f 1f 84 00 00 cs nop WORD PTR [rax+rax*1+0x0]
- 17ad: 00 00 00
- 00000000000017b0 <contains_color_paul_shift>:
- 17b0: 8d 04 b5 00 00 00 00 lea eax,[rsi*4+0x0]
- 17b7: 40 08 f0 or al,sil
- 17ba: 89 f1 mov ecx,esi
- 17bc: c0 e1 04 shl cl,0x4
- 17bf: 40 c0 e6 06 shl sil,0x6
- 17c3: 40 08 ce or sil,cl
- 17c6: 40 08 c6 or sil,al
- 17c9: 40 30 fe xor sil,dil
- 17cc: 40 f6 d6 not sil
- 17cf: 89 f0 mov eax,esi
- 17d1: d0 e8 shr al,1
- 17d3: 40 20 f0 and al,sil
- 17d6: 24 55 and al,0x55
- 17d8: 0f b6 c0 movzx eax,al
- 17db: c3 ret
- 17dc: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
- 00000000000017e0 <contains_color_paul_mul>:
- 17e0: 6b c6 55 imul eax,esi,0x55
- 17e3: 31 f8 xor eax,edi
- 17e5: f6 d0 not al
- 17e7: 89 c1 mov ecx,eax
- 17e9: d0 e9 shr cl,1
- 17eb: 20 c1 and cl,al
- 17ed: 80 e1 55 and cl,0x55
- 17f0: 0f b6 c1 movzx eax,cl
- 17f3: c3 ret
- 17f4: 66 2e 0f 1f 84 00 00 cs nop WORD PTR [rax+rax*1+0x0]
- 17fb: 00 00 00
- 17fe: 66 90 xchg ax,ax
- */
/*
 * Reference implementation: reports whether any of the four 2-bit fields
 * packed into `pixel` equals `color`.
 *
 * pixel: four 2-bit values stored at bit offsets 0, 2, 4 and 6.
 * color: value to search for; a 2-bit field can only ever equal 0..3, so any
 *        larger value never matches.
 *
 * Returns 1 as soon as a matching field is found (fields are examined from
 * the lowest bits upwards), 0 when none of the four fields match.
 */
bool contains_color_basic(u8 pixel, u8 color)
{
    for (int shift = 0; shift < 8; shift += 2)
    {
        if (((pixel >> shift) & 0b11) == color)
        {
            return 1;
        }
    }
    return 0;
}
/*
 * Lookup table indexed by a full byte value. It starts out all zero and is
 * written by init_lut(); contains_color_lut() reads it.
 */
u8 matches_lut[256] = { 0 };

/*
 * color_dup_lut[c] holds the 2-bit value c repeated in all four 2-bit fields
 * of a byte (c = 0..3).
 */
u8 color_dup_lut[4] = { 0x00, 0x55, 0xAA, 0xFF };
- void init_lut()
- {
- for (u32 i = 0; i < 256; ++i)
- {
- matches_lut[i] = contains_color_basic((u8) i, 0);
- }
- }
- bool contains_color_lut(u8 pixel, u8 color)
- {
- return matches_lut[color_dup_lut[color] ^ pixel];
- }
/*
 * Branch-free variant that tests all four 2-bit fields of `pixel` at once.
 * The color is replicated into every field with shifts.
 *
 * pixel: four packed 2-bit values.
 * color: value to search for (expected range 0..3).
 *
 * After the XOR a field is 00 exactly where pixel and color agree. OR-ing
 * each field's high bit onto its low bit leaves the low bit set for every
 * field that differs, so a match exists whenever not all four low bits are set.
 *
 * Returns 1 if some field equals `color`, 0 otherwise.
 */
bool contains_color_simd_shift(u8 pixel, u8 color)
{
    const u8 low_bits = 0x55;

    /* Build the byte that carries `color` in each of its four fields. */
    u8 pattern = color;
    pattern |= color << 2;
    pattern |= color << 4;
    pattern |= color << 6;

    u8 diff = pixel ^ pattern;
    u8 field_differs = (diff | (diff >> 1)) & low_bits;

    return field_differs != low_bits;
}
/*
 * Branch-free variant that tests all four 2-bit fields of `pixel` at once.
 * The color is replicated into every field by multiplying with 0b01010101.
 *
 * pixel: four packed 2-bit values.
 * color: value to search for (expected range 0..3).
 *
 * After the XOR a field is 00 exactly where pixel and color agree. Folding
 * each field's high bit onto its low bit leaves the low bit set for every
 * field that differs, so a match exists whenever not all four low bits are set.
 *
 * Returns 1 if some field equals `color`, 0 otherwise.
 */
bool contains_color_simd_mul(u8 pixel, u8 color)
{
    const u8 low_bits = 0x55;

    /* The product is truncated back to one byte before it is used. */
    u8 diff = pixel ^ (u8) (color * low_bits);
    u8 any_bit_differs = (diff >> 1) | diff;

    return (any_bit_differs & low_bits) != low_bits;
}
/*
 * Branch-free variant built on XNOR: reports whether any 2-bit field of
 * `pixel` equals `color`. The color is replicated into every field with shifts.
 *
 * pixel: four packed 2-bit values.
 * color: value to search for (expected range 0..3).
 *
 * Returns 1 if some field equals `color`, 0 otherwise.
 */
bool contains_color_paul_shift(u8 pixel, u8 color)
{
    u8 mask = 0b01010101;
    color = (color << 0) | (color << 2) | (color << 4) | (color << 6);
    /* x has a 1 in every bit position where pixel and color agree. */
    u8 x = pixel ^ (~color);
    /* Low bit of each field survives only if both bits of that field agree. */
    u8 y = x & (x >> 1);
    u8 z = y & mask;
    /*
     * z is a bit mask of matching fields (e.g. 0x14), not a truth value.
     * Compare explicitly so the result is exactly 0 or 1 even when `bool`
     * is an integer typedef rather than _Bool.
     */
    return z != 0;
}
/*
 * Branch-free variant built on XNOR: reports whether any 2-bit field of
 * `pixel` equals `color`. The color is replicated into every field by
 * multiplying with 0b01010101.
 *
 * pixel: four packed 2-bit values.
 * color: value to search for (expected range 0..3).
 *
 * Returns 1 if some field equals `color`, 0 otherwise.
 */
bool contains_color_paul_mul(u8 pixel, u8 color)
{
    u8 mask = 0b01010101;
    color *= mask;
    /* x has a 1 in every bit position where pixel and color agree. */
    u8 x = pixel ^ (~color);
    /* Low bit of each field survives only if both bits of that field agree. */
    u8 y = x & (x >> 1);
    u8 z = y & mask;
    /*
     * z is a bit mask of matching fields (e.g. 0x14), not a truth value.
     * Compare explicitly so the result is exactly 0 or 1 even when `bool`
     * is an integer typedef rather than _Bool.
     */
    return z != 0;
}
/*
 * Benchmark driver. Runs each contains_color_* variant through the same
 * loop, starting from the same pixel/color, and logs the final pixel and
 * color of every run so the variants can be checked against each other.
 *
 * In every iteration the result of the call under test selects the increment
 * applied to `pixel`, a second call selects the increment applied to `color`,
 * and `color` is then masked back into the range 0..3.
 *
 * Each timed loop sits in its own brace scope right after profile_block();
 * NOTE(review): presumably the macro times the enclosing scope - confirm in
 * profiler.h. The six call sites are deliberately kept separate rather than
 * folded into one helper, in case the macro keeps per-call-site state.
 *
 * Returns 0.
 */
int main(void)
{
    profiler_start();

    u8 pixel_start = 0b10010110;
    u8 color_start = 0b01;
    u8 pixel_inc_true = 55;
    u8 pixel_inc_false = 103;
    u8 color_inc_true = 1;
    u8 color_inc_false = 2;
    u32 iterations = 100000000;
    char *test_name = "";
    u8 pixel = 0;
    u8 color = 0;

    test_name = "test: contains_color_basic()";
    pixel = pixel_start;
    color = color_start;
    {
        profile_block(test_name);
        for (u32 i = 0; i < iterations; ++i)
        {
            pixel += contains_color_basic(pixel, color) ? pixel_inc_true : pixel_inc_false;
            color += contains_color_basic(pixel, color) ? color_inc_true : color_inc_false;
            color &= 0b00000011;
        }
    }
    LOG("%s\n\t\t\t\t\t\t End_pixel: 0b%08b, End_color: 0b%08b", test_name, pixel, color);

    test_name = "test: contains_color_lut()";
    pixel = pixel_start;
    color = color_start;
    /* The table is filled outside the profiled scope. */
    init_lut();
    {
        profile_block(test_name);
        for (u32 i = 0; i < iterations; ++i)
        {
            pixel += contains_color_lut(pixel, color) ? pixel_inc_true : pixel_inc_false;
            color += contains_color_lut(pixel, color) ? color_inc_true : color_inc_false;
            color &= 0b00000011;
        }
    }
    LOG("%s\n\t\t\t\t\t\t End_pixel: 0b%08b, End_color: 0b%08b", test_name, pixel, color);

    test_name = "test: contains_color_simd_shift()";
    pixel = pixel_start;
    color = color_start;
    {
        profile_block(test_name);
        for (u32 i = 0; i < iterations; ++i)
        {
            pixel += contains_color_simd_shift(pixel, color) ? pixel_inc_true : pixel_inc_false;
            color += contains_color_simd_shift(pixel, color) ? color_inc_true : color_inc_false;
            color &= 0b00000011;
        }
    }
    LOG("%s\n\t\t\t\t\t\t End_pixel: 0b%08b, End_color: 0b%08b", test_name, pixel, color);

    test_name = "test: contains_color_simd_mul()";
    pixel = pixel_start;
    color = color_start;
    {
        profile_block(test_name);
        for (u32 i = 0; i < iterations; ++i)
        {
            pixel += contains_color_simd_mul(pixel, color) ? pixel_inc_true : pixel_inc_false;
            color += contains_color_simd_mul(pixel, color) ? color_inc_true : color_inc_false;
            color &= 0b00000011;
        }
    }
    LOG("%s\n\t\t\t\t\t\t End_pixel: 0b%08b, End_color: 0b%08b", test_name, pixel, color);

    test_name = "test: contains_color_paul_shift()";
    pixel = pixel_start;
    color = color_start;
    {
        profile_block(test_name);
        for (u32 i = 0; i < iterations; ++i)
        {
            pixel += contains_color_paul_shift(pixel, color) ? pixel_inc_true : pixel_inc_false;
            color += contains_color_paul_shift(pixel, color) ? color_inc_true : color_inc_false;
            color &= 0b00000011;
        }
    }
    LOG("%s\n\t\t\t\t\t\t End_pixel: 0b%08b, End_color: 0b%08b", test_name, pixel, color);

    test_name = "test: contains_color_paul_mul()";
    pixel = pixel_start;
    color = color_start;
    {
        profile_block(test_name);
        for (u32 i = 0; i < iterations; ++i)
        {
            pixel += contains_color_paul_mul(pixel, color) ? pixel_inc_true : pixel_inc_false;
            color += contains_color_paul_mul(pixel, color) ? color_inc_true : color_inc_false;
            color &= 0b00000011;
        }
    }
    LOG("%s\n\t\t\t\t\t\t End_pixel: 0b%08b, End_color: 0b%08b", test_name, pixel, color);

    profiler_end_and_print();
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement