illwieckz

bench ClearSkyBox

Jul 19th, 2021 (edited)
414
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 5.53 KB | None | 0 0
  1. ```c++
  2. #include <string.h>
  3. #include <algorithm>
  4. #include <iterator>
  5. #include <limits>
  6.  
  // Sky-box extents: two rows of six floats each. Both arrays are reset to
  // their sentinel values by the ClearSkyBox*() routines.
  float sky_mins[2][6];
  float sky_maxs[2][6];

  // Resets every entry of sky_mins to 9999 and every entry of sky_maxs to
  // -9999 using std::fill.
  //
  // Each row is filled on its own. Treating the 2x6 array as one flat run of
  // twelve floats (`*sky_mins + 2*6`) moves a pointer derived from the first
  // six-element row past that row's end, which is undefined pointer
  // arithmetic even though the storage is contiguous. The fill values are
  // float literals so no int-to-float conversion is involved.
  void ClearSkyBox1()
  {
      for ( auto &row : sky_mins )
      {
          std::fill( std::begin( row ), std::end( row ), 9999.0f );
      }

      for ( auto &row : sky_maxs )
      {
          std::fill( std::begin( row ), std::end( row ), -9999.0f );
      }
  }
  14.  
  15. void ClearSkyBox2()
  16. {
  17.     static const float mins[ 2 ][ 6 ] = {
  18.         { 9999, 9999, 9999, 9999, 9999, 9999 },
  19.         { 9999, 9999, 9999, 9999, 9999, 9999 }
  20.     };
  21.  
  22.     static const float maxs[ 2 ][ 6 ] = {
  23.         { -9999, -9999, -9999, -9999, -9999, -9999 },
  24.         { -9999, -9999, -9999, -9999, -9999, -9999 }
  25.     };
  26.  
  27.     memcpy( sky_mins, mins, sizeof( mins ) );
  28.     memcpy( sky_maxs, maxs, sizeof( maxs ) );
  29. }
  30.  
  31. unsigned char i;
  32.  
  33. __attribute__((noinline)) void call1()
  34. {
  35.     for( i = 0; i < std::numeric_limits<decltype(i)>::max(); ++i )
  36.     {
  37.         ClearSkyBox1();
  38.     }
  39. }
  40.  
  41. __attribute__((noinline)) void call2()
  42. {
  43.     for( i = 0; i < std::numeric_limits<decltype(i)>::max(); ++i )
  44.     {
  45.         ClearSkyBox2();
  46.     }
  47. }
  48.  
  49. int main()
  50. {
  51.     while ( true )
  52.     {
  53.         call1();
  54.         call2();
  55.     }
  56. }
  57. ```
  58.  
  59. # gcc -O2
  60.  
  61. * call1: 88.41%
  62. * call2: 11.52%
  63.  
  64. ```asm
  65. asm: /* call1() */
  66. Platform: X86 64 (Intel syntax)
  67. 0x55812a208200: endbr64      
  68. 0x55812a208204: movss        xmm1, dword ptr [rip + 0xe84]
  69. 0x55812a20820c: movss        xmm0, dword ptr [rip + 0xe80]
  70. 0x55812a208214: mov          esi, 0xffffffff
  71. 0x55812a208219: lea          rcx, [rip + 0x2eb0]
  72. 0x55812a208220: lea          rdx, [rip + 0x2e69]
  73. 0x55812a208227: nop          word ptr [rax + rax]
  74. 0x55812a208230: lea          rax, [rip + 0x2e69]
  75. 0x55812a208237: nop          word ptr [rax + rax]
  76. 0x55812a208240: movss        dword ptr [rax], xmm1
  77. 0x55812a208244: add          rax, 4
  78. 0x55812a208248: cmp          rax, rcx
  79. 0x55812a20824b: jne          0x55812a208240
  80. 0x55812a20824d: lea          rax, [rip + 0x2e0c]
  81. 0x55812a208254: nop          dword ptr [rax]
  82. 0x55812a208258: movss        dword ptr [rax], xmm0
  83. 0x55812a20825c: add          rax, 4
  84. 0x55812a208260: cmp          rax, rdx
  85. 0x55812a208263: jne          0x55812a208258
  86. 0x55812a208265: sub          sil, 1
  87. 0x55812a208269: jne          0x55812a208230
  88. 0x55812a20826b: mov          byte ptr [rip + 0x2dce], 0xff
  89. 0x55812a208272: ret          
  90. 0x55812a208273:
  91. ```
  92.  
  93. ```asm
  94. asm: /* call2() */
  95. Platform: X86 64 (Intel syntax)
  96. 0x55812a208280: endbr64      
  97. 0x55812a208284: lea          r9, [rip + 0x2e15]
  98. 0x55812a20828b: lea          rsi, [rip + 0x2dce]
  99. 0x55812a208292: movdqa       xmm5, xmmword ptr [rip + 0xdd6]
  100. 0x55812a20829a: movdqa       xmm4, xmmword ptr [rip + 0xdde]
  101. 0x55812a2082a2: lea          r8, [r9 + 0x10]
  102. 0x55812a2082a6: lea          rcx, [rsi + 0x10]
  103. 0x55812a2082aa: movdqa       xmm3, xmmword ptr [rip + 0xd7e]
  104. 0x55812a2082b2: movdqa       xmm2, xmmword ptr [rip + 0xd86]
  105. 0x55812a2082ba: movdqa       xmm1, xmmword ptr [rip + 0xd9e]
  106. 0x55812a2082c2: movdqa       xmm0, xmmword ptr [rip + 0xd56]
  107. 0x55812a2082ca: lea          rdi, [r8 + 0x10]
  108. 0x55812a2082ce: lea          rdx, [rcx + 0x10]
  109. 0x55812a2082d2: mov          eax, 1
  110. 0x55812a2082d7: nop          word ptr [rax + rax]
  111. 0x55812a2082e0: movaps       xmmword ptr [r9], xmm1
  112. 0x55812a2082e4: movaps       xmmword ptr [rsi], xmm0
  113. 0x55812a2082e7: movaps       xmmword ptr [r8], xmm5
  114. 0x55812a2082eb: movaps       xmmword ptr [rcx], xmm3
  115. 0x55812a2082ee: mov          byte ptr [rip + 0x2d4c], al
  116. 0x55812a2082f4: movaps       xmmword ptr [rdi], xmm4
  117. 0x55812a2082f7: movaps       xmmword ptr [rdx], xmm2
  118. 0x55812a2082fa: add          al, 1
  119. 0x55812a2082fc: jne          0x55812a2082e0
  120. 0x55812a2082fe: ret          
  121. 0x55812a2082ff:
  122. ```
  123.  
  124. # gcc -O3
  125.  
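  126. * call1: 1.25% (not comparable with -O2: in the call1 disassembly below the loop has been optimized away, leaving no branch and a single pass of six 16-byte stores)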
  127. * call2: 97.86%
  128.  
  129. ```asm
  130. asm: /* call1() */
  131. Platform: X86 64 (Intel syntax)
  132. 0x55a33459c1f0: endbr64      
  133. 0x55a33459c1f4: movaps       xmm0, xmmword ptr [rip + 0xe95]
  134. 0x55a33459c1fb: mov          byte ptr [rip + 0x2e3e], 0xff
  135. 0x55a33459c202: movaps       xmmword ptr [rip + 0x2e97], xmm0
  136. 0x55a33459c209: movaps       xmmword ptr [rip + 0x2ea0], xmm0
  137. 0x55a33459c210: movaps       xmmword ptr [rip + 0x2ea9], xmm0
  138. 0x55a33459c217: movaps       xmm0, xmmword ptr [rip + 0xe82]
  139. 0x55a33459c21e: movaps       xmmword ptr [rip + 0x2e3b], xmm0
  140. 0x55a33459c225: movaps       xmmword ptr [rip + 0x2e44], xmm0
  141. 0x55a33459c22c: movaps       xmmword ptr [rip + 0x2e4d], xmm0
  142. 0x55a33459c233: ret          
  143. 0x55a33459c234:
  144. ```
  145.  
  146. ```asm
  147. asm: /* call2() */
  148. Platform: X86 64 (Intel syntax)
  149. 0x55a33459c240: endbr64      
  150. 0x55a33459c244: lea          r9, [rip + 0x2e55]
  151. 0x55a33459c24b: lea          rsi, [rip + 0x2e0e]
  152. 0x55a33459c252: movdqa       xmm5, xmmword ptr [rip + 0xe16]
  153. 0x55a33459c25a: movdqa       xmm4, xmmword ptr [rip + 0xe1e]
  154. 0x55a33459c262: lea          r8, [r9 + 0x10]
  155. 0x55a33459c266: lea          rcx, [rsi + 0x10]
  156. 0x55a33459c26a: movdqa       xmm3, xmmword ptr [rip + 0xdbe]
  157. 0x55a33459c272: movdqa       xmm2, xmmword ptr [rip + 0xdc6]
  158. 0x55a33459c27a: movdqa       xmm1, xmmword ptr [rip + 0xdde]
  159. 0x55a33459c282: movdqa       xmm0, xmmword ptr [rip + 0xd96]
  160. 0x55a33459c28a: lea          rdi, [r8 + 0x10]
  161. 0x55a33459c28e: lea          rdx, [rcx + 0x10]
  162. 0x55a33459c292: mov          eax, 1
  163. 0x55a33459c297: nop          word ptr [rax + rax]
  164. 0x55a33459c2a0: movaps       xmmword ptr [r9], xmm1
  165. 0x55a33459c2a4: movaps       xmmword ptr [rsi], xmm0
  166. 0x55a33459c2a7: movaps       xmmword ptr [r8], xmm5
  167. 0x55a33459c2ab: movaps       xmmword ptr [rcx], xmm3
  168. 0x55a33459c2ae: mov          byte ptr [rip + 0x2d8c], al
  169. 0x55a33459c2b4: movaps       xmmword ptr [rdi], xmm4
  170. 0x55a33459c2b7: movaps       xmmword ptr [rdx], xmm2
  171. 0x55a33459c2ba: add          al, 1
  172. 0x55a33459c2bc: jne          0x55a33459c2a0
  173. 0x55a33459c2be: ret  
  174. ```
Add Comment
Please, Sign In to add comment