Advertisement
Guest User

AMD cache-/memory-latency benchmark

a guest
May 5th, 2017
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 11.06 KB | None | 0 0
  1. #include <Windows.h>
  2. #include <intrin.h>
  3. #include <cstddef>
  4. #include <iostream>
  5. #include <cstdint>
  6. #include <cassert>
  7. #include <cstdio>
  8. #include <string>
  9. #include <sstream>
  10.  
  11. using namespace std;
  12.  
  13. struct TlbInfo
  14. {
  15.     static unsigned const LEVEL1    = 0,
  16.                           LEVEL2    = 1,
  17.                           PAGES_4KB = 0,
  18.                           PAGES_4MB = 1;
  19.     struct
  20.     {
  21.         struct
  22.         {
  23.             bool     fKnown;
  24.             unsigned entries;
  25.             int      associativity;
  26.         } aPageSizes[2];
  27.     } aLevels[2];
  28. };
  29.  
  30. union CacheLineChain
  31. {
  32.     CacheLineChain volatile *pclcNext;
  33. };
  34.  
  35. void AutoExit( bool fExit, char *message );
  36. bool GetCacheInfo( unsigned level, size_t *pSize, unsigned *pAssociativity, unsigned *pLineSizeShift, unsigned *pSectors );
  37. void GetTlbInfo( TlbInfo *pTlbInfo );
  38.  
  39. size_t InitializePointerChain( void *pBlock, unsigned blockBits, unsigned randBlockBits , unsigned lineBits );
  40. string GetSizeString( unsigned blockBits );
  41.  
  42. template<typename UNSIGNED_INTEGER>
  43. int highest_set_bit( UNSIGNED_INTEGER ui );
  44.  
  45. int main()
  46. {
  47.     size_t const  MAX_BLOCK_SIZE = 256 * 1024 * 1024;
  48.     TlbInfo       ti;
  49.     SYSTEM_INFO   si;
  50.     unsigned      pageBits;
  51.     unsigned      lineBits;
  52.     unsigned      rbBits;
  53.     void         *pBlock;
  54.    
  55.     AutoExit( !SetPriorityClass( GetCurrentProcess(), HIGH_PRIORITY_CLASS ), "can't set process-priority" );;
  56.     AutoExit( !SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_HIGHEST ), "can't set thread-priority" );;
  57.     AutoExit( SetThreadAffinityMask( GetCurrentThread(), 1 ) == 0, "can't set thread affinity");
  58.  
  59.     GetTlbInfo( &ti );
  60.     AutoExit( !ti.aLevels[TlbInfo::LEVEL1].aPageSizes[TlbInfo::PAGES_4MB].fKnown, "can't determine l1 dtlb-size" );
  61.     GetSystemInfo( &si );
  62.     pageBits = highest_set_bit<DWORD>( si.dwPageSize );
  63.     AutoExit( !GetCacheInfo( 1, nullptr, nullptr, &lineBits, nullptr ), "can't determine l1-cache parameters" );
  64.     rbBits = highest_set_bit<unsigned>( ti.aLevels[TlbInfo::LEVEL1].aPageSizes[TlbInfo::PAGES_4MB].entries ) + pageBits + lineBits;
  65.  
  66.     AutoExit( (pBlock = VirtualAlloc( NULL, MAX_BLOCK_SIZE, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE )) == NULL, "can't allocate memory for test-block" );
  67.  
  68.     unsigned                 blockBits;
  69.     size_t                   nCacheLines;
  70.     CacheLineChain volatile *pclcFirst;
  71.     DWORDLONG                dwlTick,
  72.                              dwlFastestTick;
  73.     unsigned                 turn;
  74.     double                   ticksPerCacheline;
  75.  
  76.     for( blockBits = pageBits; blockBits <= 28; blockBits++ )
  77.     {
  78.         nCacheLines = InitializePointerChain( pBlock, blockBits, pageBits, lineBits );
  79.         pclcFirst   = (CacheLineChain *)pBlock;
  80.  
  81.         for( dwlFastestTick = (DWORDLONG)(LONGLONG)-1, turn = 0; turn < 100; turn++ )
  82.         {
  83.             dwlTick        = __rdtsc();
  84.             for (CacheLineChain volatile *pclc = pclcFirst; (pclc = pclc->pclcNext) != nullptr; );
  85.             dwlTick        = __rdtsc() - dwlTick;
  86.             dwlFastestTick = dwlTick < dwlFastestTick ? dwlTick : dwlFastestTick;
  87.         }
  88.  
  89.         ticksPerCacheline = (double)dwlFastestTick / nCacheLines;
  90.         cout << GetSizeString( blockBits ) << " " << ticksPerCacheline << endl;
  91.     }
  92.  
  93.     return EXIT_SUCCESS;
  94. }
  95.  
  96. uint64_t ReverseBits( uint64_t reverse, unsigned bits );
  97.  
  98. size_t InitializePointerChain( void *pBlock, unsigned blockBits, unsigned randBlockBits , unsigned lineBits )
  99. {
  100.     assert(blockBits >= randBlockBits && randBlockBits >= lineBits);
  101.  
  102.     union ClcPointer
  103.     {
  104.         BYTE                    *pbCl;
  105.         CacheLineChain volatile *pclc;
  106.     };
  107.  
  108.     CacheLineChain volatile *pclcFirst,
  109.                             *pclcPrev;
  110.     ClcPointer               cpRandBlock,
  111.                              cp;
  112.     size_t                   randBlock,
  113.                              reverseCacheLine,
  114.                              nRandBlocks;
  115.  
  116.     pclcFirst        = (CacheLineChain *)pBlock;
  117.     cpRandBlock.pclc = pclcFirst;
  118.     pclcPrev         = nullptr;
  119.     randBlock        = 0;
  120.     nRandBlocks      = (size_t)1 << (blockBits - randBlockBits);
  121.     do
  122.     {
  123.         reverseCacheLine = 0;
  124.         do
  125.         {
  126.             cp.pbCl = &cpRandBlock.pbCl[(size_t)ReverseBits( reverseCacheLine, randBlockBits - lineBits ) << lineBits];
  127.             if( pclcPrev )
  128.                 pclcPrev->pclcNext = cp.pclc;
  129.             pclcPrev = cp.pclc;
  130.         } while( ++reverseCacheLine < ((size_t)1 << (randBlockBits - lineBits)) );
  131.         cpRandBlock.pbCl += (size_t)1 << randBlockBits;
  132.     } while( ++randBlock < nRandBlocks );
  133.     pclcPrev->pclcNext = nullptr;
  134.  
  135.     return (size_t)1 << (blockBits - lineBits);
  136. }
  137.  
  138. string GetSizeString( unsigned blockBits )
  139. {
  140.     size_t size;
  141.     stringstream ss;
  142.  
  143.     if( (size = (size_t)1 << blockBits) < 1024 )
  144.         ss << size << "B";
  145.     else if( size < (1024 * 1024) )
  146.         ss << (size / 1024) << "kB";
  147.     else if( size < (1024 * 1024 * 1024) )
  148.         ss << (size / (1024 * 1024)) << "MB";
  149.     else
  150.         ss << (size / (1024 * 1024 * 1024)) << "GB";
  151.  
  152.     return ss.str();
  153. }
  154.  
  155. void AutoExit( bool fExit, char *message )
  156. {
  157.     if( fExit )
  158.     {
  159.         cout << message << endl;
  160.         getchar();
  161.         ExitProcess( EXIT_FAILURE );
  162.     }
  163. }
  164.  
  165.  
  166. uint64_t ReverseBits( uint64_t reverse, unsigned bits )
  167. {
  168.     assert(bits >= 1 && bits <= 64);
  169.     reverse   = ((reverse & 0xFFFFFFFF00000000) >> 32) | ((reverse & 0x00000000FFFFFFFF) << 32);
  170.     reverse   = ((reverse & 0xFFFF0000FFFF0000) >> 16) | ((reverse & 0x0000FFFF0000FFFF) << 16);
  171.     reverse   = ((reverse & 0xFF00FF00FF00FF00) >>  8) | ((reverse & 0x00FF00FF00FF00FF) <<  8);
  172.     reverse   = ((reverse & 0xF0F0F0F0F0F0F0F0) >>  4) | ((reverse & 0x0F0F0F0F0F0F0F0F) <<  4);
  173.     reverse   = ((reverse & 0xCCCCCCCCCCCCCCCC) >>  2) | ((reverse & 0x3333333333333333) <<  2);
  174.     reverse   = ((reverse & 0xAAAAAAAAAAAAAAAA) >>  1) | ((reverse & 0x5555555555555555) <<  1);
  175.     reverse >>= 64 - bits;
  176.  
  177.     return reverse;
  178. }
  179.  
  180. DWORD CpuId( DWORD dwCode, DWORD dwEcx2ndParameter, DWORD *pdwRegisters );
  181.  
  182. bool GetCacheInfo( unsigned level, size_t *pSize, unsigned *pAssociativity, unsigned *pLineSizeShift, unsigned *pSectors )
  183. {
  184.     DWORD adwCpuIdRetRegisters[4];
  185.  
  186.     if( (CpuId( 0, 0, adwCpuIdRetRegisters ),
  187.          adwCpuIdRetRegisters[1] == 'uneG' &&
  188.          adwCpuIdRetRegisters[3] == 'Ieni' &&
  189.          adwCpuIdRetRegisters[2] == 'letn') )
  190.         return false;
  191.  
  192.     if( adwCpuIdRetRegisters[1] == 'htuA' &&
  193.         adwCpuIdRetRegisters[3] == 'itne' &&
  194.         adwCpuIdRetRegisters[2] == 'DMAc' &&
  195.         CpuId( 0x80000000u, 0, adwCpuIdRetRegisters ) >= (0x80000005u + (level > 1)) )
  196.     {
  197.         size_t   size;
  198.         unsigned associativity;
  199.         unsigned clShift;
  200.         unsigned sectors;
  201.  
  202.         CpuId( 0x80000005u + (level > 1), 0, adwCpuIdRetRegisters );
  203.  
  204.         if( level == 1 )
  205.             size          = (adwCpuIdRetRegisters[2] >> 24) * 1024,
  206.             associativity = (unsigned)(signed char)(adwCpuIdRetRegisters[2] >> 16),
  207.             clShift       = highest_set_bit<DWORD>( adwCpuIdRetRegisters[2] & 0x0FF ),
  208.             sectors       = (unsigned)((adwCpuIdRetRegisters[2] >> 8) & 0x0FF);
  209.         else if( level == 2 )
  210.             size          = (adwCpuIdRetRegisters[2] >> 16) * 1024,
  211.             associativity = (unsigned)(signed char)((adwCpuIdRetRegisters[2] >> 12) & 0x0F),
  212.             clShift       = highest_set_bit<DWORD>( adwCpuIdRetRegisters[2] & 0x0FF ),
  213.             sectors       = (unsigned)((adwCpuIdRetRegisters[2] >> 8) & 0x0F);
  214.         else if( level == 3 )
  215.             size          = ((adwCpuIdRetRegisters[3] >> 18) & 0x3FFF) * 512 * 1024,
  216.             associativity = (unsigned)(signed char)((adwCpuIdRetRegisters[3] >> 12) & 0x0F),
  217.             clShift       = highest_set_bit<DWORD>( adwCpuIdRetRegisters[3] & 0x0FF ),
  218.             sectors       = (unsigned)((adwCpuIdRetRegisters[2] >> 8) & 0x0F);
  219.         else
  220.             return false;
  221.  
  222.         if( pSize )
  223.             *pSize = size;
  224.  
  225.         if( pAssociativity )
  226.             *pAssociativity = associativity;
  227.  
  228.         if( pLineSizeShift )
  229.             *pLineSizeShift = clShift;
  230.  
  231.         if( pSectors )
  232.             *pSectors = sectors;
  233.  
  234.         return true;
  235.     }
  236.  
  237.     return false;
  238. }
  239.  
  240. void GetTlbInfo( TlbInfo *pti )
  241. {
  242.     DWORD adwCpuidRetRegisters[4];
  243.     DWORD dwMaxExtendedCpuId;
  244.  
  245.     pti->aLevels[TlbInfo::LEVEL1].aPageSizes[TlbInfo::PAGES_4KB].fKnown = false;
  246.     pti->aLevels[TlbInfo::LEVEL1].aPageSizes[TlbInfo::PAGES_4MB].fKnown = false;
  247.     pti->aLevels[TlbInfo::LEVEL2].aPageSizes[TlbInfo::PAGES_4KB].fKnown = false;
  248.     pti->aLevels[TlbInfo::LEVEL2].aPageSizes[TlbInfo::PAGES_4MB].fKnown = false;
  249.  
  250.     if( (CpuId( 0, 0, adwCpuidRetRegisters ),
  251.          adwCpuidRetRegisters[1] == 'uneG' &&
  252.          adwCpuidRetRegisters[3] == 'Ieni' &&
  253.          adwCpuidRetRegisters[2] == 'letn') )
  254.         return;
  255.  
  256.     if( (CpuId( 0, 0, adwCpuidRetRegisters ),
  257.          adwCpuidRetRegisters[1] == 'htuA' &&
  258.          adwCpuidRetRegisters[3] == 'itne' &&
  259.          adwCpuidRetRegisters[2] == 'DMAc') &&
  260.         (dwMaxExtendedCpuId = CpuId( 0x80000000u, 0, adwCpuidRetRegisters )) >= 0x80000005u )
  261.     {
  262.         int associativity;
  263.  
  264.         CpuId( 0x80000005u, 0, adwCpuidRetRegisters );
  265.         pti->aLevels[TlbInfo::LEVEL1].aPageSizes[TlbInfo::PAGES_4KB].fKnown        = true;
  266.         pti->aLevels[TlbInfo::LEVEL1].aPageSizes[TlbInfo::PAGES_4KB].entries       =         (unsigned)((adwCpuidRetRegisters[1] & 0x00FF0000u) >> 16);
  267.         pti->aLevels[TlbInfo::LEVEL1].aPageSizes[TlbInfo::PAGES_4KB].associativity = (int)(signed char)((adwCpuidRetRegisters[1] & 0xFF000000u) >> 24);
  268.         pti->aLevels[TlbInfo::LEVEL1].aPageSizes[TlbInfo::PAGES_4MB].fKnown        = true;
  269.         pti->aLevels[TlbInfo::LEVEL1].aPageSizes[TlbInfo::PAGES_4MB].entries       =         (unsigned)((adwCpuidRetRegisters[0] & 0x00FF0000u) >> 16);
  270.         pti->aLevels[TlbInfo::LEVEL1].aPageSizes[TlbInfo::PAGES_4MB].associativity = (int)(signed char)((adwCpuidRetRegisters[0] & 0xFF000000u) >> 24);
  271.  
  272.         if( dwMaxExtendedCpuId >= 0x80000006u )
  273.             CpuId( 0x80000006u, 0, adwCpuidRetRegisters ),
  274.             pti->aLevels[TlbInfo::LEVEL2].aPageSizes[TlbInfo::PAGES_4KB].fKnown        = true,
  275.             pti->aLevels[TlbInfo::LEVEL2].aPageSizes[TlbInfo::PAGES_4KB].entries       = (unsigned)((adwCpuidRetRegisters[1] & 0x0FFF0000u) >> 16),
  276.             associativity                                                              = (int)((adwCpuidRetRegisters[1] & 0xF0000000u) >> 28),
  277.             pti->aLevels[TlbInfo::LEVEL2].aPageSizes[TlbInfo::PAGES_4KB].associativity = (associativity < 0xF) ? associativity : -1,
  278.             pti->aLevels[TlbInfo::LEVEL2].aPageSizes[TlbInfo::PAGES_4MB].fKnown        = true,
  279.             pti->aLevels[TlbInfo::LEVEL2].aPageSizes[TlbInfo::PAGES_4MB].entries       = (unsigned)((adwCpuidRetRegisters[0] & 0x0FFF0000u) >> 16),
  280.             associativity                                                              = (int)((adwCpuidRetRegisters[0] & 0xF0000000u) >> 28),
  281.             pti->aLevels[TlbInfo::LEVEL2].aPageSizes[TlbInfo::PAGES_4MB].associativity = (associativity < 0xF) ? associativity : -1;
  282.  
  283.     }
  284. }
  285.  
  286. DWORD CpuId( DWORD dwCode, DWORD dwEcx2ndParameter, DWORD *pdwRegisters )
  287. {
  288.     int aRegs[4];
  289.  
  290.     __cpuidex( aRegs, (int)dwCode, (int)dwEcx2ndParameter );
  291.     for( int i = 0; i < 4; pdwRegisters[i] = (DWORD)aRegs[i], i++ );
  292.  
  293.     return (DWORD)aRegs[0];
  294. }
  295.  
  296. template<typename UNSIGNED_INTEGER>
  297. inline
  298. int highest_set_bit( UNSIGNED_INTEGER ui )
  299. {
  300.     assert((UNSIGNED_INTEGER)-1 >= 0);
  301.  
  302.     unsigned bits = 0;
  303.  
  304.     if( !ui )
  305.         return -1;
  306.  
  307.     if( (ui & 0x0FFFFFFFF00000000u) )
  308.         ui   >>= 32,
  309.         bits   = 32;
  310.  
  311.     if( (ui & 0x0FFFF0000u) )
  312.         ui   >>= 16,
  313.         bits  += 16;
  314.  
  315.     if( (ui & 0x0FF00u) )
  316.         ui   >>= 8,
  317.         bits  += 8;
  318.  
  319.     if( (ui & 0x0F0u) )
  320.         ui   >>= 4,
  321.         bits  += 4;
  322.  
  323.     if( (ui & 0x0Cu) )
  324.         ui   >>= 2,
  325.         bits  += 2;
  326.  
  327.     if( (ui & 0x02u) )
  328.         bits += 1;
  329.  
  330.     return (int)bits;
  331. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement