Guest
Public paste!

MemRead

By: a guest | Jun 20th, 2010 | Syntax: C++ | Size: 3.57 KB | Hits: 581 | Expires: Never
Copy text to clipboard
  1. // MemRead.cpp
  2. //
  3. //      This program is meant to test memory read performance under various
  4. //  kinds of situations.
  5. //
  6.  
  7. #include <stdlib.h>
  8. #include <stdio.h>
  9. #include <conio.h>
  10.  
  11. #include <windows.h>
  12. #include <mmsystem.h>
  13.  
  14. // the size of a single fetch over the bus; the tests step through the buffer in multiples of it.
  15. #define MEM_FETCH_SIZE          128
  16.  
  17. // the maximum size of any processor cache (i7 has an 8MB cache); FlushCache touches this many bytes.
  18. #define MAX_CACHE_SIZE          (8*1024*1024)
  19.  
  20. // the total test buffer size (1GB) and the size of each of its two halves (512MB).
  21. #define TEST_BUFFER_SIZE        (1024*1024*1024)
  22. #define HALF_BUFFER_SIZE        (512*1024*1024)
  23.  
  24. // forward declarations.
  25. void TestLinear( void* buffer );
  26. void TestSparse( void* buffer );
  27. void FlushCache( void* buffer );
  28.  
  29. // main app.
  30. void main()
  31. {
  32.     // begin timing.
  33.     timeBeginPeriod( 1 );
  34.  
  35.     // allocate a several megabyte buffer.  Note that we intentially allocate
  36.     // a little extra to ensure that the cache is not poluted with data.
  37.     char* buffer = ( char* )malloc( TEST_BUFFER_SIZE + MAX_CACHE_SIZE );
  38.  
  39.     // begin timing tests.
  40.     unsigned int start = 0;
  41.     unsigned int linearTotal = 0;
  42.     unsigned int sparseTotal = 0;
  43.  
  44.     // flush the cache for a test.
  45.     FlushCache( buffer );
  46.  
  47.     // time the current operation.
  48.     start = timeGetTime();
  49.  
  50.     // perform a test of non-temporal reads.
  51.     TestLinear( buffer );
  52.  
  53.     // calculate the total time needed.
  54.     linearTotal = timeGetTime() - start;
  55.  
  56.     // flush the cache for the next test.
  57.     FlushCache( buffer );
  58.  
  59.     // time the current operation.
  60.     start = timeGetTime();
  61.  
  62.     // perform a test of non-temporal reads.
  63.     TestSparse( buffer );
  64.  
  65.     // calculate the total time needed.
  66.     sparseTotal = timeGetTime() - start;
  67.  
  68.     // print out our statistics.
  69.     printf( "Total linear access time: %d\n", linearTotal );
  70.     printf( "Total sparse access time: %d\n", sparseTotal );
  71.  
  72.     // spin for a bit.
  73.     _getch();
  74.  
  75.     // end the time period.
  76.     timeEndPeriod( 1 );
  77. }
  78.  
  79. void TestLinear( void* buffer )
  80. {
  81.     for ( unsigned int i = 0; i < 512; ++i )
  82.     {
  83.         __asm
  84.         {
  85.             mov         esi, buffer
  86.             mov         ecx, HALF_BUFFER_SIZE
  87.  
  88.         loop_start:
  89.             ; pull in data with a non-temporal read.
  90.             movntdqa    xmm0, [ esi +               0 ]
  91.             movntdqa    xmm1, [ esi +  MEM_FETCH_SIZE ]
  92.  
  93.             ; loop.
  94.             add         esi, MEM_FETCH_SIZE + MEM_FETCH_SIZE
  95.             sub         ecx, MEM_FETCH_SIZE + MEM_FETCH_SIZE
  96.             jnz loop_start
  97.         }
  98.     }
  99. }
  100.  
  101. void TestSparse( void* buffer )
  102. {
  103.     for ( unsigned int i = 0; i < 512; ++i )
  104.     {
  105.         __asm
  106.         {
  107.             mov         esi, buffer
  108.             mov         ecx, HALF_BUFFER_SIZE
  109.  
  110.         loop_start:
  111.             ; pull in data with a non-temporal read.
  112.             movntdqa    xmm0, [ esi +                 0 ]
  113.             movntdqa    xmm1, [ esi +  HALF_BUFFER_SIZE ]
  114.  
  115.             ; loop.
  116.             add         esi, MEM_FETCH_SIZE + MEM_FETCH_SIZE
  117.             sub         ecx, MEM_FETCH_SIZE + MEM_FETCH_SIZE
  118.             jnz loop_start
  119.         }
  120.     }
  121. }
  122.  
  123. void FlushCache( void* buffer )
  124. {
  125.     // initialize the back portion of the buffer in order to ensure that no
  126.     // data cached.  Note that I intentially use a read and write operation
  127.     // to ensure that the CPU cannot optimize the operation with non-temporal
  128.     // stores.
  129.     char* scratch = ( char* )buffer + TEST_BUFFER_SIZE;
  130.     for ( unsigned int i = 0; i < MAX_CACHE_SIZE; i += 4, scratch += 4 )
  131.         ( ( int* )scratch )[ 0 ] ^= i;
  132. }