Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ====================================================
- Results (seconds, lower is better)
- ====================================================
- Total Derived Base
- 1) 0.38 0.09 0.29
- 2) 0.38 0.09 0.29
- 3) 0.41 0.09 0.32
- ====================================================
- Disassembly
- ====================================================
- Version1
- base->DoStuff();
- 002B3EF4 mov eax,dword ptr [esp+60h]
- 002B3EF8 mov edx,dword ptr [eax]
- 002B3EFA lea ecx,[esp+60h]
- 002B3EFE mov dword ptr [esp+64h],edi
- 002B3F02 call edx
- struct B : public A { void DoStuff() { b = a; } int b; };
- 002B32A0 mov eax,dword ptr [ecx+4]
- 002B32A3 mov dword ptr [ecx+8],eax
- 002B32A6 ret
- Version2
- A_DoStuff( base );
- 002B3FB0 mov ecx,dword ptr [esp+60h]
- 002B3FB4 lea eax,[esp+60h]
- 002B3FB8 mov dword ptr [esp+64h],edi
- 002B3FBC mov edx,dword ptr [ecx]
- 002B3FBE push eax
- 002B3FBF call edx
- void B_DoStuff( B* obj ) { obj->b = obj->base.a; }
- 002B32C0 mov eax,dword ptr [esp+4]
- 002B32C4 mov ecx,dword ptr [eax+4]
- 002B32C7 mov dword ptr [eax+8],ecx
- 002B32CA ret
- Version3
- base->DoStuff();
- 002B4070 lea ecx,[esp+74h]
- 002B4074 mov dword ptr [esp+78h],edi
- 002B4078 call dword ptr [esp+74h]
- void DoStuff() { b = a; }
- 002B3300 mov eax,dword ptr [ecx+4]
- 002B3303 mov dword ptr [ecx+8],eax
- 002B3306 ret
- ====================================================
- C++ code
- ====================================================
// Compiler-portability shims for the inlining / calling-convention keywords
// used by the Version2 and Version3 helpers.
//   FORCEINLINE - request that the function always be inlined.
//   FASTCALL    - select the register-based "fastcall" convention. It only
//                 exists on 32-bit x86, so it expands to nothing elsewhere.
// The MSVC expansion is exactly what the original unconditional defines used.
#if defined(_MSC_VER)
#define FORCEINLINE __forceinline
#define FASTCALL __fastcall
#elif defined(__GNUC__) || defined(__clang__)
#define FORCEINLINE inline __attribute__((always_inline))
#if defined(__i386__)
#define FASTCALL __attribute__((fastcall))
#else
#define FASTCALL
#endif
#else
#define FORCEINLINE inline
#define FASTCALL
#endif
// Version1: plain C++ virtual dispatch, the baseline the other versions
// are compared against.
namespace Version1
{
    // Polymorphic base; its DoStuff() does nothing.
    struct A
    {
        int a; // value that B::DoStuff() copies into B::b

        virtual void DoStuff() {}
    };

    // Derived type whose DoStuff() override copies `a` into `b`.
    struct B : public A
    {
        int b; // receives the value of `a` on each DoStuff() call

        void DoStuff()
        {
            b = a;
        }
    };
}
- namespace Version2 // translate Version1 into C
- {
- struct A;
- struct A_VTable { typedef void (*FnDoStuff)(A*); FnDoStuff DoStuff; };
- struct A { A_VTable* vtable; int a; };
- FORCEINLINE void FASTCALL A_DoStuff( A* obj ) { (*obj->vtable->DoStuff)(obj); }
- struct B { A base; int b; };
- void B_DoStuff( B* obj ) { obj->b = obj->base.a; }
- A_VTable g_B_VTable = { (A_VTable::FnDoStuff)&B_DoStuff };
- FORCEINLINE void FASTCALL B_Construct( B* obj ) { obj->base.vtable = &g_B_VTable; }
- B obj;
- }
- namespace Version3 // optimize version2 by embedding the vtable in the object
- {
- struct A {
- typedef void (A::*FnDoStuff)();
- FORCEINLINE A(FnDoStuff p) : pfnDoStuff(p) {}
- FnDoStuff pfnDoStuff;
- int a;
- FORCEINLINE void FASTCALL DoStuff() { (this->*pfnDoStuff)(); }
- };
- struct B : public A {
- FORCEINLINE B() : A(static_cast<FnDoStuff>(&B::DoStuff)) {}
- void DoStuff() { b = a; }
- int b;
- };
- }
- static const int cacheSize = eiMiB(10);
- int FlushCache(void* cache)
- {
- memset(cache, 0, cacheSize);
- int result = 0;
- int* data = (int*)cache;
- for( int i=0; i<(cacheSize/sizeof(int)); ++i )
- result += data[i];
- return result;
- }
- #include <stdio.h>
- void TestAB(Timer& timer)
- {
- void* cache = malloc(cacheSize);
- double v1Derived = 0, v2Derived = 0, v3Derived = 0;
- double v1Base = 0, v2Base = 0, v3Base = 0;
- double v1Total = 0, v2Total = 0, v3Total = 0;
- int dontOptimize1 = 0, dontOptimize2 = 0, dontOptimize3 = 0;
- int stress = 10000;
- dontOptimize1 += FlushCache(cache);
- double v1Start = timer.Elapsed();
- for( int c=0; c!=stress; ++c )
- {
- Version1::B obj;
- Version1::A* base = &obj;
- double start = timer.Elapsed();
- for( int i=0; i!=stress; ++i )
- {
- obj.a = i;
- obj.DoStuff();
- }
- v1Derived += timer.Elapsed() - start;
- start = timer.Elapsed();
- for( int i=0; i!=stress; ++i )
- {
- base->a = i;
- base->DoStuff();
- }
- v1Base += timer.Elapsed() - start;
- dontOptimize1 += obj.b;
- }
- v1Total = timer.Elapsed() - v1Start;
- dontOptimize2 += FlushCache(cache);
- double v2Start = timer.Elapsed();
- for( int c=0; c!=stress; ++c )
- {
- Version2::B obj;
- B_Construct( &obj );
- Version2::A* base = &obj.base;
- double start = timer.Elapsed();
- for( int i=0; i!=stress; ++i )
- {
- obj.base.a = i;
- B_DoStuff( &obj );
- }
- v2Derived += timer.Elapsed() - start;
- start = timer.Elapsed();
- for( int i=0; i!=stress; ++i )
- {
- base->a = i;
- A_DoStuff( base );
- }
- v2Base += timer.Elapsed() - start;
- dontOptimize2 += obj.b;
- }
- v2Total = timer.Elapsed() - v2Start;
- dontOptimize3 += FlushCache(cache);
- double v3Start = timer.Elapsed();
- for( int c=0; c!=stress; ++c )
- {
- Version3::B obj;
- Version3::A* base = &obj;
- double start = timer.Elapsed();
- for( int i=0; i!=stress; ++i )
- {
- obj.a = i;
- obj.DoStuff();
- }
- v3Derived += timer.Elapsed() - start;
- start = timer.Elapsed();
- for( int i=0; i!=stress; ++i )
- {
- base->a = i;
- base->DoStuff();
- }
- v3Base += timer.Elapsed() - start;
- dontOptimize3 += obj.b;
- }
- v3Total = timer.Elapsed() - v3Start;
- eiASSERT( dontOptimize1 == dontOptimize2 );
- eiASSERT( dontOptimize1 == dontOptimize3 );
- free(cache);
- printf( " Total Derived Base\n", dontOptimize1, dontOptimize2, dontOptimize3 );
- printf( "1) %.2f %.2f %.2f\n", v1Total, v1Derived, v1Base );
- printf( "2) %.2f %.2f %.2f\n", v2Total, v2Derived, v2Base );
- printf( "3) %.2f %.2f %.2f\n", v3Total, v3Derived, v3Base );
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement