Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // This is the third algorithm, a hand-coded assembly algorithm:
- //-----------------------------------------------------------------------
- // int rick_stricmp3(void)
- // Case-insensitive comparison of two NUL-terminated byte strings.
- // In:    global leftptr  = left string
- //        global rightptr = right string
- //        global lower[]  = 256-entry byte table used to fold case
- // Out:   eax = -1 if left < right, 0 if equal, 1 if left > right
- //        (folded bytes are compared as unsigned values)
- // Clobb: ecx, edx, flags.  ebx and edi are saved and restored here,
- //        because a naked function gets no compiler-generated prologue
- //        to do it and the caller expects both registers to survive.
- //-----------------------------------------------------------------------
- __declspec(naked)
- int rick_stricmp3(void)
- {
- _asm {
-     push ebx // callee-saved, used for the right byte
-     push edi // callee-saved, used for the right pointer
-     mov edx,leftptr // parameter 1, left data
-     mov edi,rightptr // parameter 2, right data
-     xor ecx,ecx // ecx = index of the byte pair being compared
- top_loop:
-     movzx eax,byte ptr [edx+ecx] // eax = left byte, zero-extended
-     movzx ebx,byte ptr [edi+ecx] // ebx = right byte, zero-extended
-     test eax,eax // Left string ended?
-     jz done1
-     mov al,byte ptr lower[eax] // Fold left byte (upper bits of eax stay 0)
-     test ebx,ebx // Right string ended?
-     jz done2
-     mov bl,byte ptr lower[ebx] // Fold right byte
-     cmp al,bl
-     jb left_less_than_right
-     ja left_greater_than_right
-     // Folded bytes are equal: advance to the next pair
-     inc ecx
-     jmp top_loop
- left_greater_than_right:
-     mov eax,1
-     jmp finish
- left_less_than_right:
-     mov eax,-1
-     jmp finish
- done1:
-     // Left ended. ebx still holds the right byte loaded for this index,
-     // so there is no need to read it from memory again.
-     test ebx,ebx
-     jz equal // Both ended together
-     mov eax,-1 // Left is a proper prefix of right
-     jmp finish
- done2:
-     // Left still has data, right is exhausted
-     mov eax,1
-     jmp finish
- equal:
-     xor eax,eax
- finish:
-     // Single exit so the saved registers are restored on every path
-     pop edi
-     pop ebx
-     ret
- }
- }
- // Required global variables (to remove stack-based parameters):
- char *leftptr;
- char *rightptr;
- char lower[256];
- // Populate lower-case translation lookup table
- for (int i = 0; i <= 255; i++)
- lower[i] = tolower(i);
- // Call for each comparison
- for (int count = 0; count < 50000000; count++)
- {
- for (int i = 0; i < 5; i++)
- {
- leftptr = left[i];
- rightptr = right[i];
- rick_stricmp3();
- }
- }
- If your compiler keeps the variables for those loops in registers, you
- will need to write the loop itself in assembly, so that you are not
- required to push and pop the registers:
- // Store the two string pointers in the globals that rick_stricmp3()
- // reads, then call it. Arguments are parenthesised so that any
- // expression can be passed safely.
- #define rick_stricmp3_asm(a, b) \
- { \
-     leftptr = (a); \
-     rightptr = (b); \
-     rick_stricmp3(); \
- }
- // Benchmark loop driven from assembly: ecx counts the remaining passes.
- // NOTE(review): the pushed counter stays on the stack while the
- // compiler-generated statements below run; confirm the compiler does not
- // address anything relative to esp in between.
- _asm
- {
-     mov ecx,50000000 // Number of passes
- top_loop:
-     push ecx // rick_stricmp3 uses ecx as its iterator
- }
- rick_stricmp3_asm(left[0], right[0]);
- rick_stricmp3_asm(left[1], right[1]);
- rick_stricmp3_asm(left[2], right[2]);
- rick_stricmp3_asm(left[3], right[3]);
- rick_stricmp3_asm(left[4], right[4]);
- _asm
- {
-     pop ecx
-     // dec/jnz instead of the legacy loop instruction: loop only has a
-     // short (8-bit) displacement and needed a trampoline jump to reach
-     // top_loop, while a conditional jump can use a near displacement.
-     dec ecx
-     jnz top_loop
- }
- UPDATE: 6:36PM Jun.24.2017 -- Some additional minor optimizations which
- improved the score (from 105 to 102).
- //-----------------------------------------------------------------------
- // int rick_stricmp3(void)
- // Case-insensitive comparison of two NUL-terminated byte strings.
- // In:    global leftptr  = left string
- //        global rightptr = right string
- //        global lower[]  = 256-entry byte table used to fold case
- // Out:   eax = -1 if left < right, 0 if equal, 1 if left > right
- //        (folded bytes are compared as unsigned values)
- // Clobb: ecx, edx, flags.  ebx and edi are saved and restored here,
- //        because a naked function gets no compiler-generated prologue
- //        to do it and the caller expects both registers to survive.
- //-----------------------------------------------------------------------
- __declspec(naked)
- int rick_stricmp3(void)
- {
- _asm {
-     push ebx // callee-saved, used for the right byte
-     push edi // callee-saved, used for the right pointer
-     mov edx,leftptr // parameter 1, left data
-     mov edi,rightptr // parameter 2, right data
-     xor ecx,ecx // ecx = index of the byte pair being compared
- top_loop:
-     movzx eax,byte ptr [edx+ecx] // eax = left byte, zero-extended
-     movzx ebx,byte ptr [edi+ecx] // ebx = right byte, zero-extended
-     test eax,eax // Left string ended?
-     jz done1
-     mov al,byte ptr lower[eax] // Fold left byte (upper bits of eax stay 0)
-     test ebx,ebx // Right string ended?
-     jz left_greater_than_right // Left still has data, so left > right
-     mov bl,byte ptr lower[ebx] // Fold right byte
-     cmp al,bl
-     jb left_less_than_right
-     ja left_greater_than_right
-     // Folded bytes are equal: advance to the next pair
-     inc ecx
-     jmp top_loop
- left_greater_than_right:
-     mov eax,1
-     jmp finish
- done1:
-     // Left ended (eax is 0). ebx still holds the right byte for this
-     // index; if it is 0 as well, eax already carries the "equal" result.
-     test ebx,ebx
-     jz finish
-     // Otherwise left is a proper prefix of right: fall through
- left_less_than_right:
-     mov eax,-1
- finish:
-     // Single exit so the saved registers are restored on every path
-     pop edi
-     pop ebx
-     ret
- }
- }
- UPDATE: 8:54AM Jun.28.2017 -- Some additional minor optimizations which
- improved the score (from 102 to 85).
- NOTE: These adjustments were taken from an example by Rod Pemberton, which
- introduced an added test for whether the characters match exactly before
- conversion to lower-case, plus the 32-bit code generated by DJGPP for DOS,
- which was actually a very efficient implementation. His raw code was able
- to get an 86 without any changes. The only reason mine is faster is
- because I do not use the stack for parameters, and I use ecx for the
- incrementer, rather than updating both esi and edi as in his example
- (those would be edx and edi in this example).
- //-----------------------------------------------------------------------
- // int rick_stricmp3(void)
- // Case-insensitive comparison of two NUL-terminated byte strings.
- // In:    global leftptr  = left string
- //        global rightptr = right string
- //        global lower[]  = 256-entry byte table used to fold case
- // Out:   eax < 0 if left < right, 0 if equal, > 0 if left > right.
- //        The value is the difference of the two bytes at the point
- //        where the loop stopped, not necessarily -1 or 1.
- // Clobb: ecx, edx, flags.  ebx and edi are saved and restored here,
- //        because a naked function gets no compiler-generated prologue
- //        to do it and the caller expects both registers to survive.
- //-----------------------------------------------------------------------
- __declspec(naked)
- int rick_stricmp3(void)
- {
- _asm {
-     push ebx // callee-saved, used for the right byte
-     push edi // callee-saved, used for the right pointer
-     mov edx,leftptr // parameter 1, left data
-     mov edi,rightptr // parameter 2, right data
-     xor ecx,ecx // ecx = index of the byte pair being compared
- top_loop:
-     movzx eax,byte ptr [edx+ecx] // eax = left byte, zero-extended
-     movzx ebx,byte ptr [edi+ecx] // ebx = right byte, zero-extended
-     inc ecx // Advance now; the bytes are already in registers
-     // test replaces the compare against a zeroed esi, so esi is no
-     // longer touched at all
-     test eax,eax // Left string ended?
-     jz done
-     test ebx,ebx // Right string ended?
-     jz done
-     // Fast path: identical bytes need no table lookup
-     cmp al,bl
-     jz top_loop
-     // Bytes differ as written; compare their folded forms
-     mov al,byte ptr lower[eax]
-     mov bl,byte ptr lower[ebx]
-     cmp al,bl
-     jz top_loop
- done:
-     // Upper bits of eax and ebx are zero, so this is a plain byte
-     // difference: 0 when both strings ended together, otherwise signed
-     // according to which side is smaller or ended first.
-     sub eax,ebx
-     pop edi
-     pop ebx
-     ret
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement