Advertisement
Guest User

Replacement String Library Strcmp Using SSE4.2

a guest
May 7th, 2013
312
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. // http://www.intel.com/content/dam/doc/manual/64-ia-32-architectures-optimization-manual.pdf
  2. // return 0 if strings are equal, 1 if greater, -1if less
  3. int strcmp_sse4_2(const char *src1, const char *src2) {
  4.     int val;
  5.     __asm{
  6.         mov esi, src1 ;
  7.         mov edi, src2
  8.         mov edx, -16 ; common index relative to base of either string pointer xor eax, eax
  9.     topofloop:
  10.         add edx, 16 ; prevent loop carry dependency
  11.     next:
  12.         lea ecx, [esi+edx] ; address of fragment that we want to load
  13.         and ecx, 0x0fff ; check least significant12 bits of addr for page boundary cmp ecx, 0x0ff0
  14.         jg too_close_pgb ; branch to byte-granular if within 16 bytes of boundary lea ecx, [edi+edx] ; do the same check for each fragment of 2nd string
  15.         and ecx, 0x0fff
  16.         cmp ecx, 0x0ff0
  17.         jg too_close_pgb
  18.         movdqu xmm2, BYTE PTR[esi+edx]
  19.         movdqu xmm1, BYTE PTR[edi+edx]
  20.         pcmpistri xmm2, xmm1, 0x18 ; equal each
  21.         ja topofloop
  22.         jnc ret_tag
  23.         add edx, ecx ; ecx points to the byte offset that differ
  24.     not_equal:
  25.         movzx eax, BYTE PTR[esi+edx] movzx edx, BYTE PTR[edi+edx] cmp eax, edx
  26.         cmova eax, ONE
  27.         cmovb eax, NEG_ONE
  28.         jmp ret_tag
  29.     too_close_pgb:
  30.         add edx, 1 ; do byte granular compare movzx ecx, BYTE PTR[esi+edx-1]
  31.         movzx ebx, BYTE PTR[edi+edx-1]
  32.         cmp ecx, ebx
  33.         jne inequality
  34.         add ebx, ecx
  35.         jnz next
  36.         jmp ret_tag
  37.     inequality:
  38.         cmovb eax, NEG_ONE
  39.         cmova eax, ONE ret_tag:
  40.         mov [val], eax
  41.     }
  42.     return(val);
  43. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement