Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
 * Copy n bytes from src to dest, walking from the highest address down to
 * the lowest ("reverse" memcpy).
 *
 * Because the last byte is copied first, the copy is also correct when the
 * two regions overlap and dest lies above src.
 *
 * dest  start of the destination region (at least n bytes writable)
 * src   start of the source region (at least n bytes readable)
 * n     number of bytes to copy; n <= 0 copies nothing
 *
 * Returns dest.
 */
void* memcpyrev(void* dest, void* src, int n) {
    /* Nothing to copy; also keeps a negative n from turning into a huge
     * unsigned repeat count. */
    if (n <= 0) {
        return dest;
    }

#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    {
        /* Start at the LAST byte of each region: with the direction flag
         * set, movsb decrements both pointers after every byte. */
        unsigned char* dst_last = (unsigned char*)dest + (n - 1);
        const unsigned char* src_last = (const unsigned char*)src + (n - 1);
        /* __SIZE_TYPE__ is the compiler's own size_t, so the counter always
         * fills the whole (e/r)cx register that "rep" counts down. */
        __SIZE_TYPE__ count = (__SIZE_TYPE__)n;

        __asm__ __volatile__ (
            "std\n\t"        /* DF = 1: string instructions move backward  */
            "rep movsb\n\t"  /* copy one byte per iteration until cx == 0  */
            "cld"            /* DF = 0 again: compiled code expects this   */
            /* The instruction modifies all three registers, so they are
             * read-write operands pinned to (e/r)di, (e/r)si and (e/r)cx. */
            : "+D" (dst_last), "+S" (src_last), "+c" (count)
            :
            /* "memory": bytes are written through dst_last;
             * "cc":     std/cld change the flags register. */
            : "memory", "cc"
        );
    }
#else
    {
        /* Portable equivalent of "std; rep movsb; cld". */
        unsigned char* dst_end = (unsigned char*)dest + n;
        const unsigned char* src_end = (const unsigned char*)src + n;
        int remaining = n;

        while (remaining-- > 0) {
            *--dst_end = *--src_end;
        }
    }
#endif

    return dest;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement