Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdint.h>
- #define ITERATIONS 25000000LL
/*
 * A 64-bit tick count that can also be accessed as two 32-bit words.
 *
 * main() fills the two words straight from RDTSC: EAX is stored into
 * t32.th and EDX into t32.tl.  th is declared first, so it sits at the
 * lower address; on little-endian x86 that is the low half of t64, which
 * is where RDTSC's EAX value belongs.  (The member names read like
 * "high"/"low", which is the opposite of where they land in t64.)
 */
union ticks {
    uint64_t t64;       /* the whole counter value */
    struct s32 {
        uint32_t th;    /* first 32-bit word (lower address) */
        uint32_t tl;    /* second 32-bit word (higher address) */
    } t32;
};
/*
 * Multiplier applied to ITERATIONS by main(): the single-instruction loop
 * runs commands_per_tick * ITERATIONS times, and the same product is used
 * as the divisor when the per-instruction figure is printed.
 */
uint64_t commands_per_tick = UINT64_C(16);
- int main() {
- union ticks start, end;
- __asm__ __volatile__(
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm0\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm1\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm2\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm3\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm4\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm5\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm6\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm7\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm8\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm9\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm10\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm11\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm12\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm13\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm14\n\t"
- "xor %rax, %rax\n\t"
- "movb $2, %al\n\t"
- "cvtsi2sd %rax, %xmm15\n\t"
- );
- for (uint64_t i = commands_per_tick * ITERATIONS; i > 0; --i) {
- __asm__ __volatile__(
- "sqrtsd %xmm0, %xmm0\n\t"
- );
- }
- asm("rdtsc\n\t":"=a"(end.t32.th),"=d"(end.t32.tl));
- uint64_t result = end.t64-start.t64;
- printf("Tacts taken: %lu for %llu commands\n", result, commands_per_tick * ITERATIONS);
- printf("%lf for each\n", (result + 0.0) / (commands_per_tick * ITERATIONS));
- for (uint64_t i = ITERATIONS; i > 0; --i) {
- __asm__ __volatile__(
- "sqrtsd %xmm0, %xmm0\n\t"
- "sqrtsd %xmm1, %xmm1\n\t"
- "sqrtsd %xmm2, %xmm2\n\t"
- "sqrtsd %xmm3, %xmm3\n\t"
- "sqrtsd %xmm4, %xmm4\n\t"
- "sqrtsd %xmm5, %xmm5\n\t"
- "sqrtsd %xmm6, %xmm6\n\t"
- "sqrtsd %xmm7, %xmm7\n\t"
- "sqrtsd %xmm8, %xmm8\n\t"
- "sqrtsd %xmm9, %xmm9\n\t"
- "sqrtsd %xmm10, %xmm10\n\t"
- "sqrtsd %xmm11, %xmm11\n\t"
- "sqrtsd %xmm12, %xmm12\n\t"
- "sqrtsd %xmm13, %xmm13\n\t"
- "sqrtsd %xmm14, %xmm14\n\t"
- "sqrtsd %xmm15, %xmm15\n\t"
- );
- }
- asm("rdtsc\n\t":"=a"(start.t32.th),"=d"(start.t32.tl));
- for (uint64_t i = ITERATIONS; i > 0; --i) {
- __asm__ __volatile__(
- "sqrtsd %xmm0, %xmm0\n\t"
- "sqrtsd %xmm1, %xmm1\n\t"
- "sqrtsd %xmm2, %xmm2\n\t"
- "sqrtsd %xmm3, %xmm3\n\t"
- "sqrtsd %xmm4, %xmm4\n\t"
- "sqrtsd %xmm5, %xmm5\n\t"
- "sqrtsd %xmm6, %xmm6\n\t"
- "sqrtsd %xmm7, %xmm7\n\t"
- "sqrtsd %xmm8, %xmm8\n\t"
- "sqrtsd %xmm9, %xmm9\n\t"
- "sqrtsd %xmm10, %xmm10\n\t"
- "sqrtsd %xmm11, %xmm11\n\t"
- "sqrtsd %xmm12, %xmm12\n\t"
- "sqrtsd %xmm13, %xmm13\n\t"
- "sqrtsd %xmm14, %xmm14\n\t"
- "sqrtsd %xmm15, %xmm15\n\t"
- );
- }
- asm("rdtsc\n\t":"=a"(end.t32.th),"=d"(end.t32.tl));
- result = end.t64-start.t64;
- printf("Tacts taken: %lu for %llu commands\n", result, (16*ITERATIONS));
- printf("%lf for each\n", (result + 0.0) / (16*ITERATIONS));
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement