Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <assert.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Size in bytes of the cache being probed (32768).
 * NOTE(review): presumably 64 sets x 64-byte lines x 8 ways, i.e. a 32 KiB
 * L1 data cache -- confirm against the target CPU.
 */
#define CACHE_SIZE (64 * 64 * 8)

/*
 * Base address of the probe buffer. main() allocates it and advances it
 * until the address maps to set index 0; measures() then flushes and loads
 * the bytes at offsets +4096 and +32768 from this pointer.
 */
unsigned char *cache_L1;

/*
 * Result array filled by measures(): one performance-counter delta per loop
 * iteration, stored as consecutive 8-byte values.
 */
unsigned long *T = NULL;
- void measures(){
- asm volatile(
- "mov $1000000,%%r11\n\t"// number of loop iteration
- "mov %%rax,%%r12\n\t"
- "mov %%rbx,%%r10\n\t"
- "loop:\n\t"
- // Serialize before beginning
- "xor %%rax,%%rax\n\tcpuid\n\t"
- // Make sure that [r12+4096] is not is cache
- "mov %%r12,%%rax\n\t"
- "add $4096, %%rax\n\t" // For experiment 1, this line is changed to 'add $0,%%rax'
- "clflush (%%rax)\n\t"
- // Make sure that [r12 + 32768] is not in cache
- "mov %%r12,%%rax\n\t"
- "add $32768, %%rax\n\t"
- "clflush (%%rax)\n\t"
- // Serialize to make sure that all previous operation are finished before continuing.
- "xor %%rax,%%rax\n\tcpuid\n\t" // 1 load
- // Put [r12 + 4096] in cache
- "mov %%r12,%%rax\n\t"
- "add $4096,%%rax\n\t" // For experiment 1, this line is changed to 'add $0,%%rax'
- "mov (%%rax),%%r8\n\t"
- // Serialize to make sure that all previous operation are finished before continuing.
- "xor %%rax,%%rax\n\tcpuid\n\t"
- // ------ Now I will try to access [r12 + 32768]. Since it was fflushed, I expect it is not in cache and I expect to measure a cache miss when reading [r12 + 32768].
- // Read PMC configured with MEM_LOAD_UOPS_RETIRED.L1_MISS
- "xor %%rcx,%%rcx\n\trdpmc\n\tshl $32,%%rdx\n\tor %%rdx,%%rax\n\tmov %%rax,%%r9\n\t"
- // Serialize to make sure that all previous operation are finished before continuing.
- "xor %%rax,%%rax\n\tcpuid\n\t" // 1 load
- // Access [r12 + 32768 ]
- "mov %%r12, %%rax\n\t"
- "add $32768,%%rax\n\t"
- "mov (%%rax),%%r8\n\t"
- // Serialize to make sure that all previous operation are finished before continuing.
- "xor %%rax,%%rax\n\tcpuid\n\t" // 1 load
- // Read PMC configured with MEM_LOAD_UOPS_RETIRED.L1_MISS. Substract with the first read. Store the result in r15
- "xor %%rcx,%%rcx\n\trdpmc\n\tshl $32,%%rdx\n\tor %%rdx,%%rax\n\tmov %%rax,%%r15\n\t"
- // Serialize to make sure that all previous operation are finished before continuing.
- "xor %%rax,%%rax\n\tcpuid\n\t" // 1 load
- // STORE
- "mov %%r10,%%rbx\n\t"
- "sub %%r9,%%r15\n\t"
- "movq %%r15, (%%rbx)\n\t"
- "clflush (%%r10)\n\t"
- "add $8,%%r10\n\t"
- // Serialize to make sure that all previous operation are finished before continuing.
- "xor %%rax,%%rax\n\tcpuid\n\t" // 1 load
- "dec %%r11\n\t"
- "jnz loop\n\t"
- "endloop:"
- :
- :"a"(cache_L1),"b"(T)
- :"rcx","rdx","r8","r9","r10","r11","r15");
- }
- int main()
- {
- // Configure PMC by writting in /dev/CONFIG_MODULE0 (This file is created by a kernel module I made)
- int pmcfd;
- char *pmcname;
- pmcname = "MEM_LOAD_UOPS_RETIRED.L1_MISS";
- pmcfd = open("/dev/CONFIG_MODULE0", O_RDWR);
- assert(pmcfd != -1);
- write(pmcfd, "CHANGEPMC=0",strlen("CHANGEPMC=0"));
- write(pmcfd, pmcname,strlen(pmcname));
- close(pmcfd);
- // Allocate a memory buffer and align it to L1 set 0
- cache_L1 = calloc(6*CACHE_SIZE,1);
- unsigned long currentSet = ((unsigned long)cache_L1 >> 6) & 63;
- while(currentSet != 0){
- cache_L1 += 64; // cache line size to jump to the next set
- currentSet = ((long)cache_L1 >> 6) & 63;
- }
- // Allocate an array to store results
- T = calloc(1000000,sizeof(unsigned long));
- // Perform the measures
- measures();
- // Print the measures
- for(int nbrmes=0;nbrmes<1000000;++nbrmes)
- printf("%ld\n",T[nbrmes]);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement