Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/* Includes */
#include "libperfcount.h"
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Function prototypes */

/* Benchmark snippets; each takes no arguments and returns nothing. */
void code1(void);
void code2(void);
void code3(void);
void code4(void);
void code5(void);

/* Global variables */

/*
 * Dispatch table of benchmark snippets.
 *
 * main() selects one entry using the numeric command-line argument, so the
 * index is zero-based: 0 selects code1, 4 selects code5.
 */
void (*FN_TABLE[])(void) = {
    code1,
    code2,
    code3,
    code4,
    code5
};
/**
 * Code snippets to bench
 */

/**
 * Snippet 1: a fixed-length loop of independent AVX operations.
 *
 * After zeroing the vector registers with vzeroall, the loop body executes
 * ten vfmadd231ps (ymm0..ymm9) and five vpaddd (ymm10..ymm14) instructions,
 * each of which uses a single register as both sources and destination, so
 * no instruction depends on a different register's result. The loop runs
 * 1,000,000,000 times, counted down in rcx.
 *
 * Takes no input and produces no output; it exists only to be measured.
 */
void code1(void){
    /*
     * __asm__/__volatile__ rather than asm/volatile so the statement is also
     * accepted when compiling in a strict ISO mode (e.g. -std=c11).
     */
    __asm__ __volatile__(
        ".intel_syntax noprefix\n\t"
        "vzeroall\n\t"
        "mov rcx, 1000000000\n\t"
        /*
         * %= expands to a number unique to this asm instance, so the label
         * cannot collide if the compiler inlines or clones this function.
         */
        "LstartLabel%=:\n\t"
        "vfmadd231ps ymm0, ymm0, ymm0\n\t"
        "vfmadd231ps ymm1, ymm1, ymm1\n\t"
        "vfmadd231ps ymm2, ymm2, ymm2\n\t"
        "vfmadd231ps ymm3, ymm3, ymm3\n\t"
        "vfmadd231ps ymm4, ymm4, ymm4\n\t"
        "vfmadd231ps ymm5, ymm5, ymm5\n\t"
        "vfmadd231ps ymm6, ymm6, ymm6\n\t"
        "vfmadd231ps ymm7, ymm7, ymm7\n\t"
        "vfmadd231ps ymm8, ymm8, ymm8\n\t"
        "vfmadd231ps ymm9, ymm9, ymm9\n\t"
        "vpaddd ymm10, ymm10, ymm10\n\t"
        "vpaddd ymm11, ymm11, ymm11\n\t"
        "vpaddd ymm12, ymm12, ymm12\n\t"
        "vpaddd ymm13, ymm13, ymm13\n\t"
        "vpaddd ymm14, ymm14, ymm14\n\t"
        "dec rcx\n\t"
        "jnz LstartLabel%=\n\t"
        /*
         * Restore the assembler to prefixed AT&T syntax for the code the
         * compiler emits after this statement. This assumes the translation
         * unit is not being built with -masm=intel.
         */
        ".att_syntax prefix\n\t"
        : /* No outputs we care about */
        : /* No inputs we care about */
        : "xmm0", "xmm1", "xmm2",  "xmm3",  "xmm4",  "xmm5",  "xmm6",  "xmm7",
          "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
          "rcx",
          "cc",      /* dec modifies the flags */
          "memory"
    );
}
/**
 * Snippet 2: intentionally empty placeholder. Selecting it measures only
 * the call/return around an empty body.
 */
void code2(void){
}
/**
 * Snippet 3: intentionally empty placeholder. Selecting it measures only
 * the call/return around an empty body.
 */
void code3(void){
}
/**
 * Snippet 4: intentionally empty placeholder. Selecting it measures only
 * the call/return around an empty body.
 */
void code4(void){
}
/**
 * Snippet 5: intentionally empty placeholder. Selecting it measures only
 * the call/return around an empty body.
 */
void code5(void){
}
/* Test Schedule */

/*
 * Event specifications to measure, in order. main() walks this table four
 * entries at a time (one "batch" per run of the selected snippet) and hands
 * each string to pfcParseConfig(). The final batch holds only three entries;
 * main() substitutes "" for the missing slot.
 *
 * NOTE(review): the "<1" / ">=N" suffixes are presumably threshold modifiers
 * understood by pfcParseConfig() -- confirm against libperfcount.
 */
const char* const SCHEDULE[] = {
    /* Batch */
    "uops_issued.any",
    "uops_issued.any<1",
    "uops_issued.any>=1",
    "uops_issued.any>=2",
    /* Batch */
    "uops_issued.any>=3",
    "uops_issued.any>=4",
    "uops_issued.any>=5",
    "uops_issued.any>=6",
    /* Batch */
    "uops_executed_port.port_0",
    "uops_executed_port.port_1",
    "uops_executed_port.port_2",
    "uops_executed_port.port_3",
    /* Batch */
    "uops_executed_port.port_4",
    "uops_executed_port.port_5",
    "uops_executed_port.port_6",
    "uops_executed_port.port_7",
    /* Batch */
    "resource_stalls.any",
    "resource_stalls.rs",
    "resource_stalls.sb",
    "resource_stalls.rob",
    /* Batch */
    "uops_retired.all",
    "uops_retired.all<1",
    "uops_retired.all>=1",
    "uops_retired.all>=2",
    /* Batch */
    "uops_retired.all>=3",
    "uops_retired.all>=4",
    "uops_retired.all>=5",
    "uops_retired.all>=6",
    /* Batch */
    "inst_retired.any_p",
    "inst_retired.any_p<1",
    "inst_retired.any_p>=1",
    "inst_retired.any_p>=2",
    /* Batch */
    "inst_retired.any_p>=3",
    "inst_retired.any_p>=4",
    "inst_retired.any_p>=5",
    "inst_retired.any_p>=6",
    /* Batch */
    "idq_uops_not_delivered.core",
    "idq_uops_not_delivered.core<1",
    "idq_uops_not_delivered.core>=1",
    "idq_uops_not_delivered.core>=2",
    /* Batch */
    "idq_uops_not_delivered.core>=3",
    "idq_uops_not_delivered.core>=4",
    "rs_events.empty",
    "idq.empty",
    /* Batch */
    "idq.mite_all_uops",
    "idq.mite_all_uops<1",
    "idq.mite_all_uops>=1",
    "idq.mite_all_uops>=2",
    /* Batch */
    "idq.mite_all_uops>=3",
    "idq.mite_all_uops>=4",
    "move_elimination.int_not_eliminated",
    "move_elimination.simd_not_eliminated",
    /* Batch */
    "lsd.uops",
    "lsd.uops<1",
    "lsd.uops>=1",
    "lsd.uops>=2",
    /* Batch */
    "lsd.uops>=3",
    "lsd.uops>=4",
    "ild_stall.lcp",
    "ild_stall.iq_full",
    /* Batch */
    "br_inst_exec.all_branches",
    "br_inst_exec.0x81",
    "br_inst_exec.0x82",
    "icache.misses",
    /* Batch */
    "br_misp_exec.all_branches",
    "br_misp_exec.0x81",
    "br_misp_exec.0x82",
    "fp_assist.any",
    /* Batch */
    "cpu_clk_unhalted.core_clk",
    "cpu_clk_unhalted.ref_xclk",
    "baclears.any"
};

/* Number of entries in SCHEDULE (explicitly narrowed from size_t to int). */
const int NUMCOUNTS = (int)(sizeof(SCHEDULE)/sizeof(*SCHEDULE));
- /**
- * Main
- */
- int main(int argc, char* argv[]){
- int i;
- /**
- * Initialize
- */
- pfcInit();
- if(argc <= 1){
- pfcDumpEvents();
- exit(1);
- }
- pfcPinThread(3);
- /**
- * Arguments are:
- *
- * perfcountdemo #codesnippet
- *
- * There is a schedule of configuration that is followed.
- */
- void (*fn)(void) = FN_TABLE[strtoull(argv[1], NULL, 0)];
- static const uint64_t ZERO_CNT[7] = {0,0,0,0,0,0,0};
- static const uint64_t ZERO_CFG[7] = {0,0,0,0,0,0,0};
- uint64_t cnt[7] = {0,0,0,0,0,0,0};
- uint64_t cfg[7] = {2,2,2,0,0,0,0};
- /* Warmup */
- for(i=0;i<10;i++){
- fn();
- }
- /* Run master loop */
- for(i=0;i<NUMCOUNTS;i+=4){
- /* Configure counters */
- const char* sched0 = i+0 < NUMCOUNTS ? SCHEDULE[i+0] : "";
- const char* sched1 = i+1 < NUMCOUNTS ? SCHEDULE[i+1] : "";
- const char* sched2 = i+2 < NUMCOUNTS ? SCHEDULE[i+2] : "";
- const char* sched3 = i+3 < NUMCOUNTS ? SCHEDULE[i+3] : "";
- cfg[3] = pfcParseConfig(sched0);
- cfg[4] = pfcParseConfig(sched1);
- cfg[5] = pfcParseConfig(sched2);
- cfg[6] = pfcParseConfig(sched3);
- pfcWrConfigCnts(0, 7, cfg);
- pfcWrCountsCnts(0, 7, ZERO_CNT);
- pfcRdCountsCnts(0, 7, cnt);
- /* ^ Should report 0s, and launch the counters. */
- /************** Hot section **************/
- fn();
- /************ End Hot section ************/
- pfcRdCountsCnts(0, 7, cnt);
- pfcWrConfigCnts(0, 7, ZERO_CFG);
- /* ^ Should clear the counter config and disable them. */
- /**
- * Print the lovely results
- */
- printf("Instructions Issued : %20llu\n", cnt[0]);
- printf("Unhalted core cycles : %20llu\n", cnt[1]);
- printf("Unhalted reference cycles : %20llu\n", cnt[2]);
- printf("%-35s: %20llu\n", sched0, cnt[3]);
- printf("%-35s: %20llu\n", sched1, cnt[4]);
- printf("%-35s: %20llu\n", sched2, cnt[5]);
- printf("%-35s: %20llu\n", sched3, cnt[6]);
- }
- /**
- * Close up shop
- */
- pfcFini();
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement