Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- $ cat t3.c
- #include <stdio.h>
- #include <stdlib.h>
- #include <math.h>
/* Number of test vectors, and also the number of elements in each vector. */
#define DSIZE 1000
/* Largest absolute CPU-vs-accelerator difference accepted per result. */
#define TOL 0.01f
/* Value stored in every element of the input array. */
#define DAT 2.0f

/*
 * Computes the sum of squares of each of `ntv` vectors of length `len_tv`
 * twice -- once with a plain host loop and once inside an OpenACC kernels
 * region -- and compares the two result arrays element by element.
 *
 * Prints "Success" and returns 0 when every pair of results differs by at
 * most TOL. Prints a diagnostic and returns 1 on allocation failure or on
 * the first mismatch.
 */
int main(void)
{
    /* Dimensions must be set before the fill loop below uses them. */
    int ntv = DSIZE;
    int len_tv = DSIZE;
    int i_v, i_el;
    float temp;
    int rc = 1;

    float *tva = malloc((size_t)ntv * (size_t)len_tv * sizeof *tva);
    float *tv_sq = malloc((size_t)ntv * sizeof *tv_sq);
    float *tv_sq_cpu = malloc((size_t)ntv * sizeof *tv_sq_cpu);

    if (tva == NULL || tv_sq == NULL || tv_sq_cpu == NULL) {
        printf("malloc fail\n");
        goto cleanup;
    }

    /* Fill the flattened ntv x len_tv input with the constant test value. */
    for (i_v = 0; i_v < ntv; i_v++) {
        for (i_el = 0; i_el < len_tv; i_el++) {
            tva[(i_v * len_tv) + i_el] = DAT;
        }
    }

    /* Host reference: per-vector sum of squares. */
    for (i_v = 0; i_v < ntv; i_v++) {
        temp = 0;
        for (i_el = 0; i_el < len_tv; i_el++) {
            temp += pow(tva[i_v * len_tv + i_el], (float)2.0);
        }
        tv_sq_cpu[i_v] = temp;
    }

    /*
     * Same computation inside an OpenACC region.
     * NOTE(review): `temp` appears both in create() on the data construct
     * and as the reduction variable of the inner loop -- confirm against the
     * OpenACC specification that this combination is intended.
     */
    #pragma acc data copyin(tva[:(len_tv*ntv)]) copyout(tv_sq[:ntv]) create(temp)
    {
        #pragma acc kernels loop independent
        for (i_v = 0; i_v < ntv; i_v++) {
            temp = 0;
            #pragma acc loop independent gang vector reduction(+:temp)
            for (i_el = 0; i_el < len_tv; i_el++) {
                temp += pow(tva[i_v * len_tv + i_el], (float)2.0);
            }
            tv_sq[i_v] = temp;
        }
    }

    /*
     * Compare with the floating-point absolute value: integer abs() would
     * truncate the difference and hide any error smaller than 1.0.
     */
    for (i_v = 0; i_v < ntv; i_v++) {
        if (fabsf(tv_sq[i_v] - tv_sq_cpu[i_v]) > TOL) {
            printf("mismatch at idx: %d cpu: %f gpu: %f\n", i_v, tv_sq_cpu[i_v], tv_sq[i_v]);
            goto cleanup;
        }
    }

    printf("Success\n");
    rc = 0;

cleanup:
    /* free(NULL) is a no-op, so this is safe after a partial allocation. */
    free(tv_sq_cpu);
    free(tv_sq);
    free(tva);
    return rc;
}
- $ pgcc -O3 -acc -ta=nvidia,cc20,cuda5.0 -Minfo=accel t3.c -o t3
- main:
- 32, Generating create(temp)
- Generating copyout(tv_sq[0:ntv])
- Generating copyin(tva[0:ntv*len_tv])
- 34, Generating present_or_copyout(tv_sq[0:ntv])
- Generating present_or_copyin(tva[0:ntv*len_tv])
- Generating NVIDIA code
- Generating compute capability 2.0 binary
- 35, Loop is parallelizable
- Accelerator kernel generated
- 35, #pragma acc loop gang /* blockIdx.x */
- 40, #pragma acc loop vector(128) /* threadIdx.x */
- Loop is parallelizable
- $ ./t3
- Success
- $ pgcc -V
- pgcc 13.10-0 64-bit target on x86-64 Linux -tp nehalem
- The Portland Group - PGI Compilers and Tools
- Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
- $
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement