Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 0: (3125, 5, 4, 2, 2, 1, 4, 4) float.ptr(4000000) (1280, 16, 320, 8, 4, 0, 80, 1) ShapeTracker(views=(View(shape=(3125, 5, 4, 2, 2, 1, 4, 4), strides=(1280, 16, 320, 8, 4, 0, 80, 1), offset=0, mask=None, contiguous=False),))
- 1: (3125, 5, 4, 2, 2, 6250, 4, 4) int.ptr(50000) (0, 0, 0, 0, 25000, 1, 0, 6250) ShapeTracker(views=(View(shape=(3125, 5, 4, 2, 2, 6250, 4, 4), strides=(0, 0, 0, 0, 25000, 1, 0, 6250), offset=0, mask=None, contiguous=False),))
- 2: (3125, 5, 4, 2, 2, 6250, 4, 4) int.ptr(50000) (16, 0, 4, 0, 0, 0, 1, 0) ShapeTracker(views=(View(shape=(3125, 5, 4, 2, 2, 6250, 4, 4), strides=(16, 0, 4, 0, 0, 0, 1, 0), offset=0, mask=None, contiguous=False),))
- 3: (3125, 5, 4, 2, 2, 6250, 4, 4) float.ptr(500000) (0, 2, 0, 1, 250000, 10, 0, 62500) ShapeTracker(views=(View(shape=(3125, 5, 4, 2, 2, 6250, 4, 4), strides=(0, 2, 0, 1, 250000, 10, 0, 62500), offset=0, mask=None, contiguous=False),))
- [Opt(op=OptOps.UPCAST, axis=0, arg=4), Opt(op=OptOps.UPCAST, axis=2, arg=4), Opt(op=OptOps.LOCAL, axis=0, arg=4), Opt(op=OptOps.LOCAL, axis=1, arg=2), Opt(op=OptOps.LOCAL, axis=2, arg=2)]
- /*
-  * r_3125_5_4_2_2_6250_4_4
-  *
-  * Per work-item:
-  *   1. Read four consecutive ints ("keys") from data2.
-  *   2. For each of 6250 steps, read four ints from data3 (6250 elements apart)
-  *      and four floats from data1 (62500 elements apart), then update a 4x4
-  *      block of running sums:
-  *          acc[row*4 + col] += weight[col] * (float)(key[row] == cand[col])
-  *      The weight is always multiplied by the 0/1 match flag rather than being
-  *      skipped, so a non-finite weight still propagates on a mismatch.
-  *   3. Write the block to data0 as four float4 rows, 80 floats apart.
-  *
-  * data0: output floats (written only).
-  * data1: input floats (read only).
-  * data2, data3: input ints (read only).
-  *
-  * NOTE(review): the buffer listing printed above this kernel numbers the inputs
-  * differently from data1..data3. Matching by element type and strides suggests
-  * listing 1 <-> data3, 2 <-> data2, 3 <-> data1 -- confirm against the generator.
-  */
- __kernel void r_3125_5_4_2_2_6250_4_4(__global float* data0, __global float* data1, __global int* data2, __global int* data3) {
-   /* Trailing numbers are the annotations carried by the generated source
-    * (presumably the launch extent of each dimension -- confirm). */
-   const int group_x = get_group_id(0); /* 5 */
-   const int group_y = get_group_id(1); /* 3125 */
-   const int local_x = get_local_id(0); /* 4 */
-   const int local_y = get_local_id(1); /* 2 */
-   const int local_z = get_local_id(2); /* 2 */
- 
-   /* Four consecutive keys for this work-item. */
-   const int key_base = (group_y * 16) + (local_x * 4);
-   int key[4];
-   for (int row = 0; row < 4; row++) {
-     key[row] = data2[key_base + row];
-   }
- 
-   /* acc[row*4 + col] holds the running sum for key[row] against column col. */
-   float acc[16];
-   for (int slot = 0; slot < 16; slot++) {
-     acc[slot] = 0.0f;
-   }
- 
-   /* Step-invariant parts of the data3 / data1 offsets. */
-   const int cand_base = local_z * 25000;
-   const int weight_base = local_y + (group_x * 2) + (local_z * 250000);
- 
-   for (int step = 0; step < 6250; step++) {
-     int cand[4];
-     float weight[4];
- 
-     for (int col = 0; col < 4; col++) {
-       cand[col] = data3[cand_base + step + (col * 6250)];
-     }
-     for (int col = 0; col < 4; col++) {
-       weight[col] = data1[weight_base + (step * 10) + (col * 62500)];
-     }
- 
-     for (int row = 0; row < 4; row++) {
-       for (int col = 0; col < 4; col++) {
-         /* Scalar == yields int 0 or 1; the cast turns it into 0.0f / 1.0f. */
-         const float match = (float)(key[row] == cand[col]);
-         acc[(row * 4) + col] = (acc[(row * 4) + col] + (weight[col] * match));
-       }
-     }
-   }
- 
-   /* Every term of out_base is a multiple of 4 floats, as is the 80-float row
-    * stride, so each float4 store is float4-aligned relative to data0
-    * (assumes data0 itself is suitably aligned -- confirm on the host side). */
-   const int out_base = (group_x * 16) + (group_y * 1280) + (local_x * 320) + (local_y * 8) + (local_z * 4);
-   for (int row = 0; row < 4; row++) {
-     const int slot = row * 4;
-     *((__global float4*)(data0 + (out_base + (row * 80)))) = (float4)(acc[slot], acc[slot + 1], acc[slot + 2], acc[slot + 3]);
-   }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement