Advertisement
Guest User

5 second kernel

a guest
Jun 5th, 2025
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.66 KB | None | 0 0
  1. 0: (3125, 5, 4, 2, 2, 1, 4, 4) float.ptr(4000000) (1280, 16, 320, 8, 4, 0, 80, 1) ShapeTracker(views=(View(shape=(3125, 5, 4, 2, 2, 1, 4, 4), strides=(1280, 16, 320, 8, 4, 0, 80, 1), offset=0, mask=None, contiguous=False),))
  2. 1: (3125, 5, 4, 2, 2, 6250, 4, 4) int.ptr(50000) (0, 0, 0, 0, 25000, 1, 0, 6250) ShapeTracker(views=(View(shape=(3125, 5, 4, 2, 2, 6250, 4, 4), strides=(0, 0, 0, 0, 25000, 1, 0, 6250), offset=0, mask=None, contiguous=False),))
  3. 2: (3125, 5, 4, 2, 2, 6250, 4, 4) int.ptr(50000) (16, 0, 4, 0, 0, 0, 1, 0) ShapeTracker(views=(View(shape=(3125, 5, 4, 2, 2, 6250, 4, 4), strides=(16, 0, 4, 0, 0, 0, 1, 0), offset=0, mask=None, contiguous=False),))
  4. 3: (3125, 5, 4, 2, 2, 6250, 4, 4) float.ptr(500000) (0, 2, 0, 1, 250000, 10, 0, 62500) ShapeTracker(views=(View(shape=(3125, 5, 4, 2, 2, 6250, 4, 4), strides=(0, 2, 0, 1, 250000, 10, 0, 62500), offset=0, mask=None, contiguous=False),))
  5. [Opt(op=OptOps.UPCAST, axis=0, arg=4), Opt(op=OptOps.UPCAST, axis=2, arg=4), Opt(op=OptOps.LOCAL, axis=0, arg=4), Opt(op=OptOps.LOCAL, axis=1, arg=2), Opt(op=OptOps.LOCAL, axis=2, arg=2)]
  /*
   * Machine-generated OpenCL reduction kernel.
   *
   * Each work-item:
   *   1. reads four consecutive ints from data2 (its "keys");
   *   2. runs 6250 reduction steps; in every step it reads four ints from data3
   *      (6250 elements apart) and four floats from data1 (62500 elements
   *      apart), and adds each float, multiplied by 1.0f when the key equals
   *      the data3 value and by 0.0f otherwise, into one of 16 running sums
   *      (4 keys x 4 columns);
   *   3. writes the 16 sums to data0 as four float4 stores, 80 floats apart.
   *
   * data0: output floats (written only).
   * data1: float inputs that get accumulated (read only).
   * data2: int keys, fixed per work-item (read only).
   * data3: int values compared against the keys (read only).
   *
   * The numeric comments on the index variables come from the generator and
   * presumably give the launch extent of that dimension -- confirm against the
   * host-side enqueue.
   * NOTE(review): the float4 stores assume data0 is 16-byte aligned; every
   * offset used below is a multiple of 4 floats.
   */
  __kernel void r_3125_5_4_2_2_6250_4_4(__global float* data0, __global float* data1, __global int* data2, __global int* data3) {
    const int group_x = get_group_id(0); /* 5 */
    const int group_y = get_group_id(1); /* 3125 */
    const int local_x = get_local_id(0); /* 4 */
    const int local_y = get_local_id(1); /* 2 */
    const int local_z = get_local_id(2); /* 2 */

    /* Keys are loaded once; they do not depend on the reduction index. */
    const int key_base = (group_y << 4) + (local_x << 2);
    const int key0 = data2[key_base];
    const int key1 = data2[key_base + 1];
    const int key2 = data2[key_base + 2];
    const int key3 = data2[key_base + 3];

    /* One float4 of running sums per key; components .x/.y/.z/.w are the four columns. */
    float4 sum0 = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
    float4 sum1 = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
    float4 sum2 = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
    float4 sum3 = (float4)(0.0f, 0.0f, 0.0f, 0.0f);

    /* Parts of the data3 / data1 indices that stay fixed across the loop. */
    const int id_base = local_z * 25000;
    const int weight_base = local_y + (group_x << 1) + (local_z * 250000);

    for (int step = 0; step < 6250; step++) {
      const int id_at = id_base + step;
      const int id0 = data3[id_at];
      const int id1 = data3[id_at + 6250];
      const int id2 = data3[id_at + 12500];
      const int id3 = data3[id_at + 18750];

      const int weight_at = weight_base + (step * 10);
      const float w0 = data1[weight_at];
      const float w1 = data1[weight_at + 62500];
      const float w2 = data1[weight_at + 125000];
      const float w3 = data1[weight_at + 187500];

      /* Scalar == yields int 0 or 1, so the cast gives 0.0f or 1.0f. The
         multiply is kept (rather than a select) so the arithmetic is unchanged. */
      sum0.x += w0 * (float)(key0 == id0);
      sum0.y += w1 * (float)(key0 == id1);
      sum0.z += w2 * (float)(key0 == id2);
      sum0.w += w3 * (float)(key0 == id3);

      sum1.x += w0 * (float)(key1 == id0);
      sum1.y += w1 * (float)(key1 == id1);
      sum1.z += w2 * (float)(key1 == id2);
      sum1.w += w3 * (float)(key1 == id3);

      sum2.x += w0 * (float)(key2 == id0);
      sum2.y += w1 * (float)(key2 == id1);
      sum2.z += w2 * (float)(key2 == id2);
      sum2.w += w3 * (float)(key2 == id3);

      sum3.x += w0 * (float)(key3 == id0);
      sum3.y += w1 * (float)(key3 == id1);
      sum3.z += w2 * (float)(key3 == id2);
      sum3.w += w3 * (float)(key3 == id3);
    }

    /* Each key's four sums go out as one float4; consecutive keys are 80 floats apart. */
    const int out_base = (group_x << 4) + (group_y * 1280) + (local_x * 320) + (local_y << 3) + (local_z << 2);
    *((__global float4*)(data0 + out_base)) = sum0;
    *((__global float4*)(data0 + (out_base + 80))) = sum1;
    *((__global float4*)(data0 + (out_base + 160))) = sum2;
    *((__global float4*)(data0 + (out_base + 240))) = sum3;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement