Advertisement
Guest User

Untitled

a guest
Jun 3rd, 2020
47
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 44.85 KB | None | 0 0
  1. Profiling PyTorch1.2:
  2. -------------------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- -----------------------------------
  3. Name Self CPU total % Self CPU total CPU total % CPU total CPU time avg CUDA total % CUDA total CUDA time avg Number of Calls Input Shapes
  4. -------------------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- -----------------------------------
  5. view 0.06% 37.819us 0.06% 37.819us 37.819us 0.01% 37.888us 37.888us 1 []
  6. transpose 0.02% 10.549us 0.02% 10.549us 10.549us 0.00% 10.240us 10.240us 1 []
  7. matmul 1.38% 938.004us 1.38% 938.004us 938.004us 0.36% 1.368ms 1.368ms 1 []
  8. expand 0.01% 7.727us 0.01% 7.727us 7.727us 0.00% 8.192us 8.192us 1 []
  9. contiguous 0.09% 58.713us 0.09% 58.713us 58.713us 0.02% 60.416us 60.416us 1 []
  10. empty_like 0.02% 10.872us 0.02% 10.872us 10.872us 0.00% 11.264us 11.264us 1 []
  11. empty 0.01% 4.776us 0.01% 4.776us 4.776us 0.00% 5.120us 5.120us 1 []
  12. view 0.01% 5.582us 0.01% 5.582us 5.582us 0.00% 5.120us 5.120us 1 []
  13. expand 0.00% 3.333us 0.00% 3.333us 3.333us 0.00% 4.032us 4.032us 1 []
  14. contiguous 0.00% 1.935us 0.00% 1.935us 1.935us 0.00% 1.024us 1.024us 1 []
  15. view 0.00% 3.382us 0.00% 3.382us 3.382us 0.00% 4.096us 4.096us 1 []
  16. bmm 1.15% 784.401us 1.15% 784.401us 784.401us 0.33% 1.235ms 1.235ms 1 []
  17. _unsafe_view 0.02% 12.367us 0.02% 12.367us 12.367us 0.00% 1.024us 1.024us 1 []
  18. mul 0.51% 349.196us 0.51% 349.196us 349.196us 0.25% 920.576us 920.576us 1 []
  19. pow 0.07% 47.909us 0.07% 47.909us 47.909us 0.00% 6.144us 6.144us 1 []
  20. sum 0.11% 75.900us 0.11% 75.900us 75.900us 0.00% 11.264us 11.264us 1 []
  21. neg 0.05% 33.897us 0.05% 33.897us 33.897us 0.00% 4.096us 4.096us 1 []
  22. sub 0.04% 29.229us 0.04% 29.229us 29.229us 0.25% 931.840us 931.840us 1 []
  23. transpose 0.01% 5.855us 0.01% 5.855us 5.855us 0.00% 2.048us 2.048us 1 []
  24. sub 0.44% 301.858us 0.44% 301.858us 301.858us 0.26% 960.480us 960.480us 1 []
  25. topk 0.12% 81.283us 0.12% 81.283us 81.283us 2.89% 10.856ms 10.856ms 1 []
  26. view 0.01% 8.930us 0.01% 8.930us 8.930us 0.00% 2.048us 2.048us 1 []
  27. mul 0.03% 19.651us 0.03% 19.651us 19.651us 0.00% 4.096us 4.096us 1 []
  28. add 0.03% 19.795us 0.03% 19.795us 19.795us 0.01% 23.552us 23.552us 1 []
  29. view 0.01% 4.918us 0.01% 4.918us 4.918us 0.00% 2.049us 2.049us 1 []
  30. transpose 0.01% 4.150us 0.01% 4.150us 4.150us 0.00% 1.024us 1.024us 1 []
  31. contiguous 0.03% 22.186us 0.03% 22.186us 22.186us 0.00% 16.448us 16.448us 1 []
  32. empty_like 0.01% 7.776us 0.01% 7.776us 7.776us 0.00% 6.144us 6.144us 1 []
  33. empty 0.01% 3.510us 0.01% 3.510us 3.510us 0.00% 2.048us 2.048us 1 []
  34. view 0.01% 4.462us 0.01% 4.462us 4.462us 0.00% 2.049us 2.049us 1 []
  35. slice 0.01% 5.751us 0.01% 5.751us 5.751us 0.00% 2.047us 2.047us 1 []
  36. to 0.00% 2.185us 0.00% 2.185us 2.185us 0.00% 2.048us 2.048us 1 []
  37. index 0.07% 45.861us 0.07% 45.861us 45.861us 0.01% 46.080us 46.080us 1 []
  38. view 0.01% 4.380us 0.01% 4.380us 4.380us 0.00% 2.047us 2.047us 1 []
  39. view 0.01% 3.632us 0.01% 3.632us 3.632us 0.00% 2.048us 2.048us 1 []
  40. repeat 0.07% 45.777us 0.07% 45.777us 45.777us 0.01% 24.575us 24.575us 1 []
  41. sub 0.02% 12.702us 0.02% 12.702us 12.702us 0.01% 45.057us 45.057us 1 []
  42. cat 2.40% 1.632ms 2.40% 1.632ms 1.632ms 0.02% 92.160us 92.160us 1 []
  43. permute 0.01% 9.079us 0.01% 9.079us 9.079us 0.00% 1.024us 1.024us 1 []
  44. conv2d 17.08% 11.629ms 17.08% 11.629ms 11.629ms 0.20% 738.305us 738.305us 1 []
  45. convolution 17.06% 11.618ms 17.06% 11.618ms 11.618ms 0.20% 734.208us 734.208us 1 []
  46. _convolution 17.04% 11.607ms 17.04% 11.607ms 11.607ms 0.19% 730.112us 730.112us 1 []
  47. contiguous 0.04% 28.265us 0.04% 28.265us 28.265us 0.02% 81.919us 81.919us 1 []
  48. empty_like 0.01% 9.194us 0.01% 9.194us 9.194us 0.00% 5.120us 5.120us 1 []
  49. empty 0.01% 4.692us 0.01% 4.692us 4.692us 0.00% 2.049us 2.049us 1 []
  50. cudnn_convolution 16.97% 11.556ms 16.97% 11.556ms 11.556ms 0.17% 642.047us 642.047us 1 []
  51. add_ 0.03% 23.519us 0.03% 23.519us 23.519us 0.00% 4.097us 4.097us 1 []
  52. batch_norm 0.15% 98.906us 0.15% 98.906us 98.906us 0.26% 958.464us 958.464us 1 []
  53. _batch_norm_impl_index 0.12% 84.679us 0.12% 84.679us 84.679us 0.25% 954.369us 954.369us 1 []
  54. contiguous 0.00% 2.061us 0.00% 2.061us 2.061us 0.00% 1.024us 1.024us 1 []
  55. contiguous 0.00% 1.632us 0.00% 1.632us 1.632us 0.00% 2.046us 2.046us 1 []
  56. contiguous 0.00% 1.676us 0.00% 1.676us 1.676us 0.00% 2.049us 2.049us 1 []
  57. contiguous 0.00% 1.583us 0.00% 1.583us 1.583us 0.00% 2.049us 2.049us 1 []
  58. contiguous 0.00% 1.642us 0.00% 1.642us 1.642us 0.00% 2.048us 2.048us 1 []
  59. cudnn_batch_norm 0.08% 57.092us 0.08% 57.092us 57.092us 0.25% 932.863us 932.863us 1 []
  60. leaky_relu 0.04% 27.673us 0.04% 27.673us 27.673us 0.15% 577.535us 577.535us 1 []
  61. max 0.07% 44.544us 0.07% 44.544us 44.544us 0.37% 1.397ms 1.397ms 1 []
  62. view 0.01% 5.784us 0.01% 5.784us 5.784us 0.00% 2.049us 2.049us 1 []
  63. transpose 0.01% 4.278us 0.01% 4.278us 4.278us 0.00% 2.047us 2.047us 1 []
  64. matmul 0.52% 353.116us 0.52% 353.116us 353.116us 0.26% 990.207us 990.207us 1 []
  65. expand 0.01% 3.961us 0.01% 3.961us 3.961us 0.00% 1.023us 1.023us 1 []
  66. contiguous 0.03% 22.133us 0.03% 22.133us 22.133us 0.02% 75.777us 75.777us 1 []
  67. empty_like 0.01% 6.836us 0.01% 6.836us 6.836us 0.00% 5.121us 5.121us 1 []
  68. empty 0.00% 3.062us 0.00% 3.062us 3.062us 0.00% 2.018us 2.018us 1 []
  69. view 0.01% 4.099us 0.01% 4.099us 4.099us 0.00% 2.016us 2.016us 1 []
  70. expand 0.00% 2.989us 0.00% 2.989us 2.989us 0.00% 2.049us 2.049us 1 []
  71. contiguous 0.00% 1.656us 0.00% 1.656us 1.656us 0.00% 2.047us 2.047us 1 []
  72. view 0.00% 3.181us 0.00% 3.181us 3.181us 0.00% 1.025us 1.025us 1 []
  73. bmm 0.42% 286.626us 0.42% 286.626us 286.626us 0.24% 886.785us 886.785us 1 []
  74. _unsafe_view 0.01% 5.851us 0.01% 5.851us 5.851us 0.00% 2.049us 2.049us 1 []
  75. mul 0.36% 244.384us 0.36% 244.384us 244.384us 0.25% 922.625us 922.625us 1 []
  76. pow 0.02% 15.159us 0.02% 15.159us 15.159us 0.01% 34.814us 34.814us 1 []
  77. sum 0.02% 16.473us 0.02% 16.473us 16.473us 0.01% 25.602us 25.602us 1 []
  78. neg 0.02% 13.143us 0.02% 13.143us 13.143us 0.00% 5.121us 5.121us 1 []
  79. sub 0.02% 12.325us 0.02% 12.325us 12.325us 0.25% 932.863us 932.863us 1 []
  80. transpose 0.01% 4.650us 0.01% 4.650us 4.650us 0.00% 2.049us 2.049us 1 []
  81. sub 0.37% 248.944us 0.37% 248.944us 248.944us 0.26% 958.465us 958.465us 1 []
  82. topk 0.04% 27.292us 0.04% 27.292us 27.292us 2.85% 10.711ms 10.711ms 1 []
  83. view 0.01% 5.949us 0.01% 5.949us 5.949us 0.00% 1.023us 1.023us 1 []
  84. mul 0.02% 11.780us 0.02% 11.780us 11.780us 0.00% 4.098us 4.098us 1 []
  85. add 0.02% 11.048us 0.02% 11.048us 11.048us 0.01% 24.574us 24.574us 1 []
  86. view 0.01% 3.669us 0.01% 3.669us 3.669us 0.00% 1.023us 1.023us 1 []
  87. transpose 0.01% 4.372us 0.01% 4.372us 4.372us 0.00% 2.051us 2.051us 1 []
  88. contiguous 0.03% 18.664us 0.03% 18.664us 18.664us 0.02% 75.773us 75.773us 1 []
  89. empty_like 0.01% 6.501us 0.01% 6.501us 6.501us 0.00% 5.121us 5.121us 1 []
  90. empty 0.00% 2.934us 0.00% 2.934us 2.934us 0.00% 1.023us 1.023us 1 []
  91. view 0.01% 4.423us 0.01% 4.423us 4.423us 0.00% 2.047us 2.047us 1 []
  92. slice 0.01% 4.208us 0.01% 4.208us 4.208us 0.00% 2.047us 2.047us 1 []
  93. to 0.00% 1.996us 0.00% 1.996us 1.996us 0.00% 2.047us 2.047us 1 []
  94. index 0.03% 20.012us 0.03% 20.012us 20.012us 0.18% 684.031us 684.031us 1 []
  95. view 0.01% 3.799us 0.01% 3.799us 3.799us 0.00% 1.055us 1.055us 1 []
  96. view 0.00% 3.215us 0.00% 3.215us 3.215us 0.00% 2.051us 2.051us 1 []
  97. repeat 0.03% 21.831us 0.03% 21.831us 21.831us 0.11% 416.770us 416.770us 1 []
  98. sub 0.02% 10.959us 0.02% 10.959us 10.959us 0.23% 861.184us 861.184us 1 []
  99. cat 0.47% 317.969us 0.47% 317.969us 317.969us 0.36% 1.355ms 1.355ms 1 []
  100. permute 0.01% 5.347us 0.01% 5.347us 5.347us 0.00% 2.047us 2.047us 1 []
  101. conv2d 0.53% 359.416us 0.53% 359.416us 359.416us 2.68% 10.055ms 10.055ms 1 []
  102. convolution 0.52% 354.719us 0.52% 354.719us 354.719us 2.68% 10.051ms 10.051ms 1 []
  103. _convolution 0.51% 350.513us 0.51% 350.513us 350.513us 2.68% 10.046ms 10.046ms 1 []
  104. contiguous 0.44% 300.717us 0.44% 300.717us 300.717us 2.35% 8.825ms 8.825ms 1 []
  105. -------------------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- -----------------------------------
  106. Self CPU time total: 68.103ms
  107. CUDA time total: 375.536ms
  108.  
  109.  
  110. PyTorch 1.5:
  111. Profiling:
  112. -------------------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- -----------------------------------
  113. Name Self CPU total % Self CPU total CPU total % CPU total CPU time avg CUDA total % CUDA total CUDA time avg Number of Calls Input Shapes
  114. -------------------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- -----------------------------------
  115. view 0.01% 7.796us 0.01% 7.796us 7.796us 0.00% 8.096us 8.096us 1 []
  116. transpose 0.01% 6.534us 0.01% 6.534us 6.534us 0.00% 7.072us 7.072us 1 []
  117. matmul 1.15% 714.146us 1.15% 714.146us 714.146us 0.11% 1.162ms 1.162ms 1 []
  118. expand 0.01% 4.776us 0.01% 4.776us 4.776us 0.00% 5.024us 5.024us 1 []
  119. contiguous 0.07% 45.864us 0.07% 45.864us 45.864us 0.00% 50.176us 50.176us 1 []
  120. empty_like 0.02% 13.670us 0.02% 13.670us 13.670us 0.00% 13.312us 13.312us 1 []
  121. empty 0.01% 5.912us 0.01% 5.912us 5.912us 0.00% 5.120us 5.120us 1 []
  122. view 0.01% 3.323us 0.01% 3.323us 3.323us 0.00% 2.048us 2.048us 1 []
  123. expand 0.00% 2.794us 0.00% 2.794us 2.794us 0.00% 2.048us 2.048us 1 []
  124. contiguous 0.00% 1.833us 0.00% 1.833us 1.833us 0.00% 2.048us 2.048us 1 []
  125. view 0.00% 2.642us 0.00% 2.642us 2.642us 0.00% 2.144us 2.144us 1 []
  126. bmm 0.97% 599.610us 0.97% 599.610us 599.610us 0.10% 1.063ms 1.063ms 1 []
  127. _unsafe_view 0.01% 6.444us 0.01% 6.444us 6.444us 0.00% 2.048us 2.048us 1 []
  128. mul 0.49% 302.799us 0.49% 302.799us 302.799us 0.09% 925.696us 925.696us 1 []
  129. pow 0.06% 35.092us 0.06% 35.092us 35.092us 0.00% 7.168us 7.168us 1 []
  130. sum 0.04% 27.173us 0.04% 27.173us 27.173us 0.00% 8.192us 8.192us 1 []
  131. neg 0.04% 25.660us 0.04% 25.660us 25.660us 0.00% 4.096us 4.096us 1 []
  132. sub 0.04% 23.064us 0.04% 23.064us 23.064us 0.09% 930.816us 930.816us 1 []
  133. transpose 0.01% 5.338us 0.01% 5.338us 5.338us 0.00% 2.048us 2.048us 1 []
  134. sub 0.41% 253.309us 0.41% 253.309us 253.309us 0.09% 958.464us 958.464us 1 []
  135. topk 0.10% 61.421us 0.10% 61.421us 61.421us 0.92% 9.712ms 9.712ms 1 []
  136. view 0.01% 5.888us 0.01% 5.888us 5.888us 0.00% 1.023us 1.023us 1 []
  137. mul 0.03% 16.798us 0.03% 16.798us 16.798us 0.00% 5.120us 5.120us 1 []
  138. add 0.02% 14.184us 0.02% 14.184us 14.184us 0.00% 24.576us 24.576us 1 []
  139. view 0.01% 3.626us 0.01% 3.626us 3.626us 0.00% 2.047us 2.047us 1 []
  140. transpose 0.01% 4.321us 0.01% 4.321us 4.321us 0.00% 2.048us 2.048us 1 []
  141. contiguous 0.04% 22.141us 0.04% 22.141us 22.141us 0.00% 16.384us 16.384us 1 []
  142. empty_like 0.01% 7.823us 0.01% 7.823us 7.823us 0.00% 6.145us 6.145us 1 []
  143. empty 0.01% 3.320us 0.01% 3.320us 3.320us 0.00% 2.047us 2.047us 1 []
  144. view 0.01% 3.128us 0.01% 3.128us 3.128us 0.00% 1.024us 1.024us 1 []
  145. slice 0.01% 3.929us 0.01% 3.929us 3.929us 0.00% 2.048us 2.048us 1 []
  146. to 0.00% 2.107us 0.00% 2.107us 2.107us 0.00% 2.049us 2.049us 1 []
  147. index 0.06% 34.122us 0.06% 34.122us 34.122us 0.01% 53.248us 53.248us 1 []
  148. view 0.01% 3.756us 0.01% 3.756us 3.756us 0.00% 2.048us 2.048us 1 []
  149. view 0.00% 2.607us 0.00% 2.607us 2.607us 0.00% 2.048us 2.048us 1 []
  150. repeat 0.04% 25.673us 0.04% 25.673us 25.673us 0.00% 25.600us 25.600us 1 []
  151. sub 0.02% 11.319us 0.02% 11.319us 11.319us 0.00% 45.057us 45.057us 1 []
  152. cat 2.17% 1.348ms 2.17% 1.348ms 1.348ms 0.01% 90.111us 90.111us 1 []
  153. permute 0.01% 7.439us 0.01% 7.439us 7.439us 0.00% 2.047us 2.047us 1 []
  154. conv2d 17.69% 10.962ms 17.69% 10.962ms 10.962ms 0.07% 776.191us 776.191us 1 []
  155. convolution 17.67% 10.954ms 17.67% 10.954ms 10.954ms 0.07% 772.097us 772.097us 1 []
  156. _convolution 17.66% 10.948ms 17.66% 10.948ms 10.948ms 0.07% 768.000us 768.000us 1 []
  157. contiguous 0.00% 2.277us 0.00% 2.277us 2.277us 0.00% 2.016us 2.016us 1 []
  158. cudnn_convolution 17.63% 10.926ms 17.63% 10.926ms 10.926ms 0.07% 760.833us 760.833us 1 []
  159. add 0.04% 26.855us 0.04% 26.855us 26.855us 0.00% 5.119us 5.119us 1 []
  160. batch_norm 0.17% 104.225us 0.17% 104.225us 104.225us 2.95% 30.992ms 30.992ms 1 []
  161. _batch_norm_impl_index 0.15% 94.261us 0.15% 94.261us 94.261us 2.95% 30.988ms 30.988ms 1 []
  162. contiguous 0.00% 2.242us 0.00% 2.242us 2.242us 0.00% 2.048us 2.048us 1 []
  163. contiguous 0.00% 1.804us 0.00% 1.804us 1.804us 0.00% 2.047us 2.047us 1 []
  164. contiguous 0.00% 1.700us 0.00% 1.700us 1.700us 0.00% 1.024us 1.024us 1 []
  165. contiguous 0.00% 1.707us 0.00% 1.707us 1.707us 0.00% 2.049us 2.049us 1 []
  166. contiguous 0.00% 1.677us 0.00% 1.677us 1.677us 0.00% 2.048us 2.048us 1 []
  167. cudnn_batch_norm 0.10% 62.648us 0.10% 62.648us 62.648us 2.95% 30.965ms 30.965ms 1 []
  168. leaky_relu 0.04% 26.307us 0.04% 26.307us 26.307us 0.06% 581.629us 581.629us 1 []
  169. max 0.43% 264.290us 0.43% 264.290us 264.290us 0.37% 3.897ms 3.897ms 1 []
  170. view 0.01% 6.879us 0.01% 6.879us 6.879us 0.00% 1.023us 1.023us 1 []
  171. transpose 0.01% 5.262us 0.01% 5.262us 5.262us 0.00% 2.043us 2.043us 1 []
  172. matmul 0.57% 350.224us 0.57% 350.224us 350.224us 0.10% 1.013ms 1.013ms 1 []
  173. expand 0.01% 6.017us 0.01% 6.017us 6.017us 0.00% 2.051us 2.051us 1 []
  174. contiguous 0.04% 24.700us 0.04% 24.700us 24.700us 0.01% 76.797us 76.797us 1 []
  175. empty_like 0.02% 9.631us 0.02% 9.631us 9.631us 0.00% 5.121us 5.121us 1 []
  176. empty 0.01% 5.201us 0.01% 5.201us 5.201us 0.00% 2.051us 2.051us 1 []
  177. view 0.01% 3.949us 0.01% 3.949us 3.949us 0.00% 1.023us 1.023us 1 []
  178. expand 0.01% 4.844us 0.01% 4.844us 4.844us 0.00% 2.051us 2.051us 1 []
  179. contiguous 0.00% 1.863us 0.00% 1.863us 1.863us 0.00% 2.051us 2.051us 1 []
  180. view 0.01% 3.381us 0.01% 3.381us 3.381us 0.00% 2.051us 2.051us 1 []
  181. bmm 0.44% 273.620us 0.44% 273.620us 273.620us 0.09% 907.266us 907.266us 1 []
  182. _unsafe_view 0.01% 5.699us 0.01% 5.699us 5.699us 0.00% 2.051us 2.051us 1 []
  183. mul 0.40% 248.458us 0.40% 248.458us 248.458us 0.09% 929.789us 929.789us 1 []
  184. pow 0.04% 22.604us 0.04% 22.604us 22.604us 0.00% 35.836us 35.836us 1 []
  185. sum 0.03% 17.347us 0.03% 17.347us 17.347us 0.00% 27.711us 27.711us 1 []
  186. neg 0.02% 15.336us 0.02% 15.336us 15.336us 0.00% 5.152us 5.152us 1 []
  187. sub 0.02% 14.694us 0.02% 14.694us 14.694us 0.09% 935.934us 935.934us 1 []
  188. transpose 0.01% 5.474us 0.01% 5.474us 5.474us 0.00% 2.047us 2.047us 1 []
  189. sub 0.40% 250.967us 0.40% 250.967us 250.967us 0.09% 964.613us 964.613us 1 []
  190. topk 0.05% 32.507us 0.05% 32.507us 32.507us 0.91% 9.511ms 9.511ms 1 []
  191. view 0.01% 5.033us 0.01% 5.033us 5.033us 0.00% 4.098us 4.098us 1 []
  192. mul 0.02% 14.173us 0.02% 14.173us 14.173us 0.00% 5.121us 5.121us 1 []
  193. add 0.02% 12.398us 0.02% 12.398us 12.398us 0.00% 28.672us 28.672us 1 []
  194. view 0.01% 3.119us 0.01% 3.119us 3.119us 0.00% 2.043us 2.043us 1 []
  195. transpose 0.01% 5.510us 0.01% 5.510us 5.510us 0.00% 2.047us 2.047us 1 []
  196. contiguous 0.04% 22.864us 0.04% 22.864us 22.864us 0.01% 79.871us 79.871us 1 []
  197. empty_like 0.01% 7.786us 0.01% 7.786us 7.786us 0.00% 6.117us 6.117us 1 []
  198. empty 0.01% 3.595us 0.01% 3.595us 3.595us 0.00% 2.047us 2.047us 1 []
  199. view 0.01% 4.547us 0.01% 4.547us 4.547us 0.00% 2.047us 2.047us 1 []
  200. slice 0.01% 4.868us 0.01% 4.868us 4.868us 0.00% 1.051us 1.051us 1 []
  201. to 0.00% 2.024us 0.00% 2.024us 2.024us 0.00% 2.047us 2.047us 1 []
  202. index 0.04% 22.766us 0.04% 22.766us 22.766us 0.08% 817.152us 817.152us 1 []
  203. view 0.01% 5.427us 0.01% 5.427us 5.427us 0.00% 2.055us 2.055us 1 []
  204. view 0.01% 3.500us 0.01% 3.500us 3.500us 0.00% 2.051us 2.051us 1 []
  205. repeat 0.04% 26.239us 0.04% 26.239us 26.239us 0.04% 420.859us 420.859us 1 []
  206. sub 0.02% 14.137us 0.02% 14.137us 14.137us 0.08% 870.398us 870.398us 1 []
  207. cat 0.54% 334.395us 0.54% 334.395us 334.395us 0.13% 1.342ms 1.342ms 1 []
  208. permute 0.01% 7.325us 0.01% 7.325us 7.325us 0.00% 2.047us 2.047us 1 []
  209. conv2d 0.11% 66.843us 0.11% 66.843us 66.843us 0.12% 1.272ms 1.272ms 1 []
  210. convolution 0.10% 62.313us 0.10% 62.313us 62.313us 0.12% 1.269ms 1.269ms 1 []
  211. _convolution 0.09% 57.641us 0.09% 57.641us 57.641us 0.12% 1.265ms 1.265ms 1 []
  212. contiguous 0.00% 2.126us 0.00% 2.126us 2.126us 0.00% 2.039us 2.039us 1 []
  213. cudnn_convolution 0.07% 45.104us 0.07% 45.104us 45.104us 0.12% 1.255ms 1.255ms 1 []
  214. add 0.03% 16.567us 0.03% 16.567us 16.567us 0.00% 6.148us 6.148us 1 []
  215. -------------------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- --------------- -----------------------------------
  216. Self CPU time total: 61.985ms
  217. CUDA time total: 1.050s
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement