Advertisement
Guest User

Untitled

a guest
Jun 24th, 2018
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 17.78 KB | None | 0 0
  1. Type Time(%) Time Calls Avg Min Max Name
  2. GPU activities: 15.85% 2.62204s 1261 2.0793ms 2.0501ms 2.0966ms void _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::strain2_g<float>(float*, float, float, float, float, float, float, float, float, float, float, _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::strain2_g<float>, _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::strain2_g<float>, int, int, int, int, int, int, int, int)
  3. 11.43% 1.89129s 1261 1.4998ms 1.4687ms 1.5224ms void _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::diff_uvw_g<float>(float*, float, float, float, float, float, float, float, float, float, float, float, float, _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::diff_uvw_g<float>, _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::diff_uvw_g<float>, float, float, int, int, int, int, int, int, int, int)
  4. 5.87% 970.95ms 1261 769.98us 765.80us 873.92us void _GLOBAL__N__42_tmpxft_00016cc6_00000000_6_advec_2_cpp1_ii_45b0b051::advec_uvw_g<float>(float*, float, float, float, float, float, float, float, float, float, _GLOBAL__N__42_tmpxft_00016cc6_00000000_6_advec_2_cpp1_ii_45b0b051::advec_uvw_g<float>, _GLOBAL__N__42_tmpxft_00016cc6_00000000_6_advec_2_cpp1_ii_45b0b051::advec_uvw_g<float>, int, int, int, int, int, int, int, int)
  5. 5.76% 952.42ms 2522 377.64us 372.29us 383.20us void _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::diff_c_g<float>(float*, float, float, float, float, float, float, _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::diff_c_g<float>, _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::diff_c_g<float>, float, float, _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::diff_c_g<float>, int, int, int, int, int, int, int, int)
  6. 5.23% 865.61ms 2522 343.22us 337.47us 360.77us void _GLOBAL__N__42_tmpxft_00016cc6_00000000_6_advec_2_cpp1_ii_45b0b051::advec_s_g<float>(float*, float, float, float, float, float, float, float, _GLOBAL__N__42_tmpxft_00016cc6_00000000_6_advec_2_cpp1_ii_45b0b051::advec_s_g<float>, _GLOBAL__N__42_tmpxft_00016cc6_00000000_6_advec_2_cpp1_ii_45b0b051::advec_s_g<float>, int, int, int, int, int, int, int, int)
  7. 4.14% 685.73ms 5044 135.95us 133.51us 139.75us void _GLOBAL__N__39_tmpxft_0000edf4_00000000_6_pres_cpp1_ii_84c4c81a::complex_TF_x_g<float, float2>(float2*, float*, unsigned int, unsigned int, unsigned int, unsigned int, bool)
  8. 3.73% 616.36ms 5044 122.20us 118.98us 125.44us void _GLOBAL__N__39_tmpxft_0000edf4_00000000_6_pres_cpp1_ii_84c4c81a::transpose_g<float>(float*, _GLOBAL__N__39_tmpxft_0000edf4_00000000_6_pres_cpp1_ii_84c4c81a::transpose_g<float> const *, int, int, int)
  9. 3.50% 579.21ms 16393 35.332us 4.5120us 48.416us void _GLOBAL__N__50_tmpxft_0000ec38_00000000_6_boundary_cyclic_cpp1_ii_f45aa84e::boundary_cyclic_x_g<float>(float*, int, int, int, int, int, int, int, int, int)
  10. 3.00% 496.81ms 1261 393.98us 389.41us 420.61us void _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::tdma_g<float>(float*, float, float, float, float, int, int, int, int, int)
  11. 2.92% 483.22ms 1261 383.21us 379.68us 385.92us void _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::pres_in_g<float>(float*, float, float, float, float, float, float, float, float, float, _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::pres_in_g<float>, _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::pres_in_g<float>, _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::pres_in_g<float>, int, int, int, int, int, int, int, int, int, int)
  12. 2.86% 472.60ms 1261 374.78us 371.68us 378.11us void _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::pres_out_g<float>(float*, float, float, float, float, _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::pres_out_g<float>, _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::pres_out_g<float>, int, int, int, int, int, int, int, int)
  13. 2.67% 442.01ms 1261 350.52us 285.86us 414.72us void _GLOBAL__N__47_tmpxft_00017641_00000000_6_thermo_moist_cpp1_ii_950297f2::calc_buoyancy_tend_2nd_g<float>(float*, float, float, float, float, float, int, int, int, int, int, int, int, int)
  14. 2.42% 400.59ms 2100 190.75us 188.00us 193.70us void _GLOBAL__N__43_tmpxft_00016e2f_00000000_6_timeloop_cpp1_ii_b9d3d3de::rk3_g<float, int=0>(float*, float, double, int, int, int, int, int, int, int, int)
  15. 2.42% 400.16ms 2100 190.55us 187.97us 193.57us void _GLOBAL__N__43_tmpxft_00016e2f_00000000_6_timeloop_cpp1_ii_b9d3d3de::rk3_g<float, int=2>(float*, float, double, int, int, int, int, int, int, int, int)
  16. 2.42% 399.78ms 2100 190.37us 187.71us 193.25us void _GLOBAL__N__43_tmpxft_00016e2f_00000000_6_timeloop_cpp1_ii_b9d3d3de::rk3_g<float, int=1>(float*, float, double, int, int, int, int, int, int, int, int)
  17. 2.40% 397.73ms 1261 315.41us 312.61us 318.91us void _GLOBAL__N__40_tmpxft_00016dd9_00000000_6_force_cpp1_ii_f10283a0::coriolis_2nd_g<float>(float*, float, float, float, float, float, _GLOBAL__N__40_tmpxft_00016dd9_00000000_6_force_cpp1_ii_f10283a0::coriolis_2nd_g<float>, _GLOBAL__N__40_tmpxft_00016dd9_00000000_6_force_cpp1_ii_f10283a0::coriolis_2nd_g<float>, _GLOBAL__N__40_tmpxft_00016dd9_00000000_6_force_cpp1_ii_f10283a0::coriolis_2nd_g<float>, int, int, int, int, int, int, int, int)
  18. 2.30% 380.71ms 6305 60.382us 58.112us 62.945us void _GLOBAL__N__41_tmpxft_00016d62_00000000_6_buffer_cpp1_ii_59972844::buffer_g<float>(float*, float, float, float, _GLOBAL__N__41_tmpxft_00016d62_00000000_6_buffer_cpp1_ii_59972844::buffer_g<float>, _GLOBAL__N__41_tmpxft_00016d62_00000000_6_buffer_cpp1_ii_59972844::buffer_g<float>, _GLOBAL__N__41_tmpxft_00016d62_00000000_6_buffer_cpp1_ii_59972844::buffer_g<float>, _GLOBAL__N__41_tmpxft_00016d62_00000000_6_buffer_cpp1_ii_59972844::buffer_g<float>, int, int, int, int, int, int, int, int)
  19. 2.15% 355.70ms 3783 94.024us 90.049us 103.84us [CUDA memcpy DtoD]
  20. 2.01% 332.90ms 2522 132.00us 131.04us 133.06us void _GLOBAL__N__40_tmpxft_00016dd9_00000000_6_force_cpp1_ii_f10283a0::advec_wls_2nd_g<float>(float*, float, _GLOBAL__N__40_tmpxft_00016dd9_00000000_6_force_cpp1_ii_f10283a0::advec_wls_2nd_g<float> const *, _GLOBAL__N__40_tmpxft_00016dd9_00000000_6_force_cpp1_ii_f10283a0::advec_wls_2nd_g<float> const , int, int, int, int, int, int, int, int)
  21. 1.73% 286.44ms 1261 227.16us 222.66us 234.56us void _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::evisc_g<float>(float*, float, float, float, float, float, _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::evisc_g<float>, _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::evisc_g<float>, _GLOBAL__N__45_tmpxft_0001765e_00000000_6_diff_smag2_cpp1_ii_89250fe7::evisc_g<float>, int, int, int, int, int, int, int, int)
  22. 1.70% 281.19ms 2522 111.50us 110.50us 112.71us void _GLOBAL__N__40_tmpxft_00016dd9_00000000_6_force_cpp1_ii_f10283a0::large_scale_source_g<float>(float*, float, int, int, int, int, int, int, int, int)
  23. 1.63% 269.55ms 2522 106.88us 103.97us 110.15us void spVector0064C::kernelMem<unsigned int, float, fftDirection_t=1, unsigned int=16, unsigned int=4, LUT, ALL, WRITEBACK>(kernel_parameters_t<fft_mem_t, unsigned int, float>)
  24. 1.63% 269.23ms 2522 106.75us 103.26us 109.09us void spVector0064C::kernelMem<unsigned int, float, fftDirection_t=-1, unsigned int=16, unsigned int=4, LUT, ALL, WRITEBACK>(kernel_parameters_t<fft_mem_t, unsigned int, float>)
  25. 1.55% 256.09ms 2522 101.54us 99.777us 103.94us __nv_static_73__60_tmpxft_00004097_00000000_10_spRealComplex_compute_70_cpp1_ii_1f28721c__ZN13spRealComplex23preprocessC2C_kernelMemIjfL9fftAxii_t1EEEvP7ComplexIT0_EPKS4_T_15coordDivisors_tIS8_E7coord_tIS8_ESC_S8_S3_10callback_t
  26. 1.44% 238.67ms 2522 94.636us 92.448us 96.544us __nv_static_73__60_tmpxft_00004097_00000000_10_spRealComplex_compute_70_cpp1_ii_1f28721c__ZN13spRealComplex24postprocessC2C_kernelMemIjfL9fftAxii_t1EEEvP7ComplexIT0_EPKS4_T_15coordDivisors_tIS8_E7coord_tIS8_ESC_S8_S3_10callback_t
  27. 1.37% 226.96ms 1261 179.99us 177.83us 181.95us void _GLOBAL__N__47_tmpxft_00017641_00000000_6_thermo_moist_cpp1_ii_950297f2::calc_N2_g<float>(float*, float, float, float, int, int, int, int, int, int, int, int)
  28. 1.16% 191.88ms 1261 152.17us 151.17us 153.34us void _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::solve_in_g<float>(float*, float, float, float, float, float, float, float, float, int, int, int, int, int, int)
  29. 0.88% 146.41ms 1261 116.11us 114.85us 117.47us void _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::solve_out_g<float>(float*, float, int, int, int, int, int, int, int, int, int, int)
  30. 0.76% 126.31ms 1261 100.17us 99.617us 100.61us void _GLOBAL__N__39_tmpxft_0000edf4_00000000_6_pres_cpp1_ii_84c4c81a::normalize_g<float>(float*, int, int, int, _GLOBAL__N__39_tmpxft_0000edf4_00000000_6_pres_cpp1_ii_84c4c81a::normalize_g<float>)
  31. 0.55% 91.081ms 432 210.84us 207.36us 213.67us void _GLOBAL__N__42_tmpxft_00016cc6_00000000_6_advec_2_cpp1_ii_45b0b051::calc_cfl_g<float>(float*, float, float, float, float, _GLOBAL__N__42_tmpxft_00016cc6_00000000_6_advec_2_cpp1_ii_45b0b051::calc_cfl_g<float>, _GLOBAL__N__42_tmpxft_00016cc6_00000000_6_advec_2_cpp1_ii_45b0b051::calc_cfl_g<float>, int, int, int, int, int, int, int, int)
  32. 0.49% 80.313ms 16393 4.8990us 2.2080us 12.448us void _GLOBAL__N__50_tmpxft_0000ec38_00000000_6_boundary_cyclic_cpp1_ii_f45aa84e::boundary_cyclic_y_g<float>(float*, int, int, int, int, int, int, int, int, int)
  33. 0.36% 58.765ms 442 132.95us 132.23us 135.01us void Tools_g::reduce_interior_kernel<float, Tools_g::Reduce_type, int=64>(float const *, Tools_g::reduce_interior_kernel<float, Tools_g::Reduce_type, int=64>*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int)
  34. 0.28% 45.709ms 2522 18.124us 15.904us 20.608us void _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::surfs_g<float>(float*, float, float, float, float, float, _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::surfs_g<float>, _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::surfs_g<float>, int, int, int, int, int, Boundary_type)
  35. 0.22% 36.131ms 2624 13.769us 992ns 2.7830ms [CUDA memcpy HtoD]
  36. 0.20% 32.354ms 1261 25.657us 24.576us 27.552us void _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::surfm_flux_g<float>(float*, float, float, float, float, float, float, float, _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::surfm_flux_g<float>, _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::surfm_flux_g<float>, int, int, int, int, int, int, int, Boundary_type)
  37. 0.17% 28.866ms 3050 9.4640us 2.1120us 3.1697ms [CUDA memcpy DtoH]
  38. 0.12% 20.352ms 10 2.0352ms 2.0316ms 2.0580ms void _GLOBAL__N__41_tmpxft_00016dd0_00000000_6_fields_cpp1_ii_f10283a0::calc_tke_2nd_g<float>(float*, float, float, float, float, int, int, int, int, int, int, int, int)
  39. 0.12% 19.924ms 5044 3.9490us 3.6160us 8.9600us void _GLOBAL__N__43_tmpxft_00016d2c_00000000_6_boundary_cpp1_ii_59972844::calc_ghost_cells_top_2nd_g<float>(float*, float, Boundary_type, float, float, int, int, int)
  40. 0.09% 15.616ms 2522 6.1910us 5.0880us 10.080us void _GLOBAL__N__47_tmpxft_00017641_00000000_6_thermo_moist_cpp1_ii_950297f2::calc_buoyancy_flux_bot_g<float>(float*, float, float, float, float, float, int, int, int, int, int)
  41. 0.09% 14.945ms 5044 2.9620us 2.2080us 8.9280us void _GLOBAL__N__43_tmpxft_00016d2c_00000000_6_boundary_cpp1_ii_59972844::calc_ghost_cells_bot_2nd_g<float>(float*, float, Boundary_type, float, float, int, int, int)
  42. 0.07% 11.748ms 1261 9.3160us 8.8640us 15.616us void _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::du_tot_g<float>(float*, float, float, float, float, int, int, int, int, int, int, int)
  43. 0.07% 10.852ms 1261 8.6060us 8.1600us 9.0240us void _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::stability_g<float>(float*, float, float, float, float, float, float*, float*, int*, _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::stability_g<float>, _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::stability_g<float>, _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::stability_g<float>, int, int, int, int, int, Boundary_type, int*)
  44. 0.06% 9.8687ms 3875 2.5460us 2.3680us 9.6320us void Tools_g::reduce_all_kernel<float, Tools_g::Reduce_type, int=64>(float const *, Tools_g::reduce_all_kernel<float, Tools_g::Reduce_type, int=64>*, unsigned int, unsigned int, Tools_g::reduce_all_kernel<float, Tools_g::Reduce_type, int=64>)
  45. 0.06% 9.5917ms 1261 7.6060us 6.9120us 9.0560us void _GLOBAL__N__47_tmpxft_00017641_00000000_6_thermo_moist_cpp1_ii_950297f2::calc_buoyancy_bot_g<float>(float*, float, float, float, float, float, float, float, int, int, int, int, int)
  46. 0.04% 7.3149ms 1261 5.8000us 5.3760us 11.744us void _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::surfm_grad_g<float>(float*, float, float, float, float, float, _GLOBAL__N__51_tmpxft_0001765c_00000000_6_boundary_surface_cpp1_ii_89250fe7::surfm_grad_g<float>, int, int, int, int, int)
  47. 0.03% 4.6457ms 35 132.73us 131.84us 134.91us void Tools_g::reduce_interior_kernel<float, Tools_g::Reduce_type, int=64>(float const *, Tools_g::reduce_interior_kernel<float, Tools_g::Reduce_type, int=64>*, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int)
  48. 0.02% 2.6849ms 884 3.0370us 2.4000us 7.4240us void Tools_g::reduce_all_kernel<float, Tools_g::Reduce_type, int=64>(float const *, Tools_g::reduce_all_kernel<float, Tools_g::Reduce_type, int=64>*, unsigned int, unsigned int, Tools_g::reduce_all_kernel<float, Tools_g::Reduce_type, int=64>)
  49. 0.01% 2.2091ms 10 220.91us 219.94us 221.60us void _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::calc_divergence_g<float>(float*, float, float, float, float, float, float, _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::calc_divergence_g<float>, _GLOBAL__N__41_tmpxft_00016e0e_00000000_6_pres_2_cpp1_ii_b9d3d3de::calc_divergence_g<float>, int, int, int, int, int, int, int, int)
  50. 0.01% 2.1365ms 10 213.65us 212.51us 214.88us void _GLOBAL__N__41_tmpxft_00016dd0_00000000_6_fields_cpp1_ii_f10283a0::calc_mom_2nd_g<float>(float*, float, float, float, float, int, int, int, int, int, int, int, int)
  51. API calls: 55.95% 9.81214s 9454 1.0379ms 8.9790us 7.5793ms cudaMemcpy
  52. 28.93% 5.07278s 5044 1.0057ms 388.16us 2.5708ms cudaThreadSynchronize
  53. 7.62% 1.33656s 109095 12.251us 10.278us 1.4049ms cudaLaunch
  54. 3.20% 560.58ms 125 4.4847ms 1.1370us 542.08ms cudaFree
  55. 1.60% 281.15ms 1241720 226ns 158ns 1.4572ms cudaSetupArgument
  56. 1.57% 276.13ms 118 2.3401ms 5.4420us 270.23ms cudaMalloc
  57. 0.77% 134.97ms 10088 13.378us 11.665us 607.72us cudaLaunchKernel
  58. 0.22% 38.155ms 109095 349ns 265ns 588.83us cudaConfigureCall
  59. 0.06% 9.7390ms 5172 1.8830us 619ns 16.090us cudaFuncSetCacheConfig
  60. 0.02% 3.7240ms 8 465.50us 435.53us 592.56us cudaGetDeviceProperties
  61. 0.02% 3.6284ms 10088 359ns 167ns 6.9250us cudaPeekAtLastError
  62. 0.02% 3.3084ms 10088 327ns 266ns 25.721us cudaGetLastError
  63. 0.01% 2.1536ms 370 5.8200us 196ns 238.83us cuDeviceGetAttribute
  64. 0.01% 917.32us 4 229.33us 221.31us 241.11us cuDeviceTotalMem
  65. 0.00% 254.91us 1 254.91us 254.91us 254.91us cudaMemGetInfo
  66. 0.00% 209.24us 4 52.309us 40.561us 69.623us cuDeviceGetName
  67. 0.00% 163.10us 3 54.365us 45.208us 64.524us cudaMemcpy2D
  68. 0.00% 45.064us 48 938ns 415ns 5.3630us cudaGetDevice
  69. 0.00% 4.3520us 4 1.0880us 265ns 3.0280us cuDeviceGetCount
  70. 0.00% 2.7850us 6 464ns 271ns 925ns cuDeviceGet
  71. 0.00% 1.0380us 1 1.0380us 1.0380us 1.0380us cuInit
  72. 0.00% 824ns 1 824ns 824ns 824ns cuDriverGetVersion
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement