Guest User

Intel MPI benchmarks

a guest
Feb 13th, 2014
72
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #---------------------------------------------------
  2. # Intel (R) MPI Benchmark Suite V4.0.0, MPI-1 part
  3. #---------------------------------------------------
  4. # Date : Wed Jan 29 00:18:01 2014
  5. # Machine : x86_64
  6. # System : Linux
  7. # Release : 2.6.32-431.3.1.el6.x86_64
  8. # Version : #1 SMP Fri Jan 3 21:39:27 UTC 2014
  9. # MPI Version : 3.0
  10. # MPI Thread Environment:
  11.  
  12. # New default behavior from Version 3.2 on:
  13.  
  14. # the number of iterations per message size is cut down
  15. # dynamically when a certain run time (per message size sample)
  16. # is expected to be exceeded. Time limit is defined by variable
  17. # "SECS_PER_SAMPLE" (=> IMB_settings.h)
  18. # or through the flag => -time
  19.  
  20.  
  21.  
  22. # Calling sequence was:
  23.  
  24. # ./IMB-MPI1
  25.  
  26. # Minimum message length in bytes: 0
  27. # Maximum message length in bytes: 4194304
  28. #
  29. # MPI_Datatype : MPI_BYTE
  30. # MPI_Datatype for reductions : MPI_FLOAT
  31. # MPI_Op : MPI_SUM
  32. #
  33. #
  34.  
  35. # List of Benchmarks to run:
  36.  
  37. # PingPong
  38. # PingPing
  39. # Sendrecv
  40. # Exchange
  41. # Allreduce
  42. # Reduce
  43. # Reduce_scatter
  44. # Allgather
  45. # Allgatherv
  46. # Gather
  47. # Gatherv
  48. # Scatter
  49. # Scatterv
  50. # Alltoall
  51. # Alltoallv
  52. # Bcast
  53. # Barrier
  54.  
  55. #---------------------------------------------------
  56. # Benchmarking PingPong
  57. # #processes = 2
  58. # ( 6 additional processes waiting in MPI_Barrier)
  59. #---------------------------------------------------
  60. #bytes #repetitions t[usec] Mbytes/sec
  61. 0 1000 0.46 0.00
  62. 1 1000 0.45 2.10
  63. 2 1000 0.45 4.20
  64. 4 1000 0.46 8.34
  65. 8 1000 0.47 16.39
  66. 16 1000 0.46 32.99
  67. 32 1000 0.73 42.03
  68. 64 1000 0.73 83.55
  69. 128 1000 0.77 158.44
  70. 256 1000 0.79 309.41
  71. 512 1000 0.84 578.20
  72. 1024 1000 1.04 936.23
  73. 2048 1000 1.45 1351.15
  74. 4096 1000 1.86 2098.90
  75. 8192 1000 2.84 2749.45
  76. 16384 1000 5.46 2862.77
  77. 32768 1000 10.89 2870.55
  78. 65536 640 20.11 3107.52
  79. 131072 320 36.41 3433.45
  80. 262144 160 80.66 3099.46
  81. 524288 80 133.92 3733.62
  82. 1048576 40 250.66 3989.45
  83. 2097152 20 481.85 4150.67
  84. 4194304 10 1243.20 3217.51
  85.  
  86. #---------------------------------------------------
  87. # Benchmarking PingPing
  88. # #processes = 2
  89. # ( 6 additional processes waiting in MPI_Barrier)
  90. #---------------------------------------------------
  91. #bytes #repetitions t[usec] Mbytes/sec
  92. 0 1000 0.97 0.00
  93. 1 1000 1.06 0.90
  94. 2 1000 1.03 1.85
  95. 4 1000 1.07 3.58
  96. 8 1000 1.10 6.96
  97. 16 1000 1.08 14.08
  98. 32 1000 1.05 29.06
  99. 64 1000 1.08 56.78
  100. 128 1000 1.13 108.20
  101. 256 1000 1.16 210.83
  102. 512 1000 1.25 391.51
  103. 1024 1000 1.44 678.60
  104. 2048 1000 1.84 1064.31
  105. 4096 1000 2.45 1593.77
  106. 8192 1000 3.53 2210.62
  107. 16384 1000 6.58 2374.23
  108. 32768 1000 12.97 2408.66
  109. 65536 640 39.96 1564.08
  110. 131072 320 72.23 1730.56
  111. 262144 160 160.73 1555.39
  112. 524288 80 267.06 1872.23
  113. 1048576 40 494.60 2021.84
  114. 2097152 20 938.70 2130.60
  115. 4194304 10 2498.10 1601.22
  116.  
  117. #-----------------------------------------------------------------------------
  118. # Benchmarking Sendrecv
  119. # #processes = 2
  120. # ( 6 additional processes waiting in MPI_Barrier)
  121. #-----------------------------------------------------------------------------
  122. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec
  123. 0 1000 0.84 0.84 0.84 0.00
  124. 1 1000 0.94 0.94 0.94 2.03
  125. 2 1000 0.94 0.94 0.94 4.06
  126. 4 1000 0.97 0.97 0.97 7.89
  127. 8 1000 0.94 0.94 0.94 16.23
  128. 16 1000 0.97 0.97 0.97 31.47
  129. 32 1000 0.94 0.94 0.94 64.94
  130. 64 1000 0.97 0.97 0.97 125.83
  131. 128 1000 1.06 1.06 1.06 229.85
  132. 256 1000 1.01 1.01 1.01 485.77
  133. 512 1000 1.14 1.14 1.14 858.16
  134. 1024 1000 1.33 1.33 1.33 1467.31
  135. 2048 1000 1.80 1.80 1.80 2166.62
  136. 4096 1000 2.39 2.39 2.39 3266.35
  137. 8192 1000 3.51 3.51 3.51 4446.43
  138. 16384 1000 6.45 6.45 6.45 4842.68
  139. 32768 1000 12.36 12.36 12.36 5054.94
  140. 65536 640 39.24 39.24 39.24 3185.62
  141. 131072 320 71.65 71.66 71.66 3488.71
  142. 262144 160 156.84 156.87 156.86 3187.40
  143. 524288 80 260.42 260.49 260.46 3838.96
  144. 1048576 40 484.72 484.90 484.81 4124.55
  145. 2097152 20 936.54 936.85 936.70 4269.61
  146. 4194304 10 2453.11 2454.71 2453.91 3259.04
  147.  
  148. #-----------------------------------------------------------------------------
  149. # Benchmarking Sendrecv
  150. # #processes = 4
  151. # ( 4 additional processes waiting in MPI_Barrier)
  152. #-----------------------------------------------------------------------------
  153. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec
  154. 0 1000 1.04 1.05 1.05 0.00
  155. 1 1000 1.13 1.13 1.13 1.68
  156. 2 1000 1.26 1.26 1.26 3.03
  157. 4 1000 1.13 1.13 1.13 6.76
  158. 8 1000 1.13 1.13 1.13 13.53
  159. 16 1000 1.12 1.12 1.12 27.20
  160. 32 1000 1.21 1.21 1.21 50.40
  161. 64 1000 1.12 1.12 1.12 108.89
  162. 128 1000 1.14 1.14 1.14 213.60
  163. 256 1000 1.19 1.19 1.19 411.41
  164. 512 1000 1.44 1.44 1.44 679.16
  165. 1024 1000 1.70 1.70 1.70 1146.22
  166. 2048 1000 2.38 2.38 2.38 1639.22
  167. 4096 1000 3.34 3.34 3.34 2338.40
  168. 8192 1000 5.01 5.01 5.01 3118.09
  169. 16384 1000 9.20 9.21 9.20 3394.86
  170. 32768 1000 19.12 19.13 19.13 3266.96
  171. 65536 640 39.06 39.07 39.07 3199.13
  172. 131072 320 72.50 72.56 72.53 3445.44
  173. 262144 160 177.38 177.52 177.45 2816.60
  174. 524288 80 373.65 374.39 374.02 2671.04
  175. 1048576 40 749.90 750.13 750.00 2666.20
  176. 2097152 20 1892.29 1897.80 1895.07 2107.70
  177. 4194304 10 4288.48 4289.70 4289.10 1864.93
  178.  
  179. #-----------------------------------------------------------------------------
  180. # Benchmarking Sendrecv
  181. # #processes = 8
  182. #-----------------------------------------------------------------------------
  183. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec
  184. 0 1000 1.12 1.12 1.12 0.00
  185. 1 1000 1.20 1.20 1.20 1.59
  186. 2 1000 1.20 1.21 1.20 3.16
  187. 4 1000 1.20 1.20 1.20 6.34
  188. 8 1000 1.20 1.20 1.20 12.67
  189. 16 1000 1.19 1.19 1.19 25.58
  190. 32 1000 1.28 1.28 1.28 47.68
  191. 64 1000 1.20 1.21 1.21 101.13
  192. 128 1000 1.25 1.25 1.25 195.79
  193. 256 1000 1.30 1.30 1.30 376.19
  194. 512 1000 1.60 1.60 1.60 611.53
  195. 1024 1000 1.86 1.86 1.86 1048.37
  196. 2048 1000 2.77 2.77 2.77 1411.20
  197. 4096 1000 4.11 4.11 4.11 1899.04
  198. 8192 1000 7.31 7.31 7.31 2136.88
  199. 16384 1000 14.82 14.82 14.82 2108.49
  200. 32768 1000 31.58 31.60 31.59 1977.97
  201. 65536 640 55.07 55.10 55.09 2268.40
  202. 131072 320 106.59 106.69 106.63 2343.30
  203. 262144 160 259.56 260.19 259.92 1921.65
  204. 524288 80 518.66 520.28 519.49 1922.05
  205. 1048576 40 1437.77 1445.12 1442.72 1383.97
  206. 2097152 20 3733.15 3779.95 3764.56 1058.22
  207. 4194304 10 8054.49 8161.02 8119.88 980.27
  208.  
  209. #-----------------------------------------------------------------------------
  210. # Benchmarking Exchange
  211. # #processes = 2
  212. # ( 6 additional processes waiting in MPI_Barrier)
  213. #-----------------------------------------------------------------------------
  214. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec
  215. 0 1000 1.99 1.99 1.99 0.00
  216. 1 1000 2.08 2.08 2.08 1.83
  217. 2 1000 2.13 2.13 2.13 3.59
  218. 4 1000 2.14 2.14 2.14 7.12
  219. 8 1000 2.12 2.12 2.12 14.40
  220. 16 1000 2.13 2.13 2.13 28.59
  221. 32 1000 2.10 2.10 2.10 58.10
  222. 64 1000 2.14 2.14 2.14 113.98
  223. 128 1000 2.22 2.22 2.22 219.65
  224. 256 1000 2.34 2.34 2.34 416.98
  225. 512 1000 2.62 2.62 2.62 746.63
  226. 1024 1000 3.06 3.06 3.06 1276.11
  227. 2048 1000 3.49 3.49 3.49 2239.17
  228. 4096 1000 4.65 4.65 4.65 3360.82
  229. 8192 1000 7.06 7.06 7.06 4428.86
  230. 16384 1000 13.98 13.98 13.98 4469.71
  231. 32768 1000 27.34 27.34 27.34 4571.71
  232. 65536 640 80.08 80.08 80.08 3121.76
  233. 131072 320 144.74 144.75 144.74 3454.31
  234. 262144 160 321.46 321.49 321.47 3110.55
  235. 524288 80 540.01 540.07 540.04 3703.19
  236. 1048576 40 998.22 998.35 998.29 4006.60
  237. 2097152 20 2001.86 2002.20 2002.03 3995.60
  238. 4194304 10 4831.91 4832.70 4832.30 3310.78
  239.  
  240. #-----------------------------------------------------------------------------
  241. # Benchmarking Exchange
  242. # #processes = 4
  243. # ( 4 additional processes waiting in MPI_Barrier)
  244. #-----------------------------------------------------------------------------
  245. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec
  246. 0 1000 2.15 2.15 2.15 0.00
  247. 1 1000 2.28 2.28 2.28 1.67
  248. 2 1000 2.28 2.28 2.28 3.35
  249. 4 1000 2.36 2.36 2.36 6.47
  250. 8 1000 2.29 2.29 2.29 13.30
  251. 16 1000 2.34 2.35 2.35 26.02
  252. 32 1000 2.29 2.30 2.29 53.17
  253. 64 1000 2.39 2.39 2.39 101.98
  254. 128 1000 2.43 2.43 2.43 200.69
  255. 256 1000 2.61 2.61 2.61 374.17
  256. 512 1000 2.81 2.81 2.81 694.06
  257. 1024 1000 3.33 3.33 3.33 1173.05
  258. 2048 1000 4.25 4.25 4.25 1839.45
  259. 4096 1000 6.00 6.00 6.00 2604.15
  260. 8192 1000 9.61 9.61 9.61 3250.87
  261. 16384 1000 18.12 18.13 18.13 3447.36
  262. 32768 1000 37.60 37.61 37.60 3323.50
  263. 65536 640 102.80 102.83 102.82 2431.20
  264. 131072 320 195.68 195.74 195.71 2554.40
  265. 262144 160 432.88 433.20 433.08 2308.40
  266. 524288 80 831.40 831.54 831.47 2405.19
  267. 1048576 40 1575.80 1576.30 1576.04 2537.58
  268. 2097152 20 4041.35 4041.80 4041.54 1979.31
  269. 4194304 10 8601.90 8604.38 8603.14 1859.52
  270.  
  271. #-----------------------------------------------------------------------------
  272. # Benchmarking Exchange
  273. # #processes = 8
  274. #-----------------------------------------------------------------------------
  275. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec
  276. 0 1000 2.25 2.26 2.26 0.00
  277. 1 1000 2.31 2.31 2.31 1.65
  278. 2 1000 2.32 2.33 2.32 3.28
  279. 4 1000 2.38 2.39 2.39 6.39
  280. 8 1000 2.31 2.31 2.31 13.20
  281. 16 1000 2.39 2.39 2.39 25.52
  282. 32 1000 2.31 2.32 2.31 52.69
  283. 64 1000 2.42 2.42 2.42 100.85
  284. 128 1000 2.42 2.43 2.43 201.10
  285. 256 1000 2.61 2.62 2.61 373.31
  286. 512 1000 2.95 2.95 2.95 661.18
  287. 1024 1000 3.63 3.64 3.63 1074.29
  288. 2048 1000 5.15 5.15 5.15 1516.41
  289. 4096 1000 7.81 7.82 7.81 1999.15
  290. 8192 1000 14.03 14.04 14.04 2225.63
  291. 16384 1000 27.18 27.19 27.18 2298.88
  292. 32768 1000 55.46 55.49 55.48 2252.58
  293. 65536 640 165.41 165.46 165.44 1510.96
  294. 131072 320 324.96 325.25 325.13 1537.28
  295. 262144 160 705.85 707.58 707.13 1413.26
  296. 524288 80 1224.10 1230.12 1228.19 1625.85
  297. 1048576 40 3552.77 3555.55 3554.16 1125.00
  298. 2097152 20 8016.46 8028.21 8023.92 996.49
  299. 4194304 10 16013.79 16396.00 16229.31 975.85
  300.  
  301. #----------------------------------------------------------------
  302. # Benchmarking Allreduce
  303. # #processes = 2
  304. # ( 6 additional processes waiting in MPI_Barrier)
  305. #----------------------------------------------------------------
  306. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  307. 0 1000 0.05 0.05 0.05
  308. 4 1000 1.56 1.56 1.56
  309. 8 1000 1.60 1.60 1.60
  310. 16 1000 1.59 1.59 1.59
  311. 32 1000 1.64 1.64 1.64
  312. 64 1000 1.68 1.68 1.68
  313. 128 1000 1.92 1.92 1.92
  314. 256 1000 1.98 1.99 1.99
  315. 512 1000 2.29 2.29 2.29
  316. 1024 1000 2.79 2.79 2.79
  317. 2048 1000 3.48 3.48 3.48
  318. 4096 1000 6.63 6.64 6.64
  319. 8192 1000 9.45 9.45 9.45
  320. 16384 1000 16.13 16.13 16.13
  321. 32768 1000 30.03 30.04 30.03
  322. 65536 640 68.60 68.61 68.61
  323. 131072 320 126.49 126.50 126.49
  324. 262144 160 245.30 245.32 245.31
  325. 524288 80 513.14 513.19 513.16
  326. 1048576 40 992.87 993.03 992.95
  327. 2097152 20 2121.85 2122.15 2122.00
  328. 4194304 10 6768.51 6774.81 6771.66
  329.  
  330. #----------------------------------------------------------------
  331. # Benchmarking Allreduce
  332. # #processes = 4
  333. # ( 4 additional processes waiting in MPI_Barrier)
  334. #----------------------------------------------------------------
  335. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  336. 0 1000 0.05 0.05 0.05
  337. 4 1000 2.52 2.52 2.52
  338. 8 1000 2.52 2.52 2.52
  339. 16 1000 2.59 2.59 2.59
  340. 32 1000 2.70 2.71 2.70
  341. 64 1000 2.67 2.67 2.67
  342. 128 1000 3.05 3.05 3.05
  343. 256 1000 3.09 3.09 3.09
  344. 512 1000 3.85 3.85 3.85
  345. 1024 1000 4.35 4.36 4.36
  346. 2048 1000 5.67 5.67 5.67
  347. 4096 1000 10.85 10.85 10.85
  348. 8192 1000 15.49 15.49 15.49
  349. 16384 1000 26.79 26.80 26.80
  350. 32768 1000 49.49 49.50 49.50
  351. 65536 640 105.52 105.53 105.53
  352. 131072 320 204.24 204.25 204.25
  353. 262144 160 467.91 467.98 467.94
  354. 524288 80 998.75 998.86 998.81
  355. 1048576 40 2179.32 2179.60 2179.47
  356. 2097152 20 5642.70 5643.65 5643.17
  357. 4194304 10 14539.60 14540.91 14540.23
  358.  
  359. #----------------------------------------------------------------
  360. # Benchmarking Allreduce
  361. # #processes = 8
  362. #----------------------------------------------------------------
  363. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  364. 0 1000 0.05 0.05 0.05
  365. 4 1000 4.04 4.04 4.04
  366. 8 1000 3.98 3.98 3.98
  367. 16 1000 4.09 4.09 4.09
  368. 32 1000 4.11 4.11 4.11
  369. 64 1000 4.27 4.27 4.27
  370. 128 1000 4.62 4.63 4.62
  371. 256 1000 4.91 4.91 4.91
  372. 512 1000 5.98 5.99 5.98
  373. 1024 1000 7.48 7.48 7.48
  374. 2048 1000 9.65 9.65 9.65
  375. 4096 1000 16.77 16.77 16.77
  376. 8192 1000 24.55 24.55 24.55
  377. 16384 1000 44.51 44.52 44.51
  378. 32768 1000 78.76 78.76 78.76
  379. 65536 640 153.80 153.82 153.81
  380. 131072 320 355.72 355.78 355.75
  381. 262144 160 850.33 850.49 850.43
  382. 524288 80 1972.30 1972.65 1972.48
  383. 1048576 40 5503.93 5507.25 5505.62
  384. 2097152 20 11941.65 11946.61 11944.48
  385. 4194304 10 28002.21 28030.40 28014.63
  386.  
  387. #----------------------------------------------------------------
  388. # Benchmarking Reduce
  389. # #processes = 2
  390. # ( 6 additional processes waiting in MPI_Barrier)
  391. #----------------------------------------------------------------
  392. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  393. 0 1000 0.05 0.05 0.05
  394. 4 1000 0.92 0.92 0.92
  395. 8 1000 0.93 0.93 0.93
  396. 16 1000 0.95 0.95 0.95
  397. 32 1000 1.00 1.01 1.00
  398. 64 1000 1.03 1.03 1.03
  399. 128 1000 1.09 1.09 1.09
  400. 256 1000 1.15 1.15 1.15
  401. 512 1000 1.48 1.48 1.48
  402. 1024 1000 1.77 1.77 1.77
  403. 2048 1000 2.26 2.26 2.26
  404. 4096 1000 4.80 4.80 4.80
  405. 8192 1000 6.97 6.97 6.97
  406. 16384 1000 11.01 11.01 11.01
  407. 32768 1000 20.00 20.01 20.01
  408. 65536 640 52.01 52.01 52.01
  409. 131072 320 100.92 100.93 100.93
  410. 262144 160 188.37 188.38 188.38
  411. 524288 80 414.40 414.45 414.42
  412. 1048576 40 896.05 896.13 896.09
  413. 2097152 20 2044.94 2045.19 2045.07
  414. 4194304 10 6881.09 6901.10 6891.10
  415.  
  416. #----------------------------------------------------------------
  417. # Benchmarking Reduce
  418. # #processes = 4
  419. # ( 4 additional processes waiting in MPI_Barrier)
  420. #----------------------------------------------------------------
  421. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  422. 0 1000 0.05 0.05 0.05
  423. 4 1000 1.39 1.39 1.39
  424. 8 1000 1.40 1.40 1.40
  425. 16 1000 1.44 1.45 1.44
  426. 32 1000 1.46 1.46 1.46
  427. 64 1000 1.51 1.52 1.52
  428. 128 1000 1.75 1.75 1.75
  429. 256 1000 1.82 1.82 1.82
  430. 512 1000 2.18 2.18 2.18
  431. 1024 1000 2.80 2.80 2.80
  432. 2048 1000 3.64 3.64 3.64
  433. 4096 1000 7.63 7.63 7.63
  434. 8192 1000 10.63 10.63 10.63
  435. 16384 1000 16.56 16.57 16.56
  436. 32768 1000 29.66 29.66 29.66
  437. 65536 640 62.49 62.51 62.50
  438. 131072 320 140.49 140.57 140.53
  439. 262144 160 294.73 294.98 294.85
  440. 524288 80 701.49 702.90 702.28
  441. 1048576 40 1642.48 1648.00 1645.70
  442. 2097152 20 4023.40 4050.74 4040.68
  443. 4194304 10 11699.49 11800.48 11766.01
  444.  
  445. #----------------------------------------------------------------
  446. # Benchmarking Reduce
  447. # #processes = 8
  448. #----------------------------------------------------------------
  449. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  450. 0 1000 0.05 0.05 0.05
  451. 4 1000 2.06 2.06 2.06
  452. 8 1000 2.20 2.20 2.20
  453. 16 1000 2.12 2.13 2.12
  454. 32 1000 2.16 2.17 2.16
  455. 64 1000 2.26 2.26 2.26
  456. 128 1000 2.46 2.46 2.46
  457. 256 1000 2.70 2.70 2.70
  458. 512 1000 3.20 3.20 3.20
  459. 1024 1000 4.00 4.01 4.00
  460. 2048 1000 5.53 5.54 5.54
  461. 4096 1000 10.90 10.91 10.91
  462. 8192 1000 15.05 15.05 15.05
  463. 16384 1000 23.66 23.67 23.66
  464. 32768 1000 41.62 41.64 41.63
  465. 65536 640 80.62 80.65 80.63
  466. 131072 320 188.97 189.12 189.04
  467. 262144 160 410.24 410.85 410.54
  468. 524288 80 1022.11 1023.65 1022.69
  469. 1048576 40 3534.05 3541.32 3537.00
  470. 2097152 20 7947.90 7988.14 7970.06
  471. 4194304 10 20539.59 20719.72 20644.02
  472.  
  473. #----------------------------------------------------------------
  474. # Benchmarking Reduce_scatter
  475. # #processes = 2
  476. # ( 6 additional processes waiting in MPI_Barrier)
  477. #----------------------------------------------------------------
  478. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  479. 0 1000 0.12 0.12 0.12
  480. 4 1000 1.18 1.26 1.22
  481. 8 1000 1.41 1.41 1.41
  482. 16 1000 1.41 1.41 1.41
  483. 32 1000 1.41 1.41 1.41
  484. 64 1000 1.41 1.41 1.41
  485. 128 1000 1.72 1.72 1.72
  486. 256 1000 1.70 1.70 1.70
  487. 512 1000 1.79 1.79 1.79
  488. 1024 1000 2.09 2.09 2.09
  489. 2048 1000 2.53 2.53 2.53
  490. 4096 1000 3.29 3.29 3.29
  491. 8192 1000 4.85 4.85 4.85
  492. 16384 1000 7.89 7.89 7.89
  493. 32768 1000 14.22 14.22 14.22
  494. 65536 640 47.40 47.40 47.40
  495. 131072 320 80.98 80.98 80.98
  496. 262144 160 152.62 152.63 152.62
  497. 524288 80 227.06 227.14 227.10
  498. 1048576 40 416.37 416.50 416.44
  499. 2097152 20 838.45 838.45 838.45
  500. 4194304 10 2020.50 2020.60 2020.55
  501.  
  502. #----------------------------------------------------------------
  503. # Benchmarking Reduce_scatter
  504. # #processes = 4
  505. # ( 4 additional processes waiting in MPI_Barrier)
  506. #----------------------------------------------------------------
  507. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  508. 0 1000 0.12 0.12 0.12
  509. 4 1000 1.21 2.54 2.10
  510. 8 1000 2.15 2.29 2.22
  511. 16 1000 2.55 2.55 2.55
  512. 32 1000 2.63 2.63 2.63
  513. 64 1000 2.58 2.58 2.58
  514. 128 1000 2.82 2.82 2.82
  515. 256 1000 2.82 2.82 2.82
  516. 512 1000 3.04 3.04 3.04
  517. 1024 1000 3.48 3.48 3.48
  518. 2048 1000 4.09 4.09 4.09
  519. 4096 1000 5.48 5.49 5.49
  520. 8192 1000 7.84 7.84 7.84
  521. 16384 1000 12.46 12.46 12.46
  522. 32768 1000 21.21 21.21 21.21
  523. 65536 640 43.04 43.05 43.05
  524. 131072 320 102.09 102.12 102.10
  525. 262144 160 215.66 215.69 215.67
  526. 524288 80 368.15 368.25 368.21
  527. 1048576 40 856.85 858.80 857.87
  528. 2097152 20 1823.79 1829.49 1826.85
  529. 4194304 10 4336.19 4355.98 4346.69
  530.  
  531. #----------------------------------------------------------------
  532. # Benchmarking Reduce_scatter
  533. # #processes = 8
  534. #----------------------------------------------------------------
  535. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  536. 0 1000 0.13 0.13 0.13
  537. 4 1000 0.96 5.60 3.44
  538. 8 1000 0.96 4.13 3.20
  539. 16 1000 3.34 4.00 3.71
  540. 32 1000 3.96 3.96 3.96
  541. 64 1000 3.98 3.98 3.98
  542. 128 1000 4.10 4.10 4.10
  543. 256 1000 4.27 4.27 4.27
  544. 512 1000 4.47 4.48 4.47
  545. 1024 1000 5.22 5.22 5.22
  546. 2048 1000 6.21 6.22 6.22
  547. 4096 1000 8.25 8.25 8.25
  548. 8192 1000 12.05 12.05 12.05
  549. 16384 1000 19.89 19.89 19.89
  550. 32768 1000 35.01 35.01 35.01
  551. 65536 640 68.90 68.91 68.90
  552. 131072 320 165.99 166.02 166.01
  553. 262144 160 374.51 374.59 374.55
  554. 524288 80 604.86 605.04 604.96
  555. 1048576 40 1299.40 1300.72 1300.08
  556. 2097152 20 3310.66 3321.25 3316.55
  557. 4194304 10 8290.41 8309.01 8303.02
  558.  
  559. #----------------------------------------------------------------
  560. # Benchmarking Allgather
  561. # #processes = 2
  562. # ( 6 additional processes waiting in MPI_Barrier)
  563. #----------------------------------------------------------------
  564. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  565. 0 1000 0.05 0.05 0.05
  566. 1 1000 1.10 1.10 1.10
  567. 2 1000 1.10 1.10 1.10
  568. 4 1000 1.21 1.21 1.21
  569. 8 1000 1.11 1.11 1.11
  570. 16 1000 1.10 1.10 1.10
  571. 32 1000 1.10 1.10 1.10
  572. 64 1000 1.19 1.19 1.19
  573. 128 1000 1.15 1.15 1.15
  574. 256 1000 1.17 1.17 1.17
  575. 512 1000 1.33 1.33 1.33
  576. 1024 1000 1.61 1.61 1.61
  577. 2048 1000 1.96 1.96 1.96
  578. 4096 1000 2.81 2.81 2.81
  579. 8192 1000 4.09 4.09 4.09
  580. 16384 1000 7.91 7.91 7.91
  581. 32768 1000 15.49 15.49 15.49
  582. 65536 640 46.31 46.31 46.31
  583. 131072 320 84.32 84.33 84.33
  584. 262144 160 182.63 182.65 182.64
  585. 524288 80 315.86 315.93 315.89
  586. 1048576 40 656.21 656.38 656.29
  587. 2097152 20 1391.55 1391.85 1391.70
  588. 4194304 10 3450.61 3453.40 3452.00
  589.  
  590. #----------------------------------------------------------------
  591. # Benchmarking Allgather
  592. # #processes = 4
  593. # ( 4 additional processes waiting in MPI_Barrier)
  594. #----------------------------------------------------------------
  595. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  596. 0 1000 0.05 0.05 0.05
  597. 1 1000 2.25 2.25 2.25
  598. 2 1000 2.24 2.24 2.24
  599. 4 1000 2.24 2.24 2.24
  600. 8 1000 2.31 2.32 2.31
  601. 16 1000 2.24 2.24 2.24
  602. 32 1000 2.37 2.37 2.37
  603. 64 1000 2.27 2.27 2.27
  604. 128 1000 2.39 2.39 2.39
  605. 256 1000 2.44 2.44 2.44
  606. 512 1000 2.87 2.88 2.87
  607. 1024 1000 3.42 3.42 3.42
  608. 2048 1000 4.80 4.81 4.81
  609. 4096 1000 7.25 7.25 7.25
  610. 8192 1000 11.89 11.89 11.89
  611. 16384 1000 22.67 22.68 22.68
  612. 32768 1000 57.86 57.88 57.87
  613. 65536 640 129.08 129.14 129.11
  614. 131072 320 241.88 241.96 241.92
  615. 262144 160 699.59 699.63 699.61
  616. 524288 80 1373.05 1373.21 1373.13
  617. 1048576 40 3774.70 3775.23 3774.95
  618. 2097152 20 7720.65 7722.50 7721.57
  619. 4194304 10 15470.00 15481.28 15475.82
  620.  
  621. #----------------------------------------------------------------
  622. # Benchmarking Allgather
  623. # #processes = 8
  624. #----------------------------------------------------------------
  625. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  626. 0 1000 0.05 0.05 0.05
  627. 1 1000 3.75 3.75 3.75
  628. 2 1000 3.65 3.66 3.66
  629. 4 1000 3.63 3.63 3.63
  630. 8 1000 3.65 3.65 3.65
  631. 16 1000 3.66 3.66 3.66
  632. 32 1000 3.70 3.71 3.71
  633. 64 1000 3.77 3.77 3.77
  634. 128 1000 4.09 4.09 4.09
  635. 256 1000 4.70 4.70 4.70
  636. 512 1000 6.13 6.14 6.13
  637. 1024 1000 8.90 8.90 8.90
  638. 2048 1000 14.33 14.33 14.33
  639. 4096 1000 26.08 26.08 26.08
  640. 8192 1000 51.01 51.01 51.01
  641. 16384 1000 118.31 118.33 118.32
  642. 32768 1000 249.35 249.38 249.36
  643. 65536 640 392.33 392.36 392.35
  644. 131072 320 788.99 789.26 789.16
  645. 262144 160 3394.36 3395.83 3395.28
  646. 524288 80 6876.10 6879.94 6878.27
  647. 1048576 40 14927.67 14930.95 14929.20
  648. 2097152 20 30903.20 30941.10 30928.29
  649. 4194304 10 60859.32 61331.49 61183.52
  650.  
  651. #----------------------------------------------------------------
  652. # Benchmarking Allgatherv
  653. # #processes = 2
  654. # ( 6 additional processes waiting in MPI_Barrier)
  655. #----------------------------------------------------------------
  656. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  657. 0 1000 0.07 0.07 0.07
  658. 1 1000 1.31 1.31 1.31
  659. 2 1000 1.31 1.31 1.31
  660. 4 1000 1.30 1.30 1.30
  661. 8 1000 1.30 1.31 1.30
  662. 16 1000 1.33 1.33 1.33
  663. 32 1000 1.36 1.37 1.36
  664. 64 1000 1.42 1.42 1.42
  665. 128 1000 1.45 1.45 1.45
  666. 256 1000 1.52 1.52 1.52
  667. 512 1000 1.81 1.81 1.81
  668. 1024 1000 2.10 2.10 2.10
  669. 2048 1000 2.49 2.49 2.49
  670. 4096 1000 3.39 3.39 3.39
  671. 8192 1000 5.36 5.36 5.36
  672. 16384 1000 10.78 10.78 10.78
  673. 32768 1000 21.62 21.62 21.62
  674. 65536 640 59.87 59.87 59.87
  675. 131072 320 112.05 112.06 112.05
  676. 262144 160 183.67 183.69 183.68
  677. 524288 80 318.09 318.16 318.12
  678. 1048576 40 659.63 659.82 659.73
  679. 2097152 20 1377.19 1377.45 1377.32
  680. 4194304 10 3435.02 3437.11 3436.06
  681.  
  682. #----------------------------------------------------------------
  683. # Benchmarking Allgatherv
  684. # #processes = 4
  685. # ( 4 additional processes waiting in MPI_Barrier)
  686. #----------------------------------------------------------------
  687. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  688. 0 1000 0.08 0.08 0.08
  689. 1 1000 2.53 2.53 2.53
  690. 2 1000 2.57 2.57 2.57
  691. 4 1000 2.54 2.54 2.54
  692. 8 1000 2.66 2.66 2.66
  693. 16 1000 2.54 2.55 2.54
  694. 32 1000 2.70 2.70 2.70
  695. 64 1000 2.64 2.64 2.64
  696. 128 1000 2.83 2.83 2.83
  697. 256 1000 2.94 2.94 2.94
  698. 512 1000 3.37 3.37 3.37
  699. 1024 1000 4.26 4.26 4.26
  700. 2048 1000 6.06 6.06 6.06
  701. 4096 1000 9.39 9.39 9.39
  702. 8192 1000 16.04 16.04 16.04
  703. 16384 1000 30.31 30.31 30.31
  704. 32768 1000 70.36 70.38 70.37
  705. 65536 640 158.52 158.55 158.54
  706. 131072 320 238.62 238.72 238.69
  707. 262144 160 700.48 700.55 700.51
  708. 524288 80 1366.21 1366.26 1366.24
  709. 1048576 40 3778.15 3778.65 3778.38
  710. 2097152 20 7712.79 7713.65 7713.19
  711. 4194304 10 15616.99 15630.10 15623.87
  712.  
  713. #----------------------------------------------------------------
  714. # Benchmarking Allgatherv
  715. # #processes = 8
  716. #----------------------------------------------------------------
  717. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  718. 0 1000 0.09 0.10 0.09
  719. 1 1000 4.26 4.26 4.26
  720. 2 1000 4.15 4.16 4.16
  721. 4 1000 4.16 4.17 4.17
  722. 8 1000 4.18 4.18 4.18
  723. 16 1000 4.27 4.27 4.27
  724. 32 1000 4.28 4.29 4.28
  725. 64 1000 4.43 4.43 4.43
  726. 128 1000 4.90 4.90 4.90
  727. 256 1000 5.60 5.60 5.60
  728. 512 1000 7.16 7.16 7.16
  729. 1024 1000 10.21 10.21 10.21
  730. 2048 1000 16.72 16.72 16.72
  731. 4096 1000 30.04 30.05 30.04
  732. 8192 1000 58.65 58.65 58.65
  733. 16384 1000 134.09 134.11 134.10
  734. 32768 1000 295.89 295.90 295.89
  735. 65536 640 392.54 392.57 392.55
  736. 131072 320 789.71 790.04 789.87
  737. 262144 160 3391.75 3392.43 3392.17
  738. 524288 80 6859.45 6864.29 6862.02
  739. 1048576 40 14924.20 14930.00 14928.14
  740. 2097152 20 30915.45 30939.05 30928.81
  741. 4194304 10 61120.70 61218.91 61181.18
  742.  
  743. #----------------------------------------------------------------
  744. # Benchmarking Gather
  745. # #processes = 2
  746. # ( 6 additional processes waiting in MPI_Barrier)
  747. #----------------------------------------------------------------
  748. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  749. 0 1000 0.05 0.05 0.05
  750. 1 1000 0.71 0.71 0.71
  751. 2 1000 0.72 0.72 0.72
  752. 4 1000 0.73 0.73 0.73
  753. 8 1000 0.73 0.73 0.73
  754. 16 1000 0.74 0.74 0.74
  755. 32 1000 0.81 0.81 0.81
  756. 64 1000 0.84 0.84 0.84
  757. 128 1000 0.88 0.88 0.88
  758. 256 1000 0.90 0.90 0.90
  759. 512 1000 1.09 1.09 1.09
  760. 1024 1000 1.08 1.08 1.08
  761. 2048 1000 1.37 1.37 1.37
  762. 4096 1000 1.83 1.83 1.83
  763. 8192 1000 2.85 2.85 2.85
  764. 16384 1000 5.58 5.59 5.58
  765. 32768 1000 10.94 10.94 10.94
  766. 65536 640 24.26 24.26 24.26
  767. 131072 320 47.31 47.32 47.32
  768. 262144 160 101.83 101.86 101.84
  769. 524288 80 180.91 180.95 180.93
  770. 1048576 40 353.10 353.22 353.16
  771. 2097152 20 774.50 774.80 774.65
  772. 4194304 10 1839.61 1844.31 1841.96
  773.  
  774. #----------------------------------------------------------------
  775. # Benchmarking Gather
  776. # #processes = 4
  777. # ( 4 additional processes waiting in MPI_Barrier)
  778. #----------------------------------------------------------------
  779. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  780. 0 1000 0.05 0.05 0.05
  781. 1 1000 1.25 1.25 1.25
  782. 2 1000 1.26 1.26 1.26
  783. 4 1000 1.37 1.38 1.38
  784. 8 1000 1.30 1.30 1.30
  785. 16 1000 1.31 1.32 1.31
  786. 32 1000 1.38 1.38 1.38
  787. 64 1000 1.40 1.41 1.40
  788. 128 1000 1.49 1.49 1.49
  789. 256 1000 1.64 1.65 1.64
  790. 512 1000 2.05 2.05 2.05
  791. 1024 1000 3.61 3.61 3.61
  792. 2048 1000 4.23 4.24 4.23
  793. 4096 1000 5.89 5.89 5.89
  794. 8192 1000 8.82 8.83 8.83
  795. 16384 1000 16.51 16.53 16.52
  796. 32768 1000 31.59 31.62 31.61
  797. 65536 640 51.05 51.12 51.09
  798. 131072 320 111.65 111.93 111.79
  799. 262144 160 244.10 245.03 244.57
  800. 524288 80 497.10 500.36 498.72
  801. 1048576 40 1151.17 1166.75 1158.88
  802. 2097152 20 2299.70 2360.09 2329.34
  803. 4194304 10 4630.61 4886.10 4757.38
  804.  
  805. #----------------------------------------------------------------
  806. # Benchmarking Gather
  807. # #processes = 8
  808. #----------------------------------------------------------------
  809. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  810. 0 1000 0.05 0.05 0.05
  811. 1 1000 1.96 1.96 1.96
  812. 2 1000 2.09 2.09 2.09
  813. 4 1000 2.01 2.01 2.01
  814. 8 1000 2.12 2.13 2.12
  815. 16 1000 2.10 2.10 2.10
  816. 32 1000 2.20 2.21 2.21
  817. 64 1000 2.41 2.42 2.42
  818. 128 1000 2.47 2.47 2.47
  819. 256 1000 2.91 2.91 2.91
  820. 512 1000 3.46 3.46 3.46
  821. 1024 1000 7.24 7.25 7.25
  822. 2048 1000 9.08 9.09 9.09
  823. 4096 1000 13.23 13.25 13.24
  824. 8192 1000 21.90 21.94 21.92
  825. 16384 1000 40.83 40.89 40.86
  826. 32768 1000 73.23 73.33 73.28
  827. 65536 640 155.23 155.56 155.40
  828. 131072 320 369.95 371.39 370.70
  829. 262144 160 775.80 781.72 778.94
  830. 524288 80 1537.24 1560.20 1549.16
  831. 1048576 40 3110.72 3205.42 3160.99
  832. 2097152 20 6201.99 6593.00 6411.09
  833. 4194304 10 11670.59 13161.80 12425.23
  834.  
  835. #----------------------------------------------------------------
  836. # Benchmarking Gatherv
  837. # #processes = 2
  838. # ( 6 additional processes waiting in MPI_Barrier)
  839. #----------------------------------------------------------------
  840. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  841. 0 1000 0.11 0.12 0.11
  842. 1 1000 0.70 0.70 0.70
  843. 2 1000 0.70 0.70 0.70
  844. 4 1000 0.70 0.70 0.70
  845. 8 1000 0.71 0.71 0.71
  846. 16 1000 0.71 0.71 0.71
  847. 32 1000 0.77 0.77 0.77
  848. 64 1000 0.82 0.82 0.82
  849. 128 1000 0.84 0.84 0.84
  850. 256 1000 0.82 0.82 0.82
  851. 512 1000 0.96 0.97 0.96
  852. 1024 1000 1.14 1.14 1.14
  853. 2048 1000 1.41 1.41 1.41
  854. 4096 1000 1.89 1.90 1.90
  855. 8192 1000 2.93 2.94 2.93
  856. 16384 1000 5.71 5.71 5.71
  857. 32768 1000 11.06 11.06 11.06
  858. 65536 640 24.33 24.33 24.33
  859. 131072 320 47.31 47.32 47.31
  860. 262144 160 101.76 101.78 101.77
  861. 524288 80 180.86 180.90 180.88
  862. 1048576 40 353.40 353.52 353.46
  863. 2097152 20 774.05 774.30 774.17
  864. 4194304 10 1824.09 1829.20 1826.64
  865.  
  866. #----------------------------------------------------------------
  867. # Benchmarking Gatherv
  868. # #processes = 4
  869. # ( 4 additional processes waiting in MPI_Barrier)
  870. #----------------------------------------------------------------
  871. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  872. 0 1000 0.09 0.12 0.10
  873. 1 1000 0.80 0.80 0.80
  874. 2 1000 0.81 0.81 0.81
  875. 4 1000 0.80 0.81 0.80
  876. 8 1000 0.92 0.92 0.92
  877. 16 1000 0.83 0.83 0.83
  878. 32 1000 0.83 0.83 0.83
  879. 64 1000 0.84 0.84 0.84
  880. 128 1000 0.87 0.88 0.88
  881. 256 1000 0.91 0.91 0.91
  882. 512 1000 1.06 1.06 1.06
  883. 1024 1000 1.21 1.22 1.22
  884. 2048 1000 1.53 1.54 1.54
  885. 4096 1000 2.27 2.27 2.27
  886. 8192 1000 3.55 3.56 3.55
  887. 16384 1000 6.75 6.76 6.75
  888. 32768 1000 13.18 13.20 13.19
  889. 65536 640 40.04 40.10 40.08
  890. 131072 320 78.20 78.43 78.35
  891. 262144 160 178.00 179.04 178.64
  892. 524288 80 308.64 312.06 310.74
  893. 1048576 40 831.82 847.35 840.99
  894. 2097152 20 1625.25 1687.85 1662.77
  895. 4194304 10 3433.99 3680.30 3584.97
  896.  
  897. #----------------------------------------------------------------
  898. # Benchmarking Gatherv
  899. # #processes = 8
  900. #----------------------------------------------------------------
  901. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  902. 0 1000 0.07 0.09 0.09
  903. 1 1000 0.93 0.93 0.93
  904. 2 1000 0.93 0.93 0.93
  905. 4 1000 0.94 0.94 0.94
  906. 8 1000 0.94 0.95 0.95
  907. 16 1000 0.95 0.96 0.95
  908. 32 1000 0.96 0.97 0.96
  909. 64 1000 0.99 0.99 0.99
  910. 128 1000 1.03 1.03 1.03
  911. 256 1000 1.09 1.09 1.09
  912. 512 1000 1.25 1.25 1.25
  913. 1024 1000 1.50 1.50 1.50
  914. 2048 1000 2.14 2.15 2.15
  915. 4096 1000 3.14 3.15 3.15
  916. 8192 1000 5.41 5.43 5.42
  917. 16384 1000 10.48 10.50 10.49
  918. 32768 1000 21.99 22.04 22.02
  919. 65536 640 69.66 69.90 69.80
  920. 131072 320 157.68 158.74 158.32
  921. 262144 160 465.79 472.29 469.73
  922. 524288 80 864.37 884.21 876.38
  923. 1048576 40 1688.65 1755.88 1728.63
  924. 2097152 20 3179.30 3517.81 3397.21
  925. 4194304 10 6382.01 7625.82 7182.30
  926.  
  927. #----------------------------------------------------------------
  928. # Benchmarking Scatter
  929. # #processes = 2
  930. # ( 6 additional processes waiting in MPI_Barrier)
  931. #----------------------------------------------------------------
  932. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  933. 0 1000 0.05 0.05 0.05
  934. 1 1000 0.72 0.72 0.72
  935. 2 1000 0.72 0.72 0.72
  936. 4 1000 0.72 0.72 0.72
  937. 8 1000 0.72 0.72 0.72
  938. 16 1000 0.74 0.74 0.74
  939. 32 1000 0.79 0.79 0.79
  940. 64 1000 0.82 0.83 0.83
  941. 128 1000 0.86 0.86 0.86
  942. 256 1000 0.89 0.89 0.89
  943. 512 1000 1.12 1.12 1.12
  944. 1024 1000 1.23 1.23 1.23
  945. 2048 1000 1.53 1.53 1.53
  946. 4096 1000 2.18 2.18 2.18
  947. 8192 1000 3.45 3.45 3.45
  948. 16384 1000 6.93 6.93 6.93
  949. 32768 1000 13.94 13.94 13.94
  950. 65536 640 28.97 28.97 28.97
  951. 131072 320 55.11 55.14 55.12
  952. 262144 160 117.25 117.37 117.31
  953. 524288 80 241.49 242.37 241.93
  954. 1048576 40 506.40 510.48 508.44
  955. 2097152 20 1082.75 1099.35 1091.05
  956. 4194304 10 2394.10 2463.51 2428.81
  957.  
  958. #----------------------------------------------------------------
  959. # Benchmarking Scatter
  960. # #processes = 4
  961. # ( 4 additional processes waiting in MPI_Barrier)
  962. #----------------------------------------------------------------
  963. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  964. 0 1000 0.05 0.06 0.05
  965. 1 1000 1.29 1.29 1.29
  966. 2 1000 1.28 1.28 1.28
  967. 4 1000 1.29 1.30 1.30
  968. 8 1000 1.29 1.30 1.30
  969. 16 1000 1.33 1.34 1.34
  970. 32 1000 1.36 1.36 1.36
  971. 64 1000 1.40 1.40 1.40
  972. 128 1000 1.44 1.44 1.44
  973. 256 1000 1.53 1.53 1.53
  974. 512 1000 1.79 1.79 1.79
  975. 1024 1000 2.01 2.02 2.02
  976. 2048 1000 2.85 2.85 2.85
  977. 4096 1000 4.03 4.03 4.03
  978. 8192 1000 7.68 7.68 7.68
  979. 16384 1000 15.60 15.61 15.61
  980. 32768 1000 37.31 37.32 37.32
  981. 65536 640 67.60 67.60 67.60
  982. 131072 320 153.65 153.68 153.67
  983. 262144 160 344.84 345.05 344.92
  984. 524288 80 805.07 806.24 805.53
  985. 1048576 40 2279.42 2283.90 2281.43
  986. 2097152 20 5501.95 5529.34 5516.18
  987. 4194304 10 7102.30 7210.21 7156.35
  988.  
  989. #----------------------------------------------------------------
  990. # Benchmarking Scatter
  991. # #processes = 8
  992. #----------------------------------------------------------------
  993. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  994. 0 1000 0.05 0.05 0.05
  995. 1 1000 1.45 1.45 1.45
  996. 2 1000 1.46 1.46 1.46
  997. 4 1000 1.47 1.48 1.47
  998. 8 1000 1.50 1.50 1.50
  999. 16 1000 1.53 1.53 1.53
  1000. 32 1000 1.65 1.66 1.65
  1001. 64 1000 1.61 1.62 1.61
  1002. 128 1000 1.68 1.69 1.69
  1003. 256 1000 1.93 1.93 1.93
  1004. 512 1000 2.24 2.24 2.24
  1005. 1024 1000 3.32 3.33 3.32
  1006. 2048 1000 5.13 5.13 5.13
  1007. 4096 1000 9.61 9.61 9.61
  1008. 8192 1000 19.13 19.14 19.13
  1009. 16384 1000 43.45 43.45 43.45
  1010. 32768 1000 99.50 99.50 99.50
  1011. 65536 640 228.74 228.75 228.74
  1012. 131072 320 524.25 524.32 524.28
  1013. 262144 160 1117.36 1118.07 1117.68
  1014. 524288 80 2411.41 2414.85 2413.20
  1015. 1048576 40 5029.48 5049.07 5038.80
  1016. 2097152 20 9665.39 9751.45 9708.51
  1017. 4194304 10 25193.00 25567.22 25429.36
  1018.  
  1019. #----------------------------------------------------------------
  1020. # Benchmarking Scatterv
  1021. # #processes = 2
  1022. # ( 6 additional processes waiting in MPI_Barrier)
  1023. #----------------------------------------------------------------
  1024. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1025. 0 1000 0.11 0.11 0.11
  1026. 1 1000 0.82 0.82 0.82
  1027. 2 1000 0.83 0.83 0.83
  1028. 4 1000 0.83 0.83 0.83
  1029. 8 1000 0.93 0.93 0.93
  1030. 16 1000 0.83 0.84 0.84
  1031. 32 1000 0.82 0.82 0.82
  1032. 64 1000 0.84 0.84 0.84
  1033. 128 1000 0.88 0.88 0.88
  1034. 256 1000 0.86 0.87 0.87
  1035. 512 1000 0.99 0.99 0.99
  1036. 1024 1000 1.22 1.22 1.22
  1037. 2048 1000 1.50 1.50 1.50
  1038. 4096 1000 2.17 2.17 2.17
  1039. 8192 1000 3.49 3.49 3.49
  1040. 16384 1000 6.31 6.31 6.31
  1041. 32768 1000 12.53 12.53 12.53
  1042. 65536 640 26.50 26.51 26.51
  1043. 131072 320 49.28 49.30 49.29
  1044. 262144 160 106.04 106.08 106.06
  1045. 524288 80 186.09 186.16 186.12
  1046. 1048576 40 361.96 362.10 362.03
  1047. 2097152 20 803.70 804.40 804.05
  1048. 4194304 10 1758.60 1760.01 1759.30
  1049.  
  1050. #----------------------------------------------------------------
  1051. # Benchmarking Scatterv
  1052. # #processes = 4
  1053. # ( 4 additional processes waiting in MPI_Barrier)
  1054. #----------------------------------------------------------------
  1055. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1056. 0 1000 0.09 0.13 0.11
  1057. 1 1000 1.04 1.04 1.04
  1058. 2 1000 1.05 1.05 1.05
  1059. 4 1000 1.15 1.15 1.15
  1060. 8 1000 1.05 1.05 1.05
  1061. 16 1000 1.05 1.06 1.05
  1062. 32 1000 1.04 1.04 1.04
  1063. 64 1000 1.08 1.08 1.08
  1064. 128 1000 1.12 1.12 1.12
  1065. 256 1000 1.16 1.17 1.16
  1066. 512 1000 1.34 1.34 1.34
  1067. 1024 1000 1.57 1.58 1.58
  1068. 2048 1000 2.00 2.00 2.00
  1069. 4096 1000 2.90 2.90 2.90
  1070. 8192 1000 4.72 4.72 4.72
  1071. 16384 1000 9.18 9.18 9.18
  1072. 32768 1000 18.44 18.46 18.45
  1073. 65536 640 48.83 48.88 48.86
  1074. 131072 320 93.73 93.94 93.86
  1075. 262144 160 204.01 204.97 204.60
  1076. 524288 80 349.38 352.80 351.48
  1077. 1048576 40 781.27 795.53 790.11
  1078. 2097152 20 1533.15 1587.09 1566.47
  1079. 4194304 10 3301.98 3550.70 3462.02
  1080.  
  1081. #----------------------------------------------------------------
  1082. # Benchmarking Scatterv
  1083. # #processes = 8
  1084. #----------------------------------------------------------------
  1085. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1086. 0 1000 0.08 0.10 0.09
  1087. 1 1000 1.53 1.53 1.53
  1088. 2 1000 1.53 1.53 1.53
  1089. 4 1000 1.53 1.53 1.53
  1090. 8 1000 1.53 1.53 1.53
  1091. 16 1000 1.54 1.54 1.54
  1092. 32 1000 1.63 1.63 1.63
  1093. 64 1000 1.61 1.61 1.61
  1094. 128 1000 1.70 1.70 1.70
  1095. 256 1000 1.86 1.87 1.87
  1096. 512 1000 2.06 2.06 2.06
  1097. 1024 1000 2.66 2.66 2.66
  1098. 2048 1000 3.29 3.29 3.29
  1099. 4096 1000 4.77 4.78 4.78
  1100. 8192 1000 8.06 8.07 8.06
  1101. 16384 1000 16.56 16.58 16.57
  1102. 32768 1000 35.01 35.05 35.03
  1103. 65536 640 83.40 83.60 83.52
  1104. 131072 320 177.98 178.64 178.34
  1105. 262144 160 413.71 418.24 416.53
  1106. 524288 80 742.24 757.03 751.35
  1107. 1048576 40 1484.10 1546.28 1522.64
  1108. 2097152 20 3445.85 3627.75 3548.79
  1109. 4194304 10 6555.10 7413.20 7066.23
  1110.  
  1111. #----------------------------------------------------------------
  1112. # Benchmarking Alltoall
  1113. # #processes = 2
  1114. # ( 6 additional processes waiting in MPI_Barrier)
  1115. #----------------------------------------------------------------
  1116. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1117. 0 1000 0.05 0.05 0.05
  1118. 1 1000 1.94 1.94 1.94
  1119. 2 1000 2.05 2.05 2.05
  1120. 4 1000 1.94 1.94 1.94
  1121. 8 1000 1.94 1.94 1.94
  1122. 16 1000 1.94 1.94 1.94
  1123. 32 1000 1.95 1.96 1.95
  1124. 64 1000 1.96 1.96 1.96
  1125. 128 1000 1.98 1.98 1.98
  1126. 256 1000 2.04 2.04 2.04
  1127. 512 1000 2.18 2.19 2.18
  1128. 1024 1000 2.42 2.42 2.42
  1129. 2048 1000 2.76 2.76 2.76
  1130. 4096 1000 3.49 3.49 3.49
  1131. 8192 1000 5.02 5.02 5.02
  1132. 16384 1000 8.82 8.82 8.82
  1133. 32768 1000 16.39 16.39 16.39
  1134. 65536 640 46.40 46.40 46.40
  1135. 131072 320 85.25 85.26 85.26
  1136. 262144 160 184.81 184.84 184.82
  1137. 524288 80 360.11 360.20 360.16
  1138. 1048576 40 693.50 693.60 693.55
  1139. 2097152 20 1769.15 1769.70 1769.42
  1140. 4194304 10 3479.00 3483.01 3481.01
  1141.  
  1142. #----------------------------------------------------------------
  1143. # Benchmarking Alltoall
  1144. # #processes = 4
  1145. # ( 4 additional processes waiting in MPI_Barrier)
  1146. #----------------------------------------------------------------
  1147. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1148. 0 1000 0.05 0.05 0.05
  1149. 1 1000 4.01 4.01 4.01
  1150. 2 1000 3.93 3.93 3.93
  1151. 4 1000 3.93 3.93 3.93
  1152. 8 1000 3.90 3.90 3.90
  1153. 16 1000 3.89 3.90 3.90
  1154. 32 1000 3.89 3.89 3.89
  1155. 64 1000 3.91 3.91 3.91
  1156. 128 1000 4.04 4.04 4.04
  1157. 256 1000 4.17 4.17 4.17
  1158. 512 1000 4.68 4.68 4.68
  1159. 1024 1000 5.30 5.30 5.30
  1160. 2048 1000 6.92 6.93 6.92
  1161. 4096 1000 9.65 9.65 9.65
  1162. 8192 1000 15.09 15.10 15.09
  1163. 16384 1000 27.17 27.18 27.17
  1164. 32768 1000 52.87 52.89 52.88
  1165. 65536 640 148.18 148.19 148.19
  1166. 131072 320 342.89 342.92 342.90
  1167. 262144 160 730.39 730.47 730.44
  1168. 524288 80 1957.90 1958.49 1958.21
  1169. 1048576 40 3891.97 3894.10 3893.07
  1170. 2097152 20 7493.79 7499.04 7496.47
  1171. 4194304 10 14320.02 14326.79 14321.95
  1172.  
  1173. #----------------------------------------------------------------
  1174. # Benchmarking Alltoall
  1175. # #processes = 8
  1176. #----------------------------------------------------------------
  1177. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1178. 0 1000 0.05 0.05 0.05
  1179. 1 1000 8.07 8.07 8.07
  1180. 2 1000 8.17 8.17 8.17
  1181. 4 1000 8.19 8.19 8.19
  1182. 8 1000 8.25 8.26 8.25
  1183. 16 1000 8.63 8.63 8.63
  1184. 32 1000 8.77 8.77 8.77
  1185. 64 1000 9.22 9.23 9.23
  1186. 128 1000 10.02 10.02 10.02
  1187. 256 1000 11.34 11.35 11.35
  1188. 512 1000 10.52 10.52 10.52
  1189. 1024 1000 12.58 12.58 12.58
  1190. 2048 1000 17.46 17.47 17.47
  1191. 4096 1000 26.20 26.20 26.20
  1192. 8192 1000 44.23 44.24 44.24
  1193. 16384 1000 84.27 84.29 84.28
  1194. 32768 1000 196.26 196.31 196.28
  1195. 65536 640 594.41 594.48 594.45
  1196. 131072 320 2143.71 2143.90 2143.84
  1197. 262144 160 4732.23 4733.35 4732.92
  1198. 524288 80 8312.08 8314.91 8314.06
  1199. 1048576 40 15473.82 15487.27 15483.10
  1200. 2097152 20 29428.49 29472.99 29460.38
  1201. 4194304 10 56933.21 57101.18 57042.83
  1202.  
  1203. #----------------------------------------------------------------
  1204. # Benchmarking Alltoallv
  1205. # #processes = 2
  1206. # ( 6 additional processes waiting in MPI_Barrier)
  1207. #----------------------------------------------------------------
  1208. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1209. 0 1000 0.22 0.27 0.24
  1210. 1 1000 1.92 1.92 1.92
  1211. 2 1000 2.05 2.05 2.05
  1212. 4 1000 1.89 1.90 1.90
  1213. 8 1000 1.90 1.90 1.90
  1214. 16 1000 1.87 1.87 1.87
  1215. 32 1000 1.92 1.92 1.92
  1216. 64 1000 1.94 1.94 1.94
  1217. 128 1000 1.94 1.95 1.94
  1218. 256 1000 1.94 1.94 1.94
  1219. 512 1000 2.13 2.13 2.13
  1220. 1024 1000 2.40 2.40 2.40
  1221. 2048 1000 2.72 2.72 2.72
  1222. 4096 1000 3.44 3.44 3.44
  1223. 8192 1000 4.99 4.99 4.99
  1224. 16384 1000 8.78 8.78 8.78
  1225. 32768 1000 16.34 16.34 16.34
  1226. 65536 640 47.28 47.28 47.28
  1227. 131072 320 85.99 86.00 86.00
  1228. 262144 160 186.88 186.91 186.90
  1229. 524288 80 364.30 364.36 364.33
  1230. 1048576 40 694.63 694.73 694.68
  1231. 2097152 20 1786.26 1786.85 1786.55
  1232. 4194304 10 3456.21 3459.81 3458.01
  1233.  
  1234. #----------------------------------------------------------------
  1235. # Benchmarking Alltoallv
  1236. # #processes = 4
  1237. # ( 4 additional processes waiting in MPI_Barrier)
  1238. #----------------------------------------------------------------
  1239. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1240. 0 1000 0.25 0.29 0.27
  1241. 1 1000 4.10 4.10 4.10
  1242. 2 1000 4.00 4.00 4.00
  1243. 4 1000 3.98 3.98 3.98
  1244. 8 1000 3.97 3.97 3.97
  1245. 16 1000 3.99 3.99 3.99
  1246. 32 1000 3.99 4.00 3.99
  1247. 64 1000 4.02 4.02 4.02
  1248. 128 1000 4.13 4.14 4.13
  1249. 256 1000 4.25 4.25 4.25
  1250. 512 1000 4.79 4.79 4.79
  1251. 1024 1000 5.49 5.49 5.49
  1252. 2048 1000 7.04 7.04 7.04
  1253. 4096 1000 9.75 9.76 9.76
  1254. 8192 1000 15.06 15.06 15.06
  1255. 16384 1000 27.24 27.25 27.25
  1256. 32768 1000 53.14 53.16 53.15
  1257. 65536 640 133.65 133.68 133.67
  1258. 131072 320 304.23 304.42 304.34
  1259. 262144 160 695.03 695.14 695.08
  1260. 524288 80 1784.84 1787.91 1786.51
  1261. 1048576 40 3852.40 3853.03 3852.69
  1262. 2097152 20 7396.46 7399.94 7398.20
  1263. 4194304 10 14339.30 14347.29 14343.30
  1264.  
  1265. #----------------------------------------------------------------
  1266. # Benchmarking Alltoallv
  1267. # #processes = 8
  1268. #----------------------------------------------------------------
  1269. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1270. 0 1000 0.34 0.51 0.39
  1271. 1 1000 8.63 8.63 8.63
  1272. 2 1000 8.69 8.69 8.69
  1273. 4 1000 8.70 8.70 8.70
  1274. 8 1000 8.67 8.67 8.67
  1275. 16 1000 8.70 8.71 8.70
  1276. 32 1000 8.65 8.65 8.65
  1277. 64 1000 8.83 8.83 8.83
  1278. 128 1000 9.20 9.20 9.20
  1279. 256 1000 9.48 9.48 9.48
  1280. 512 1000 10.71 10.71 10.71
  1281. 1024 1000 12.86 12.87 12.87
  1282. 2048 1000 17.72 17.73 17.72
  1283. 4096 1000 26.56 26.57 26.57
  1284. 8192 1000 44.70 44.71 44.70
  1285. 16384 1000 84.67 84.70 84.68
  1286. 32768 1000 196.35 196.41 196.39
  1287. 65536 640 557.22 557.29 557.27
  1288. 131072 320 1992.38 1992.75 1992.58
  1289. 262144 160 4704.55 4706.84 4706.09
  1290. 524288 80 8687.38 8690.79 8689.34
  1291. 1048576 40 16293.65 16303.07 16299.10
  1292. 2097152 20 31088.21 31134.05 31119.97
  1293. 4194304 10 59587.88 59909.01 59815.58
  1294.  
  1295. #----------------------------------------------------------------
  1296. # Benchmarking Bcast
  1297. # #processes = 2
  1298. # ( 6 additional processes waiting in MPI_Barrier)
  1299. #----------------------------------------------------------------
  1300. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1301. 0 1000 0.03 0.03 0.03
  1302. 1 1000 1.10 1.10 1.10
  1303. 2 1000 1.08 1.08 1.08
  1304. 4 1000 1.15 1.15 1.15
  1305. 8 1000 1.08 1.08 1.08
  1306. 16 1000 1.13 1.13 1.13
  1307. 32 1000 1.12 1.12 1.12
  1308. 64 1000 1.19 1.19 1.19
  1309. 128 1000 1.16 1.16 1.16
  1310. 256 1000 1.21 1.21 1.21
  1311. 512 1000 1.35 1.35 1.35
  1312. 1024 1000 1.65 1.65 1.65
  1313. 2048 1000 1.88 1.89 1.89
  1314. 4096 1000 2.52 2.52 2.52
  1315. 8192 1000 4.00 4.00 4.00
  1316. 16384 1000 7.29 7.29 7.29
  1317. 32768 1000 15.36 15.36 15.36
  1318. 65536 640 28.28 28.29 28.29
  1319. 131072 320 50.62 50.63 50.63
  1320. 262144 160 110.22 110.24 110.23
  1321. 524288 80 190.03 190.08 190.05
  1322. 1048576 40 356.30 356.40 356.35
  1323. 2097152 20 693.15 693.36 693.26
  1324. 4194304 10 1742.91 1743.39 1743.15
  1325.  
  1326. #----------------------------------------------------------------
  1327. # Benchmarking Bcast
  1328. # #processes = 4
  1329. # ( 4 additional processes waiting in MPI_Barrier)
  1330. #----------------------------------------------------------------
  1331. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1332. 0 1000 0.03 0.03 0.03
  1333. 1 1000 1.25 1.26 1.25
  1334. 2 1000 1.24 1.25 1.24
  1335. 4 1000 1.34 1.34 1.34
  1336. 8 1000 1.26 1.26 1.26
  1337. 16 1000 1.31 1.31 1.31
  1338. 32 1000 1.35 1.35 1.35
  1339. 64 1000 1.37 1.37 1.37
  1340. 128 1000 1.42 1.42 1.42
  1341. 256 1000 1.50 1.50 1.50
  1342. 512 1000 1.74 1.74 1.74
  1343. 1024 1000 2.05 2.05 2.05
  1344. 2048 1000 2.63 2.63 2.63
  1345. 4096 1000 3.58 3.58 3.58
  1346. 8192 1000 5.76 5.76 5.76
  1347. 16384 1000 11.96 11.96 11.96
  1348. 32768 1000 24.59 24.60 24.59
  1349. 65536 640 45.81 45.82 45.82
  1350. 131072 320 85.04 85.06 85.05
  1351. 262144 160 189.96 190.02 189.99
  1352. 524288 80 394.31 394.42 394.37
  1353. 1048576 40 845.10 845.35 845.22
  1354. 2097152 20 1790.45 1791.45 1790.95
  1355. 4194304 10 3985.60 3987.50 3986.55
  1356.  
  1357. #----------------------------------------------------------------
  1358. # Benchmarking Bcast
  1359. # #processes = 8
  1360. #----------------------------------------------------------------
  1361. #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1362. 0 1000 0.03 0.03 0.03
  1363. 1 1000 1.74 1.74 1.74
  1364. 2 1000 1.82 1.82 1.82
  1365. 4 1000 1.75 1.75 1.75
  1366. 8 1000 1.77 1.77 1.77
  1367. 16 1000 1.82 1.82 1.82
  1368. 32 1000 1.85 1.85 1.85
  1369. 64 1000 1.94 1.94 1.94
  1370. 128 1000 1.98 1.98 1.98
  1371. 256 1000 2.08 2.08 2.08
  1372. 512 1000 2.45 2.45 2.45
  1373. 1024 1000 2.96 2.96 2.96
  1374. 2048 1000 3.86 3.86 3.86
  1375. 4096 1000 5.63 5.64 5.63
  1376. 8192 1000 9.18 9.19 9.19
  1377. 16384 1000 14.26 14.27 14.26
  1378. 32768 1000 29.79 29.81 29.80
  1379. 65536 640 63.94 63.96 63.95
  1380. 131072 320 126.47 126.49 126.48
  1381. 262144 160 340.98 341.19 341.11
  1382. 524288 80 542.50 543.02 542.80
  1383. 1048576 40 1208.90 1209.75 1209.35
  1384. 2097152 20 3724.44 3732.96 3728.42
  1385. 4194304 10 7420.99 7430.60 7425.71
  1386.  
  1387. #---------------------------------------------------
  1388. # Benchmarking Barrier
  1389. # #processes = 2
  1390. # ( 6 additional processes waiting in MPI_Barrier)
  1391. #---------------------------------------------------
  1392. #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1393. 1000 1.69 1.70 1.69
  1394.  
  1395. #---------------------------------------------------
  1396. # Benchmarking Barrier
  1397. # #processes = 4
  1398. # ( 4 additional processes waiting in MPI_Barrier)
  1399. #---------------------------------------------------
  1400. #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1401. 1000 2.98 2.98 2.98
  1402.  
  1403. #---------------------------------------------------
  1404. # Benchmarking Barrier
  1405. # #processes = 8
  1406. #---------------------------------------------------
  1407. #repetitions t_min[usec] t_max[usec] t_avg[usec]
  1408. 1000 4.95 4.95 4.95
  1409.  
  1410.  
  1411. # All processes entering MPI_Finalize
RAW Paste Data