CUDA IPC

Aug 26th, 2014
#include <mpi.h>
#include <boost/mpi.hpp>
#include <thrust/host_vector.h>

int main(int argc, char **argv) {
    // setup mpi + cuda
    // read input data (similarities)
    // decompose
    // execute algorithm
    // run the same computation five times on identical input to compare results
    thrust::host_vector<value_type> cluster_idx = compute(world, similarities, options);
    cluster_idx = compute(world, similarities, options);
    cluster_idx = compute(world, similarities, options);
    cluster_idx = compute(world, similarities, options);
    cluster_idx = compute(world, similarities, options);
    return 0;
}
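The MPI + CUDA setup is only hinted at above. Since the mpirun flags further down (btl_smcuda_use_cuda_ipc, btl_smcuda_use_cuda_ipc_same_gpu) only matter for ranks exchanging GPU buffers on the same node, and the _same_gpu variant only when two ranks share one device, the rank-to-GPU mapping is relevant context. A minimal sketch of one common way to bind each rank to a device; this is an illustrative assumption, not code from the paste:

#include <boost/mpi.hpp>
#include <cuda_runtime.h>

// Hypothetical sketch (not from the original paste): round-robin binding of
// MPI ranks to the GPUs visible on this node, done before any Thrust work.
int select_gpu(const boost::mpi::communicator& world) {
    int device_count = 0;
    cudaGetDeviceCount(&device_count);          // GPUs visible to this process
    int device = world.rank() % device_count;   // simple rank -> device mapping
    cudaSetDevice(device);                      // later CUDA calls use this device
    return device;
}

With -np 2 on a single node this puts the two ranks on different GPUs when two are present, or on the same GPU otherwise, which is the situation the _same_gpu flag addresses.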

template<class T>
thrust::host_vector<T> compute(
        const boost::mpi::communicator& communicator,
        const thrust::host_vector<T>& sim,
        const mpi_options_t& options) {
    //...
    //begin iterative algorithm
    for( int i=0; i<1000; i++ ) {
        //...
        //--- communicate partial results
        value_type* d_srp = thrust::raw_pointer_cast(&srp[0]);
        // boost::mpi::all_gather(communicator, boost::mpi::inplace(d_srp+columns*communicator.rank()).buffer, columns, d_srp);
        MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                      d_srp, columns, MPI_DOUBLE, communicator);
        //...
        //--- communicate exemplars
        value_type* d_dec = thrust::raw_pointer_cast(&dec[0]);
        size_t rows = sim.size()/columns;
        // boost::mpi::all_gather(communicator, boost::mpi::inplace(d_dec+options.lineoffset).buffer, rows, d_dec);
        MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                      d_dec, rows, MPI_DOUBLE, communicator);
    }
    //post-process: seems to have the same sync problem, since the number of
    //identified clusters does not match either (completely different results
    //than computed above)
    //...
    MPI_Allgather(....)
    //...
    MPI_Allgather(....)
    //... return cluster indices
}

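The commented-out boost::mpi::all_gather lines and the "sync problems" note above both revolve around handing a device pointer to an in-place MPI_Allgather. A minimal diagnostic sketch, assuming the buffer is a thrust::device_vector<double> and a CUDA-aware Open MPI build; the helper name is hypothetical and only illustrates forcing the device to finish writing the buffer before MPI reads it:

#include <cassert>
#include <mpi.h>
#include <cuda_runtime.h>
#include <thrust/device_vector.h>

// Hypothetical diagnostic helper (not from the original paste): in-place
// all-gather of a device buffer with an explicit device synchronization,
// so no kernel is still writing the buffer when MPI reads the pointer.
void allgather_device_inplace(thrust::device_vector<double>& buf,
                              int count_per_rank, MPI_Comm comm) {
    double* d_buf = thrust::raw_pointer_cast(buf.data());
    cudaError_t err = cudaDeviceSynchronize();   // flush all pending kernels
    assert(err == cudaSuccess);
    MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                  d_buf, count_per_rank, MPI_DOUBLE, comm);
}

If the results still differ from run to run with such a synchronization in place, a missing host/device sync can be ruled out and the remaining variable is the CUDA IPC path that the mpirun flags below disable.
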
mpirun --mca btl_smcuda_use_cuda_ipc 0 --mca btl_smcuda_use_cuda_ipc_same_gpu 0 -np 2 ./double_test ../data/similarities20000.double.-300 ex.20000.double.2.gpus 1000 1000 0.9 &>cout.20000.double.2.gpus

# datatype: double
# datapoints: 20000
# max_iterations: 1000
# conv_iterations: 1000
# damping: 0.9
# communicator.size: 2
# time elapsed [s]; iterations executed; convergent since; clusters identified
121.* 1000 807 20
121.* 1000 807 20
121.* 1000 807 20
121.* 1000 820 9
121.* 1000 820 9

mpirun --mca btl_smcuda_use_cuda_ipc 0 --mca btl_smcuda_use_cuda_ipc_same_gpu 0 -np 2 ./double_test ../data/similarities20000.double.-300 ex.20000.double.2.gpus 1000 1000 0.9 &>cout.20000.double.2.gpus

# datatype: double
# datapoints: 20000
# max_iterations: 1000
# conv_iterations: 1000
# damping: 0.9
# communicator.size: 2
# time elapsed [s]; iterations executed; convergent since; clusters identified
121.* 1000 807 20
121.* 1000 807 20
121.* 1000 807 20
121.* 1000 807 20
121.* 1000 807 20