Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- int main(int argc, char **argv) { // Driver sketch: calls compute() five times with the same arguments.
- // Set up MPI and CUDA (code elided in this excerpt).
- // Read the input data (similarities) (code elided in this excerpt).
- // Decompose (code elided in this excerpt).
- // Execute the algorithm; world, similarities and options are presumably defined in the elided setup above.
- thrust::host_vector<value_type> cluster_idx = compute(world, similarities, options );
- cluster_idx = compute(world, similarities, options ); // repeat run; overwrites the previous result
- cluster_idx = compute(world, similarities, options ); // repeat run; overwrites the previous result
- cluster_idx = compute(world, similarities, options ); // repeat run; overwrites the previous result
- cluster_idx = compute(world, similarities, options ); // repeat run; overwrites the previous result
- return 0;
- }
- template<class T> // Sketch of the iterative solver; elided parts are marked with "//..." or "(....)".
- thrust::host_vector<T> compute(
- const boost::mpi::communicator& communicator, // passed as the communicator argument of each MPI_Allgather below
- const thrust::host_vector<T>& sim, // NOTE(review): the body reads `similarities`, not `sim` - confirm which name is intended
- const mpi_options_t& options) { // in this excerpt only referenced by the commented-out all_gather (options.lineoffset)
- //... (setup elided; srp, dec and columns are presumably defined here - confirm)
- // Begin the iterative algorithm: a fixed 1000 passes in this excerpt.
- for( int i=0; i<1000; i++ ) {
- //... (per-iteration computation elided)
- //--- Communicate partial results.
- value_type* d_srp = thrust::raw_pointer_cast(&srp[0]); // raw pointer to the first element of srp
- // boost::mpi::all_gather(world, boost::mpi::inplace(d_srp+columns*communicator.rank()).buffer, columns, d_srp);
- MPI_Allgather( MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, // in-place gather: the send count and send type are ignored with MPI_IN_PLACE
- &d_srp[0], columns, MPI_DOUBLE, communicator); // NOTE(review): MPI_DOUBLE is hard-coded although the function is templated on T - confirm value_type is double
- //... (further per-iteration work elided)
- //--- Communicate exemplars.
- value_type* d_dec = thrust::raw_pointer_cast(&(dec[0])); // raw pointer to the first element of dec
- size_t rows = similarities.size()/columns; // NOTE(review): `similarities` is not a parameter of this function - confirm it is in scope
- // boost::mpi::all_gather(communicator, boost::mpi::inplace(d_dec+options.lineoffset).buffer, rows, d_dec);
- MPI_Allgather( MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, // in-place gather, same pattern as for srp above
- &d_dec[0], rows, MPI_DOUBLE, communicator); // NOTE(review): rows is a size_t but MPI counts are int - confirm it cannot overflow
- }
- // Post-processing: seems to have the same synchronization problems, since
- // the number of identified clusters does not work either (completely
- // different results than computed above).
- //... (post-processing code elided)
- MPI_Allgather(....) // arguments elided in the original excerpt
- //... (post-processing code elided)
- MPI_Allgather(....) // arguments elided in the original excerpt
- }
- mpirun --mca btl_smcuda_use_cuda_ipc 0 --mca btl_smcuda_use_cuda_ipc_same_gpu 0 -np 2 ./double_test ../data/similarities20000.double.-300 ex.20000.double.2.gpus 1000 1000 0.9 &>cout.20000.double.2.gpus
- # datatype: double
- # datapoints: 20000
- # max_iterations: 1000
- # conv_iterations: 1000
- # damping: 0.9
- # communicator.size: 2
- # time elapsed [s]; iterations executed; convergent since; clusters identified
- 121.* 1000 807 20
- 121.* 1000 807 20
- 121.* 1000 807 20
- 121.* 1000 820 9
- 121.* 1000 820 9
- mpirun --mca btl_smcuda_use_cuda_ipc 0 --mca btl_smcuda_use_cuda_ipc_same_gpu 0 -np 2 ./double_test ../data/similarities20000.double.-300 ex.20000.double.2.gpus 1000 1000 0.9 &>cout.20000.double.2.gpus
- # datatype: double
- # datapoints: 20000
- # max_iterations: 1000
- # conv_iterations: 1000
- # damping: 0.9
- # communicator.size: 2
- # time elapsed [s]; iterations executed; convergent since; clusters identified
- 121.* 1000 807 20
- 121.* 1000 807 20
- 121.* 1000 807 20
- 121.* 1000 807 20
- 121.* 1000 807 20
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement