Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
double t_start = omp_get_wtime();

// Copy the CSR graph to both GPUs in parallel: one OpenMP thread per device.
GraphCSR gpu_graph[2];
t1 = omp_get_wtime();
#pragma omp parallel num_threads(2)
{
    int tid = omp_get_thread_num();
    cudaSetDevice(tid);
    user_copy_graph_to_device(csr_graph, gpu_graph[tid]);
}
t2 = omp_get_wtime();
// FIX: this measures a host->device copy; the old message said "Device->host".
cout << "Host->device copy time: " << t2 - t1 << " sec" << endl;

// Run the algorithm, alternating iterations between the two GPUs.
// FIX: cudaDeviceSynchronize() only syncs the calling thread's CURRENT device,
// so with two GPUs we must sync each device explicitly before timing.
for (int device = 0; device < 2; device++)
{
    cudaSetDevice(device);
    cudaDeviceSynchronize();
}
t1 = omp_get_wtime();
int last_source = 0;
cout << "will do " << iterations << " iterations" << endl;
#pragma omp parallel shared(last_source)
{
    int tid = omp_get_thread_num();
    int current_gpu = tid % 2;
    // Each thread binds to one GPU once; the binding is sticky per thread.
    cudaSetDevice(current_gpu);
    // FIX: allocate the per-thread result buffer once, not on every iteration.
    int *local_result = new int[graph.vertices_count];
    #pragma omp for
    for (int i = 0; i < iterations; i++)
    {
        // NOTE(review): rand() is not guaranteed thread-safe; consider a
        // per-thread generator if reproducibility across runs matters.
        int source_vertex = rand() % graph.vertices_count;
        user_algorithm(gpu_graph[current_gpu], local_result, source_vertex);
        if (i == (iterations - 1))
        {
            // FIX: old size expression was n * sizeof(int) * vertices_count,
            // an n-fold overrun of user_result; copy exactly vertices_count ints.
            memcpy(user_result, local_result, graph.vertices_count * sizeof(int));
            last_source = source_vertex;
        }
    }
    delete [] local_result;
}
// Sync both devices so the timed region covers all launched kernels.
for (int device = 0; device < 2; device++)
{
    cudaSetDevice(device);
    cudaDeviceSynchronize();
}
t2 = omp_get_wtime();
double t_end = omp_get_wtime(); // kept: t_start/t_end may be consumed below this chunk
cout << "BFS wall time: " << t2 - t1 << " sec" << endl;
// NOTE(review): free_memory's signature is not visible here; confirm that
// passing &gpu_graph (pointer to array of 2) releases allocations on BOTH devices.
free_memory(&gpu_graph);
Advertisement
Add Comment
Please sign in to add a comment
Advertisement