Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #define USE_NVCC
- #include "cusparse_problem.h"
- #include <algorithm>
- #include <iostream>
- #include <cuda_runtime.h>
- #include "cusparse_v2.h"
// Debug switch read by check_return_code: when true, successful CUDA calls are
// reported on stderr in addition to failures. Defaults to false (failures only).
- static bool verbose_debug = false;
- static void check_return_code(std::string message, cudaError_t status) {
- if (status != cudaSuccess) {
- std::cerr << "\x1b[91mError performing operation: " << message
- << "; error: " << cudaGetErrorString(status)
- << "\x1b[0m" << std::endl;
- } else if (verbose_debug) {
- std::cerr << "\x1b[35m" << message + " succeeded\x1b[0m" << std::endl;
- }
- }
- static void check_cusparse_call(std::string message, cusparseStatus_t status) {
- if (status != CUSPARSE_STATUS_SUCCESS) {
- std::cerr << "\x1b[91mError performing operation: " << message
- << "\x1b[0m" << std::endl;
- }
- }
- template<class T>
- static void copy_to_device(std::string name, T *dest, T *src, long num) {
- check_return_code("Copying matrix " + name,
- cudaMemcpy(dest, src, num * sizeof(T), cudaMemcpyHostToDevice));
- }
- template<class T>
- static void copy_to_host(std::string name, T *dest, T *src, long num) {
- check_return_code("Copying matrix " + name,
- cudaMemcpy(dest, src, num * sizeof(T), cudaMemcpyDeviceToHost));
- }
- template<class T>
- static void typed_cumalloc(std::string name, T **dest, long num) {
- check_return_code(
- "Allocating " + name,
- cudaMalloc((void **)(dest), num * sizeof(T)));
- }
- CusparseCSRMatrix::CusparseCSRMatrix(const problem *prob_old)
- : width(prob_old->n), height(prob_old->l)
- {
- std::cerr << "initializing cusparse csr" << std::endl;
- csr_matrix *result = new csr_matrix;
- this->nnz = 0; // non-zero values
- for(int i=0; i < prob_old->l; i++) {
- feature_node *s = prob_old->x[i];
- while(s->index!=-1) {
- nnz += 1;
- s++;
- }
- }
- std::cerr << "\x1b[94mNum non-zero values: " << nnz << "\x1b[0m" << std::endl;
- int rows_n = prob_old->l + 1;
- host_matrix.csr_values = new double[nnz];
- host_matrix.row_pointers = new int[rows_n];
- host_matrix.column_indices = new int[nnz];
- // fill values
- int nnz_index = 0;
- for (int i = 0; i < prob_old->l; i++) {
- feature_node *s = prob_old->x[i];
- host_matrix.row_pointers[i] = nnz_index;
- while(s->index!=-1) {
- host_matrix.csr_values[nnz_index] = s->value;
- host_matrix.column_indices[nnz_index] = s->index - 1;
- nnz_index += 1;
- s++;
- }
- }
- host_matrix.row_pointers[prob_old->l] = nnz_index;
- // initialize cusparse
- check_cusparse_call("cusparse initialization", cusparseCreate(&cusparse_handle));
- // copy to cuda
- typed_cumalloc("values array", &(cuda_matrix.csr_values), nnz);
- typed_cumalloc("row pointer array", &(cuda_matrix.row_pointers), rows_n);
- typed_cumalloc("column indices", &(cuda_matrix.column_indices), nnz);
- copy_to_device("csr values", cuda_matrix.csr_values, host_matrix.csr_values, nnz);
- copy_to_device("row pointer", cuda_matrix.row_pointers, host_matrix.row_pointers, rows_n);
- copy_to_device(
- "column indices",
- cuda_matrix.column_indices,
- host_matrix.column_indices,
- nnz);
- // create a matrix description for the cusparse library
- check_cusparse_call("create descriptor", cusparseCreateMatDescr(&descr));
- cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
- cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
- // pre-allocated some vectors for spmv
- typed_cumalloc("input vector", &cuda_csr_mv_in, std::max(width, height));
- // typed_cumalloc("dummy vector", &cuda_csr_mv_dummy, std::max(width, height));
- typed_cumalloc("output vector", &cuda_csr_mv_out, std::max(width, height));
- }
- CusparseCSRMatrix::~CusparseCSRMatrix() {
- cudaFree(cuda_csr_mv_in);
- cudaFree(cuda_csr_mv_out);
- cudaFree(cuda_matrix.column_indices);
- cudaFree(cuda_matrix.row_pointers);
- cudaFree(cuda_matrix.csr_values);
- cusparseDestroy(cusparse_handle);
- cudaDeviceReset();
- }
- void CusparseCSRMatrix::csr_XTv(double *vector, double *result) const {
- copy_to_device("input vector", cuda_csr_mv_in, vector, height);
- double d_one = 1.0; // dummy value for alpha
- double d_zero = 0.0; // dummy value for beta
- std::cerr << "parameters: "
- << "width: " << width
- << ", height: " << height
- << ", nnz: " << nnz
- << std::endl;
- check_return_code("synchronize after input copy", cudaDeviceSynchronize());
- check_cusparse_call(
- "csr_mv",
- cusparseDcsrmv(
- cusparse_handle,
- // CUSPARSE_OPERATION_NON_TRANSPOSE,
- CUSPARSE_OPERATION_TRANSPOSE,
- width,
- height,
- nnz,
- &d_one,
- descr,
- cuda_matrix.csr_values,
- cuda_matrix.row_pointers,
- cuda_matrix.column_indices,
- cuda_csr_mv_in,
- &d_zero,
- cuda_csr_mv_out));
- check_return_code("synchronize after calculation", cudaDeviceSynchronize());
- copy_to_host("output vector", result, cuda_csr_mv_out, width);
- check_return_code("synchronize after output memcopy", cudaDeviceSynchronize());
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement