Advertisement
gatoatigrado3

cusparse_liblinear_problem

Nov 24th, 2013
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 5.10 KB | None | 0 0
  1. #define USE_NVCC
  2.  
  3. #include "cusparse_problem.h"
  4. #include <algorithm>
  5. #include <iostream>
  6. #include <cuda_runtime.h>
  7. #include "cusparse_v2.h"
  8.  
  9.  
  // File-local switch: when true, check_return_code also logs successful CUDA
  // calls; failures are logged regardless of this flag.
  static bool verbose_debug = false;
  11.  
  12.  
  13. static void check_return_code(std::string message, cudaError_t status) {
  14.     if (status != cudaSuccess) {
  15.         std::cerr << "\x1b[91mError performing operation: " << message
  16.             << "\x1b[0m" << std::endl;
  17.     } else if (verbose_debug) {
  18.         std::cerr << "\x1b[35m" << message + " succeeded\x1b[0m" << std::endl;
  19.     }
  20. }
  21.  
  22.  
  23. static void check_cusparse_call(std::string message, cusparseStatus_t status) {
  24.     if (status != CUSPARSE_STATUS_SUCCESS) {
  25.         std::cerr << "\x1b[91mError performing operation: " << message
  26.             << "\x1b[0m" << std::endl;
  27.     }
  28. }
  29.  
  30.  
  31. template<class T>
  32. static void copy_to_device(std::string name, T *dest, T *src, long num) {
  33.     check_return_code("Copying matrix " + name,
  34.         cudaMemcpy(dest, src, num * sizeof(T), cudaMemcpyHostToDevice));
  35. }
  36.  
  37.  
  38. template<class T>
  39. static void copy_to_host(std::string name, T *dest, T *src, long num) {
  40.     check_return_code("Copying matrix " + name,
  41.         cudaMemcpy(dest, src, num * sizeof(T), cudaMemcpyDeviceToHost));
  42. }
  43.  
  44.  
  45. template<class T>
  46. static void typed_cumalloc(std::string name, T **dest, long num) {
  47.     check_return_code(
  48.         "Allocating " + name,
  49.         cudaMalloc((void **)(dest), num * sizeof(T)));
  50. }
  51.  
  52.  
  53. CusparseCSRMatrix::CusparseCSRMatrix(const problem *prob_old)
  54.         : width(prob_old->n), height(prob_old->l)
  55. {
  56.     std::cerr << "initializing cusparse csr" << std::endl;
  57.  
  58.     csr_matrix *result = new csr_matrix;
  59.  
  60.     this->nnz = 0;  // non-zero values
  61.     for(int i=0; i < prob_old->l; i++) {
  62.         feature_node *s = prob_old->x[i];
  63.         while(s->index!=-1) {
  64.             nnz += 1;
  65.             s++;
  66.         }
  67.     }
  68.     std::cerr << "\x1b[94mNum non-zero values: " << nnz << "\x1b[0m" << std::endl;
  69.  
  70.     int rows_n = prob_old->l + 1;
  71.     host_matrix.csr_values = new double[nnz];
  72.     host_matrix.row_pointers = new int[rows_n];
  73.     host_matrix.column_indices = new int[nnz];
  74.  
  75.     // fill values
  76.     int nnz_index = 0;
  77.     for (int i = 0; i < prob_old->l; i++) {
  78.         feature_node *s = prob_old->x[i];
  79.         host_matrix.row_pointers[i] = nnz_index;
  80.         while(s->index!=-1) {
  81.             host_matrix.csr_values[nnz_index] = s->value;
  82.             host_matrix.column_indices[nnz_index] = s->index - 1;
  83.             nnz_index += 1;
  84.             s++;
  85.         }
  86.     }
  87.     host_matrix.row_pointers[prob_old->l] = nnz_index;
  88.  
  89.     // initialize cusparse
  90.     check_cusparse_call("cusparse initialization", cusparseCreate(&cusparse_handle));
  91.  
  92.     // copy to cuda
  93.     typed_cumalloc("values array", &(cuda_matrix.csr_values), nnz);
  94.     typed_cumalloc("row pointer array", &(cuda_matrix.row_pointers), rows_n);
  95.     typed_cumalloc("column indices", &(cuda_matrix.column_indices), nnz);
  96.  
  97.     copy_to_device("csr values", cuda_matrix.csr_values, host_matrix.csr_values, nnz);
  98.     copy_to_device("row pointer", cuda_matrix.row_pointers, host_matrix.row_pointers, rows_n);
  99.     copy_to_device(
  100.         "column indices",
  101.         cuda_matrix.column_indices,
  102.         host_matrix.column_indices,
  103.         nnz);
  104.  
  105.     // create a matrix description for the cusparse library
  106.     check_cusparse_call("create descriptor", cusparseCreateMatDescr(&descr));
  107.     cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
  108.     cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
  109.  
  110.     // pre-allocated some vectors for spmv
  111.     typed_cumalloc("input vector", &cuda_csr_mv_in, std::max(width, height));
  112.     // typed_cumalloc("dummy vector", &cuda_csr_mv_dummy, std::max(width, height));
  113.     typed_cumalloc("output vector", &cuda_csr_mv_out, std::max(width, height));
  114. }
  115.  
  116.  
  117. CusparseCSRMatrix::~CusparseCSRMatrix() {
  118.     cudaFree(cuda_csr_mv_in);
  119.     cudaFree(cuda_csr_mv_out);
  120.     cudaFree(cuda_matrix.column_indices);
  121.     cudaFree(cuda_matrix.row_pointers);
  122.     cudaFree(cuda_matrix.csr_values);
  123.     cusparseDestroy(cusparse_handle);
  124.     cudaDeviceReset();
  125. }
  126.  
  127.  
  128. void CusparseCSRMatrix::csr_XTv(double *vector, double *result) const {
  129.     copy_to_device("input vector", cuda_csr_mv_in, vector, height);
  130.     double d_one = 1.0;  // dummy value for alpha
  131.     double d_zero = 0.0;  // dummy value for beta
  132.     std::cerr << "parameters: "
  133.         << "width: " << width
  134.         << ", height: " << height
  135.         << ", nnz: " << nnz
  136.         << std::endl;
  137.     check_return_code("synchronize after input copy", cudaDeviceSynchronize());
  138.     check_cusparse_call(
  139.         "csr_mv",
  140.         cusparseDcsrmv(
  141.             cusparse_handle,
  142.             // CUSPARSE_OPERATION_NON_TRANSPOSE,
  143.             CUSPARSE_OPERATION_TRANSPOSE,
  144.             width,
  145.             height,
  146.             nnz,
  147.             &d_one,
  148.             descr,
  149.             cuda_matrix.csr_values,
  150.             cuda_matrix.row_pointers,
  151.             cuda_matrix.column_indices,
  152.             cuda_csr_mv_in,
  153.             &d_zero,
  154.             cuda_csr_mv_out));
  155.     check_return_code("synchronize after calculation", cudaDeviceSynchronize());
  156.     copy_to_host("output vector", result, cuda_csr_mv_out, width);
  157.     check_return_code("synchronize after output memcopy", cudaDeviceSynchronize());
  158. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement