Advertisement
gatoatigrado3

cusparse_liblinear_problem

Nov 24th, 2013
165
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #define USE_NVCC
  2.  
  3. #include "cusparse_problem.h"
  4. #include <algorithm>
  5. #include <iostream>
  6. #include <cuda_runtime.h>
  7. #include "cusparse_v2.h"
  8.  
  9.  
  // When true, check_return_code also logs a "succeeded" line for every CUDA
  // runtime call that returns cudaSuccess; failures are logged regardless.
  10. static bool verbose_debug = false;
  11.  
  12.  
  13. static void check_return_code(std::string message, cudaError_t status) {
  14.     if (status != cudaSuccess) {
  15.         std::cerr << "\x1b[91mError performing operation: " << message
  16.             << "; error: " << cudaGetErrorString(status)
  17.             << "\x1b[0m" << std::endl;
  18.     } else if (verbose_debug) {
  19.         std::cerr << "\x1b[35m" << message + " succeeded\x1b[0m" << std::endl;
  20.     }
  21. }
  22.  
  23.  
  24. static void check_cusparse_call(std::string message, cusparseStatus_t status) {
  25.     if (status != CUSPARSE_STATUS_SUCCESS) {
  26.         std::cerr << "\x1b[91mError performing operation: " << message
  27.             << "\x1b[0m" << std::endl;
  28.     }
  29. }
  30.  
  31.  
  32. template<class T>
  33. static void copy_to_device(std::string name, T *dest, T *src, long num) {
  34.     check_return_code("Copying matrix " + name,
  35.         cudaMemcpy(dest, src, num * sizeof(T), cudaMemcpyHostToDevice));
  36. }
  37.  
  38.  
  39. template<class T>
  40. static void copy_to_host(std::string name, T *dest, T *src, long num) {
  41.     check_return_code("Copying matrix " + name,
  42.         cudaMemcpy(dest, src, num * sizeof(T), cudaMemcpyDeviceToHost));
  43. }
  44.  
  45.  
  46. template<class T>
  47. static void typed_cumalloc(std::string name, T **dest, long num) {
  48.     check_return_code(
  49.         "Allocating " + name,
  50.         cudaMalloc((void **)(dest), num * sizeof(T)));
  51. }
  52.  
  53.  
  54. CusparseCSRMatrix::CusparseCSRMatrix(const problem *prob_old)
  55.         : width(prob_old->n), height(prob_old->l)
  56. {
  57.     std::cerr << "initializing cusparse csr" << std::endl;
  58.  
  59.     csr_matrix *result = new csr_matrix;
  60.  
  61.     this->nnz = 0;  // non-zero values
  62.     for(int i=0; i < prob_old->l; i++) {
  63.         feature_node *s = prob_old->x[i];
  64.         while(s->index!=-1) {
  65.             nnz += 1;
  66.             s++;
  67.         }
  68.     }
  69.     std::cerr << "\x1b[94mNum non-zero values: " << nnz << "\x1b[0m" << std::endl;
  70.  
  71.     int rows_n = prob_old->l + 1;
  72.     host_matrix.csr_values = new double[nnz];
  73.     host_matrix.row_pointers = new int[rows_n];
  74.     host_matrix.column_indices = new int[nnz];
  75.  
  76.     // fill values
  77.     int nnz_index = 0;
  78.     for (int i = 0; i < prob_old->l; i++) {
  79.         feature_node *s = prob_old->x[i];
  80.         host_matrix.row_pointers[i] = nnz_index;
  81.         while(s->index!=-1) {
  82.             host_matrix.csr_values[nnz_index] = s->value;
  83.             host_matrix.column_indices[nnz_index] = s->index - 1;
  84.             nnz_index += 1;
  85.             s++;
  86.         }
  87.     }
  88.     host_matrix.row_pointers[prob_old->l] = nnz_index;
  89.  
  90.     // initialize cusparse
  91.     check_cusparse_call("cusparse initialization", cusparseCreate(&cusparse_handle));
  92.  
  93.     // copy to cuda
  94.     typed_cumalloc("values array", &(cuda_matrix.csr_values), nnz);
  95.     typed_cumalloc("row pointer array", &(cuda_matrix.row_pointers), rows_n);
  96.     typed_cumalloc("column indices", &(cuda_matrix.column_indices), nnz);
  97.  
  98.     copy_to_device("csr values", cuda_matrix.csr_values, host_matrix.csr_values, nnz);
  99.     copy_to_device("row pointer", cuda_matrix.row_pointers, host_matrix.row_pointers, rows_n);
  100.     copy_to_device(
  101.         "column indices",
  102.         cuda_matrix.column_indices,
  103.         host_matrix.column_indices,
  104.         nnz);
  105.  
  106.     // create a matrix description for the cusparse library
  107.     check_cusparse_call("create descriptor", cusparseCreateMatDescr(&descr));
  108.     cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
  109.     cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
  110.  
  111.     // pre-allocated some vectors for spmv
  112.     typed_cumalloc("input vector", &cuda_csr_mv_in, std::max(width, height));
  113.     // typed_cumalloc("dummy vector", &cuda_csr_mv_dummy, std::max(width, height));
  114.     typed_cumalloc("output vector", &cuda_csr_mv_out, std::max(width, height));
  115. }
  116.  
  117.  
  118. CusparseCSRMatrix::~CusparseCSRMatrix() {
  119.     cudaFree(cuda_csr_mv_in);
  120.     cudaFree(cuda_csr_mv_out);
  121.     cudaFree(cuda_matrix.column_indices);
  122.     cudaFree(cuda_matrix.row_pointers);
  123.     cudaFree(cuda_matrix.csr_values);
  124.     cusparseDestroy(cusparse_handle);
  125.     cudaDeviceReset();
  126. }
  127.  
  128.  
  129. void CusparseCSRMatrix::csr_XTv(double *vector, double *result) const {
  130.     copy_to_device("input vector", cuda_csr_mv_in, vector, height);
  131.     double d_one = 1.0;  // dummy value for alpha
  132.     double d_zero = 0.0;  // dummy value for beta
  133.     std::cerr << "parameters: "
  134.         << "width: " << width
  135.         << ", height: " << height
  136.         << ", nnz: " << nnz
  137.         << std::endl;
  138.     check_return_code("synchronize after input copy", cudaDeviceSynchronize());
  139.     check_cusparse_call(
  140.         "csr_mv",
  141.         cusparseDcsrmv(
  142.             cusparse_handle,
  143.             // CUSPARSE_OPERATION_NON_TRANSPOSE,
  144.             CUSPARSE_OPERATION_TRANSPOSE,
  145.             width,
  146.             height,
  147.             nnz,
  148.             &d_one,
  149.             descr,
  150.             cuda_matrix.csr_values,
  151.             cuda_matrix.row_pointers,
  152.             cuda_matrix.column_indices,
  153.             cuda_csr_mv_in,
  154.             &d_zero,
  155.             cuda_csr_mv_out));
  156.     check_return_code("synchronize after calculation", cudaDeviceSynchronize());
  157.     copy_to_host("output vector", result, cuda_csr_mv_out, width);
  158.     check_return_code("synchronize after output memcopy", cudaDeviceSynchronize());
  159. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement