View difference between Paste ID: mnv8xyPw and N3pDZWBm
SHOW: | | - or go back to the newest paste.
1
#define USE_NVCC
2
3
#include "cusparse_problem.h"
4
#include <algorithm>
5
#include <iostream>
6
#include <cuda_runtime.h>
7
#include "cusparse_v2.h"
8
9
10
static bool verbose_debug = false;
11
12
13
// Logs a CUDA runtime-API failure to stderr in red, including the
// human-readable error string; in verbose mode, successful calls are
// logged in magenta instead of being silent.
static void check_return_code(std::string message, cudaError_t status) {
    if (status == cudaSuccess) {
        if (verbose_debug) {
            std::cerr << "\x1b[35m" << message + " succeeded\x1b[0m" << std::endl;
        }
        return;
    }
    std::cerr << "\x1b[91mError performing operation: " << message
        << "; error: " << cudaGetErrorString(status)
        << "\x1b[0m" << std::endl;
}
22
23
24
// Reports a cuSPARSE failure on stderr in red.  Fixes two gaps relative
// to check_return_code: the numeric status code is now included in the
// message (previously silently dropped), and verbose mode logs
// successful calls, matching the CUDA-runtime checker's behavior.
static void check_cusparse_call(std::string message, cusparseStatus_t status) {
    if (status != CUSPARSE_STATUS_SUCCESS) {
        std::cerr << "\x1b[91mError performing operation: " << message
            << "; status code: " << status
            << "\x1b[0m" << std::endl;
    } else if (verbose_debug) {
        std::cerr << "\x1b[35m" << message + " succeeded\x1b[0m" << std::endl;
    }
}
30
31
32
// Copies `num` elements of type T from host memory `src` to device
// memory `dest`; any failure is reported through check_return_code,
// with `name` used only to label the log message.
// Fix: `src` is now const — the source buffer is never modified, and
// this lets callers pass pointers to const data.
template<class T>
static void copy_to_device(std::string name, T *dest, const T *src, long num) {
    check_return_code("Copying matrix " + name,
        cudaMemcpy(dest, src, num * sizeof(T), cudaMemcpyHostToDevice));
}
37
38
39
// Copies `num` elements of type T from device memory `src` to host
// memory `dest`; any failure is reported through check_return_code,
// with `name` used only to label the log message.
// Fix: `src` is now const — the device source buffer is read-only here.
template<class T>
static void copy_to_host(std::string name, T *dest, const T *src, long num) {
    check_return_code("Copying matrix " + name,
        cudaMemcpy(dest, src, num * sizeof(T), cudaMemcpyDeviceToHost));
}
44
45
46
// Allocates device memory for `num` elements of type T and stores the
// resulting device pointer in *dest; failures are reported through
// check_return_code with `name` identifying the buffer in the message.
template<class T>
static void typed_cumalloc(std::string name, T **dest, long num) {
    const size_t bytes = num * sizeof(T);
    check_return_code(
        "Allocating " + name,
        cudaMalloc(reinterpret_cast<void **>(dest), bytes));
}
52
53
54
// Builds a device-resident CSR copy of the problem's feature matrix
// (height = prob_old->l rows, width = prob_old->n columns), creates the
// cuSPARSE handle and matrix descriptor, and pre-allocates the work
// vectors used by the SpMV routines.
// Fix: removed `csr_matrix *result = new csr_matrix;` — the object was
// never used and leaked on every construction.
CusparseCSRMatrix::CusparseCSRMatrix(const problem *prob_old)
        : width(prob_old->n), height(prob_old->l)
{
    std::cerr << "initializing cusparse csr" << std::endl;

    // First pass: count the non-zero entries.  Each row's feature list
    // is terminated by a sentinel node with index == -1.
    this->nnz = 0;  // non-zero values
    for (int i = 0; i < prob_old->l; i++) {
        feature_node *s = prob_old->x[i];
        while (s->index != -1) {
            nnz += 1;
            s++;
        }
    }
    std::cerr << "\x1b[94mNum non-zero values: " << nnz << "\x1b[0m" << std::endl;

    // CSR row-pointer array always has (number of rows + 1) entries.
    int rows_n = prob_old->l + 1;
    host_matrix.csr_values = new double[nnz];
    host_matrix.row_pointers = new int[rows_n];
    host_matrix.column_indices = new int[nnz];

    // Second pass: fill the CSR arrays.  Feature indices are 1-based in
    // the input, so subtract 1 to get 0-based CSR column indices.
    int nnz_index = 0;
    for (int i = 0; i < prob_old->l; i++) {
        feature_node *s = prob_old->x[i];
        host_matrix.row_pointers[i] = nnz_index;
        while (s->index != -1) {
            host_matrix.csr_values[nnz_index] = s->value;
            host_matrix.column_indices[nnz_index] = s->index - 1;
            nnz_index += 1;
            s++;
        }
    }
    host_matrix.row_pointers[prob_old->l] = nnz_index;

    // initialize cusparse
    check_cusparse_call("cusparse initialization", cusparseCreate(&cusparse_handle));

    // Allocate the device-side CSR arrays and copy the host data over.
    typed_cumalloc("values array", &(cuda_matrix.csr_values), nnz);
    typed_cumalloc("row pointer array", &(cuda_matrix.row_pointers), rows_n);
    typed_cumalloc("column indices", &(cuda_matrix.column_indices), nnz);

    copy_to_device("csr values", cuda_matrix.csr_values, host_matrix.csr_values, nnz);
    copy_to_device("row pointer", cuda_matrix.row_pointers, host_matrix.row_pointers, rows_n);
    copy_to_device(
        "column indices",
        cuda_matrix.column_indices,
        host_matrix.column_indices,
        nnz);

    // create a matrix description for the cusparse library
    check_cusparse_call("create descriptor", cusparseCreateMatDescr(&descr));
    cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);

    // Pre-allocate SpMV in/out vectors big enough for either
    // orientation (transposed or not), so the multiply routines never
    // need to allocate.
    typed_cumalloc("input vector", &cuda_csr_mv_in, std::max(width, height));
    typed_cumalloc("output vector", &cuda_csr_mv_out, std::max(width, height));
}
116
117
118
// Releases all device and host resources owned by this matrix.
// Fixes: the three host-side CSR arrays (new[]'d in the constructor)
// and the cuSPARSE matrix descriptor were previously leaked, and
// cudaDeviceReset() was called here — which destroys the entire CUDA
// context for the process, invalidating every other live device
// allocation and library handle (including other instances of this
// class).  If a full device reset is wanted, do it once at program
// exit, not per object.
CusparseCSRMatrix::~CusparseCSRMatrix() {
    // Pre-allocated SpMV work vectors.
    cudaFree(cuda_csr_mv_in);
    cudaFree(cuda_csr_mv_out);
    // Device-side CSR arrays.
    cudaFree(cuda_matrix.column_indices);
    cudaFree(cuda_matrix.row_pointers);
    cudaFree(cuda_matrix.csr_values);
    // Host-side CSR arrays (previously leaked).
    delete[] host_matrix.csr_values;
    delete[] host_matrix.row_pointers;
    delete[] host_matrix.column_indices;
    // cuSPARSE objects: descriptor first (previously leaked), then handle.
    cusparseDestroyMatDescr(descr);
    cusparseDestroy(cusparse_handle);
}
127
128
129
// Computes result = X^T * vector, where X is the (height x width) CSR
// matrix held on the device.  `vector` is a host pointer with `height`
// entries; `result` is a host pointer with room for `width` entries.
// Fix: cusparseDcsrmv takes (m, n) as the dimensions of the stored CSR
// matrix itself, independent of the transpose flag.  The row-pointer
// array has height + 1 entries, so m must be `height` and n `width`;
// the previous call passed (width, height), which is wrong for any
// non-square matrix.  With CUSPARSE_OPERATION_TRANSPOSE the input
// vector then has m == height entries and the output n == width,
// matching the copies below.
// NOTE(review): cusparseDcsrmv was removed in CUDA 11; migrate to the
// generic cusparseSpMV API when upgrading the toolkit.
void CusparseCSRMatrix::csr_XTv(double *vector, double *result) const {
    copy_to_device("input vector", cuda_csr_mv_in, vector, height);
    double d_one = 1.0;   // alpha: take the product unscaled
    double d_zero = 0.0;  // beta: overwrite the output vector
    if (verbose_debug) {  // previously printed unconditionally on every call
        std::cerr << "parameters: "
            << "width: " << width
            << ", height: " << height
            << ", nnz: " << nnz
            << std::endl;
    }
    check_return_code("synchronize after input copy", cudaDeviceSynchronize());
    check_cusparse_call(
        "csr_mv",
        cusparseDcsrmv(
            cusparse_handle,
            CUSPARSE_OPERATION_TRANSPOSE,
            height,  // m: rows of X as stored
            width,   // n: columns of X as stored
            nnz,
            &d_one,
            descr,
            cuda_matrix.csr_values,
            cuda_matrix.row_pointers,
            cuda_matrix.column_indices,
            cuda_csr_mv_in,
            &d_zero,
            cuda_csr_mv_out));
    check_return_code("synchronize after calculation", cudaDeviceSynchronize());
    copy_to_host("output vector", result, cuda_csr_mv_out, width);
    check_return_code("synchronize after output memcopy", cudaDeviceSynchronize());
}