#define USE_NVCC

#include "cusparse_problem.h"
#include <algorithm>
#include <iostream>
#include <string>
#include <cuda_runtime.h>
#include "cusparse_v2.h"


static bool verbose_debug = false;


static void check_return_code(const std::string &message, cudaError_t status) {
    if (status != cudaSuccess) {
        std::cerr << "\x1b[91mError performing operation: " << message
                  << "; error: " << cudaGetErrorString(status)
                  << "\x1b[0m" << std::endl;
    } else if (verbose_debug) {
        std::cerr << "\x1b[35m" << message << " succeeded\x1b[0m" << std::endl;
    }
}


static void check_cusparse_call(const std::string &message, cusparseStatus_t status) {
    if (status != CUSPARSE_STATUS_SUCCESS) {
        std::cerr << "\x1b[91mError performing operation: " << message
                  << "; status code: " << status
                  << "\x1b[0m" << std::endl;
    }
}


template<class T>
static void copy_to_device(const std::string &name, T *dest, const T *src, long num) {
    check_return_code("Copying matrix " + name,
                      cudaMemcpy(dest, src, num * sizeof(T), cudaMemcpyHostToDevice));
}


template<class T>
static void copy_to_host(const std::string &name, T *dest, const T *src, long num) {
    check_return_code("Copying matrix " + name,
                      cudaMemcpy(dest, src, num * sizeof(T), cudaMemcpyDeviceToHost));
}


template<class T>
static void typed_cumalloc(const std::string &name, T **dest, long num) {
    check_return_code(
        "Allocating " + name,
        cudaMalloc((void **)(dest), num * sizeof(T)));
}


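// Typical round trip with the helpers above (a sketch; `n`, `host_buf`, and
// `dev` are placeholder names, not part of this file):
//
//     double *dev = nullptr;
//     typed_cumalloc("scratch", &dev, n);           // cudaMalloc of n doubles
//     copy_to_device("scratch", dev, host_buf, n);  // host -> device
//     copy_to_host("scratch", host_buf, dev, n);    // device -> host
//     cudaFree(dev);

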
CusparseCSRMatrix::CusparseCSRMatrix(const problem *prob_old)
    : width(prob_old->n), height(prob_old->l)
{
    std::cerr << "initializing cusparse csr" << std::endl;

    // count the non-zero values; each row of x is terminated by index == -1
    this->nnz = 0;
    for (int i = 0; i < prob_old->l; i++) {
        feature_node *s = prob_old->x[i];
        while (s->index != -1) {
            nnz += 1;
            s++;
        }
    }
    std::cerr << "\x1b[94mNum non-zero values: " << nnz << "\x1b[0m" << std::endl;

    int rows_n = prob_old->l + 1;
    host_matrix.csr_values = new double[nnz];
    host_matrix.row_pointers = new int[rows_n];
    host_matrix.column_indices = new int[nnz];

    // fill values; feature indices are 1-based, CSR column indices 0-based
    int nnz_index = 0;
    for (int i = 0; i < prob_old->l; i++) {
        feature_node *s = prob_old->x[i];
        host_matrix.row_pointers[i] = nnz_index;
        while (s->index != -1) {
            host_matrix.csr_values[nnz_index] = s->value;
            host_matrix.column_indices[nnz_index] = s->index - 1;
            nnz_index += 1;
            s++;
        }
    }
    host_matrix.row_pointers[prob_old->l] = nnz_index;

    // initialize cusparse
    check_cusparse_call("cusparse initialization", cusparseCreate(&cusparse_handle));

    // allocate device buffers and copy the three CSR arrays over
    typed_cumalloc("values array", &(cuda_matrix.csr_values), nnz);
    typed_cumalloc("row pointer array", &(cuda_matrix.row_pointers), rows_n);
    typed_cumalloc("column indices", &(cuda_matrix.column_indices), nnz);

    copy_to_device("csr values", cuda_matrix.csr_values, host_matrix.csr_values, nnz);
    copy_to_device("row pointer", cuda_matrix.row_pointers, host_matrix.row_pointers, rows_n);
    copy_to_device("column indices", cuda_matrix.column_indices, host_matrix.column_indices, nnz);

    // create a matrix description for the cusparse library
    check_cusparse_call("create descriptor", cusparseCreateMatDescr(&descr));
    cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);

    // pre-allocate some vectors for spmv
    typed_cumalloc("input vector", &cuda_csr_mv_in, std::max(width, height));
    // typed_cumalloc("dummy vector", &cuda_csr_mv_dummy, std::max(width, height));
    typed_cumalloc("output vector", &cuda_csr_mv_out, std::max(width, height));
}


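// For reference, a small worked example of the CSR layout produced above
// (assuming liblinear's 1-based feature indices): the 2 x 3 matrix
//
//     [ 1 0 2 ]
//     [ 0 3 0 ]
//
// is stored as
//
//     csr_values     = { 1.0, 2.0, 3.0 }
//     column_indices = { 0,   2,   1   }
//     row_pointers   = { 0, 2, 3 }   // length l + 1; last entry equals nnz

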
CusparseCSRMatrix::~CusparseCSRMatrix() {
    cudaFree(cuda_csr_mv_in);
    cudaFree(cuda_csr_mv_out);
    cudaFree(cuda_matrix.column_indices);
    cudaFree(cuda_matrix.row_pointers);
    cudaFree(cuda_matrix.csr_values);
    cusparseDestroy(cusparse_handle);
    // release the host-side copies allocated in the constructor
    delete[] host_matrix.csr_values;
    delete[] host_matrix.row_pointers;
    delete[] host_matrix.column_indices;
    // note: no cudaDeviceReset() here; resetting the device from a destructor
    // would invalidate every other live CUDA allocation and handle in the
    // process
}


void CusparseCSRMatrix::csr_XTv(double *vector, double *result) const {
    copy_to_device("input vector", cuda_csr_mv_in, vector, height);
    double d_one = 1.0;  // alpha: no scaling of the product
    double d_zero = 0.0; // beta: overwrite the output vector
    std::cerr << "parameters: "
              << "width: " << width
              << ", height: " << height
              << ", nnz: " << nnz
              << std::endl;
    check_return_code("synchronize after input copy", cudaDeviceSynchronize());
    // computes result = X^T * vector; m and n describe the matrix as stored
    // (height rows, width columns) regardless of the transpose flag
    check_cusparse_call(
        "csr_mv",
        cusparseDcsrmv(
            cusparse_handle,
            CUSPARSE_OPERATION_TRANSPOSE,
            height, // m: number of rows of X
            width,  // n: number of columns of X
            nnz,
            &d_one,
            descr,
            cuda_matrix.csr_values,
            cuda_matrix.row_pointers,
            cuda_matrix.column_indices,
            cuda_csr_mv_in,
            &d_zero,
            cuda_csr_mv_out));
    check_return_code("synchronize after calculation", cudaDeviceSynchronize());
    copy_to_host("output vector", result, cuda_csr_mv_out, width);
    check_return_code("synchronize after output memcopy", cudaDeviceSynchronize());
}
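

// A minimal usage sketch (hypothetical; `load_problem_somehow` is a
// placeholder for however the surrounding application fills in liblinear's
// `problem` struct):
//
//     #include <vector>
//
//     int main() {
//         problem prob = load_problem_somehow();
//         CusparseCSRMatrix X(&prob);
//         std::vector<double> v(prob.l, 1.0);   // one entry per row of X
//         std::vector<double> out(prob.n, 0.0); // one entry per column of X
//         X.csr_XTv(v.data(), out.data());      // out = X^T v
//         return 0;
//     }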