Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- $ cat t341.cu
- #include <stdlib.h>
- #include <stdio.h>
- #include <cusp/csr_matrix.h>
- #include <cusp/krylov/cg.h>
- #include <cusp/verify.h>
- typedef cusp::device_memory MemorySpace;
- typedef float mytype;
- typedef mytype ValueType;
- /* genTridiag: generate a random tridiagonal symmetric matrix */
- void genTridiag(int *I, int *J, mytype *val, int N, int nz)
- {
- I[0] = 0, J[0] = 0, J[1] = 1;
- val[0] = (mytype)rand()/RAND_MAX + 10.0f;
- val[1] = (mytype)rand()/RAND_MAX;
- int start;
- for (int i = 1; i < N; i++)
- {
- if (i > 1)
- {
- I[i] = I[i-1]+3;
- }
- else
- {
- I[1] = 2;
- }
- start = (i-1)*3 + 2;
- J[start] = i - 1;
- J[start+1] = i;
- if (i < N-1)
- {
- J[start+2] = i + 1;
- }
- val[start] = val[start-1];
- val[start+1] = (mytype)rand()/RAND_MAX + 10.0f;
- if (i < N-1)
- {
- val[start+2] = (mytype)rand()/RAND_MAX;
- }
- }
- I[N] = nz; // last row offset is always num_entries
- }
- int main(int argc, char **argv)
- {
- int N = 0, nz = 0, *I = NULL, *J = NULL;
- const float tol = 1e-9f;
- const int max_iter = 10000;
- mytype *val;
- cudaDeviceProp deviceProp;
- cudaGetDeviceProperties(&deviceProp, 0);
- // Statistics about the GPU device
- printf("> GPU device has %d Multi-Processors, SM %d.%d compute capabilities\n\n",
- deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor);
- /* Generate a random tridiagonal symmetric matrix in CSR format */
- N = 1048576;
- nz = (N-2)*3 + 4;
- cusp::csr_matrix<int, ValueType, cusp::host_memory> A_h(N, N, nz);
- I = &(A_h.row_offsets[0]);
- J = &(A_h.column_indices[0]);
- val = &(A_h.values[0]);
- genTridiag(I, J, val, N, nz);
- if (!cusp::is_valid_matrix(A_h)) {printf("invalid CSR matrix A_h\n"); return 1;}
- cusp::csr_matrix<int, ValueType, MemorySpace> A = A_h;
- if (!cusp::is_valid_matrix(A)) {printf("invalid CSR matrix A\n"); return 1;}
- // allocate storage for solution (x) and right hand side (b)
- cusp::array1d<ValueType, MemorySpace> x(A.num_rows, 0);
- cusp::array1d<ValueType, MemorySpace> b(A.num_rows, 1);
- // set stopping criteria:
- cusp::verbose_monitor<ValueType> monitor(b, max_iter, tol);
- // set preconditioner (identity)
- cusp::identity_operator<ValueType, MemorySpace> M(A.num_rows, A.num_rows);
- // solve the linear system A * x = b with the Conjugate Gradient method
- cusp::krylov::cg(A, x, b, monitor, M);
- }
- $ nvcc -arch=sm_20 -o t341 t341.cu
- $ ./t341
- > GPU device has 11 Multi-Processors, SM 2.0 compute capabilities
- Solver will continue until residual norm 1.024e-06 or reaching 10000 iterations
- Iteration Number | Residual Norm
- 0 1.024000e+03
- 1 4.449882e+01
- 2 3.245218e+00
- 3 2.690220e-01
- 4 2.307639e-02
- 5 1.993140e-03
- 6 1.846192e-04
- 7 1.693378e-05
- 8 1.600114e-06
- 9 1.543018e-07
- Successfully converged after 9 iterations.
- $
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement