Advertisement
Guest User

Untitled

a guest
Feb 9th, 2014
177
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.22 KB | None | 0 0
  1. $ cat t341.cu
  2. #include <stdlib.h>
  3. #include <stdio.h>
  4.  
  5. #include <cusp/csr_matrix.h>
  6. #include <cusp/krylov/cg.h>
  7. #include <cusp/verify.h>
  8.  
  9. typedef cusp::device_memory MemorySpace;
  10.  
  11. typedef float mytype;
  12. typedef mytype ValueType;
  13.  
  14.  
  15. /* genTridiag: generate a random tridiagonal symmetric matrix */
  16. void genTridiag(int *I, int *J, mytype *val, int N, int nz)
  17. {
  18. I[0] = 0, J[0] = 0, J[1] = 1;
  19. val[0] = (mytype)rand()/RAND_MAX + 10.0f;
  20. val[1] = (mytype)rand()/RAND_MAX;
  21. int start;
  22.  
  23. for (int i = 1; i < N; i++)
  24. {
  25. if (i > 1)
  26. {
  27. I[i] = I[i-1]+3;
  28. }
  29. else
  30. {
  31. I[1] = 2;
  32. }
  33.  
  34. start = (i-1)*3 + 2;
  35. J[start] = i - 1;
  36. J[start+1] = i;
  37.  
  38. if (i < N-1)
  39. {
  40. J[start+2] = i + 1;
  41. }
  42.  
  43. val[start] = val[start-1];
  44. val[start+1] = (mytype)rand()/RAND_MAX + 10.0f;
  45.  
  46. if (i < N-1)
  47. {
  48. val[start+2] = (mytype)rand()/RAND_MAX;
  49. }
  50. }
  51.  
  52. I[N] = nz; // last row offset is always num_entries
  53. }
  54.  
  55. int main(int argc, char **argv)
  56. {
  57. int N = 0, nz = 0, *I = NULL, *J = NULL;
  58. const float tol = 1e-9f;
  59. const int max_iter = 10000;
  60. mytype *val;
  61. cudaDeviceProp deviceProp;
  62. cudaGetDeviceProperties(&deviceProp, 0);
  63.  
  64. // Statistics about the GPU device
  65. printf("> GPU device has %d Multi-Processors, SM %d.%d compute capabilities\n\n",
  66. deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor);
  67.  
  68.  
  69. /* Generate a random tridiagonal symmetric matrix in CSR format */
  70. N = 1048576;
  71. nz = (N-2)*3 + 4;
  72. cusp::csr_matrix<int, ValueType, cusp::host_memory> A_h(N, N, nz);
  73. I = &(A_h.row_offsets[0]);
  74. J = &(A_h.column_indices[0]);
  75. val = &(A_h.values[0]);
  76. genTridiag(I, J, val, N, nz);
  77.  
  78. if (!cusp::is_valid_matrix(A_h)) {printf("invalid CSR matrix A_h\n"); return 1;}
  79. cusp::csr_matrix<int, ValueType, MemorySpace> A = A_h;
  80. if (!cusp::is_valid_matrix(A)) {printf("invalid CSR matrix A\n"); return 1;}
  81. // allocate storage for solution (x) and right hand side (b)
  82. cusp::array1d<ValueType, MemorySpace> x(A.num_rows, 0);
  83. cusp::array1d<ValueType, MemorySpace> b(A.num_rows, 1);
  84. // set stopping criteria:
  85. cusp::verbose_monitor<ValueType> monitor(b, max_iter, tol);
  86. // set preconditioner (identity)
  87. cusp::identity_operator<ValueType, MemorySpace> M(A.num_rows, A.num_rows);
  88. // solve the linear system A * x = b with the Conjugate Gradient method
  89. cusp::krylov::cg(A, x, b, monitor, M);
  90.  
  91. }
  92. $ nvcc -arch=sm_20 -o t341 t341.cu
  93. $ ./t341
  94. > GPU device has 11 Multi-Processors, SM 2.0 compute capabilities
  95.  
  96. Solver will continue until residual norm 1.024e-06 or reaching 10000 iterations
  97. Iteration Number | Residual Norm
  98. 0 1.024000e+03
  99. 1 4.449882e+01
  100. 2 3.245218e+00
  101. 3 2.690220e-01
  102. 4 2.307639e-02
  103. 5 1.993140e-03
  104. 6 1.846192e-04
  105. 7 1.693378e-05
  106. 8 1.600114e-06
  107. 9 1.543018e-07
  108. Successfully converged after 9 iterations.
  109. $
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement