Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- $ cat t712.cu
- #include <cstdlib>
- #include <iostream>
- #define ROWS 20
- #define COLS 10
- // Adds entries of `vector` to a flat float buffer in place, one element per thread.
- //
- //   size   - number of elements of `matrix` that may be updated
- //   matrix - buffer updated in place; elements [0, size) are touched
- //   vector - values to add; read at index threadIdx.x, so it must provide
- //            at least blockDim.x elements
- //
- // Each thread updates the element at its flat 1D global index and adds the
- // vector entry chosen by its position inside the block. When blockDim.x
- // equals the row length of a row-major matrix, every row receives the vector.
- __global__ void kernel(const unsigned int size, float* matrix, const float* vector)
- {
-     // flat position of this thread across the whole 1D grid
-     const unsigned int element = threadIdx.x + blockDim.x * blockIdx.x;
-     // threads that fall past the end of the buffer have nothing to do
-     if (element >= size) return;
-     // add the vector entry matching this thread's position within its block
-     matrix[element] = matrix[element] + vector[threadIdx.x];
- }
- // Terminates the program with a diagnostic when a CUDA runtime call failed.
- //   status - value returned by the CUDA runtime call
- //   what   - short description of the call, used in the error message
- static void checkCuda(cudaError_t status, const char* what)
- {
-     if (status != cudaSuccess) {
-         std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
-         std::exit(EXIT_FAILURE);
-     }
- }
- // Demo driver: zeroes a ROWS x COLS device matrix, adds the vector
- // 0..COLS-1 to it with `kernel` (one block per row, one thread per column),
- // copies the result back and prints it row by row.
- // Returns 0 on success; exits with EXIT_FAILURE if an allocation or any
- // CUDA call fails.
- int main(){
-     float *h_mat = NULL, *d_mat = NULL, *h_vec = NULL, *d_vec = NULL;
-     // byte counts are kept in size_t, the type malloc/cudaMalloc expect
-     const size_t msz = ROWS*COLS*sizeof(float);
-     const size_t vsz = COLS*sizeof(float);
-     h_mat = (float *)malloc(msz);
-     h_vec = (float *)malloc(vsz);
-     if (h_mat == NULL || h_vec == NULL) {
-         std::cerr << "host allocation failed" << std::endl;
-         free(h_mat);
-         free(h_vec);
-         return EXIT_FAILURE;
-     }
-     checkCuda(cudaMalloc(&d_mat, msz), "cudaMalloc(d_mat)");
-     checkCuda(cudaMalloc(&d_vec, vsz), "cudaMalloc(d_vec)");
-     // all-zero bytes are 0.0f, so a byte-wise memset is a valid float zero fill
-     checkCuda(cudaMemset(d_mat, 0, msz), "cudaMemset(d_mat)");
-     for (int i=0; i<COLS; i++) h_vec[i] = i;
-     checkCuda(cudaMemcpy(d_vec, h_vec, vsz, cudaMemcpyHostToDevice), "cudaMemcpy(h_vec -> d_vec)");
-     // the kernel indexes the vector with threadIdx.x, so blockDim.x must be COLS
-     kernel<<<ROWS,COLS>>>(ROWS*COLS, d_mat, d_vec);
-     // a launch does not return a status; query it explicitly
-     checkCuda(cudaGetLastError(), "kernel launch");
-     // this blocking copy also reports any error raised while the kernel ran
-     checkCuda(cudaMemcpy(h_mat, d_mat, msz, cudaMemcpyDeviceToHost), "cudaMemcpy(d_mat -> h_mat)");
-     for (int i = 0; i < ROWS; i++){
-         for (int j = 0; j < COLS; j++) std::cout << h_mat[i*COLS+j] << " ";
-         std::cout << std::endl;}
-     // release device and host buffers
-     checkCuda(cudaFree(d_mat), "cudaFree(d_mat)");
-     checkCuda(cudaFree(d_vec), "cudaFree(d_vec)");
-     free(h_mat);
-     free(h_vec);
-     return 0;
- }
- $ nvcc -o t712 t712.cu
- $ cuda-memcheck ./t712
- ========= CUDA-MEMCHECK
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- ========= ERROR SUMMARY: 0 errors
- $
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement