Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
/*
 * Multiplies each of the first N elements of `a` by 10, in place.
 * (Despite the name `incr`, the operation is a scale, not an increment.)
 *
 * a : pointer to at least N floats; it is dereferenced inside the kernel, so
 *     it must be addressable from device code.
 * N : number of elements to process; N <= 0 leaves the array untouched.
 *
 * Launch layout: 1D grid of 1D blocks. Each thread starts at its flat global
 * index and advances by the total number of launched threads, so every
 * element is visited exactly once for any grid/block size (the launch does
 * not have to supply >= N threads). No shared memory is used.
 */
__global__ void incr(float *a, int N)
{
    /* 64-bit arithmetic so the index/stride cannot wrap on large launches. */
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long idx = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         idx < N;
         idx += stride) {
        a[idx] *= 10.0f;
    }
}
/*
 * Terminates the program with a diagnostic on stderr when a CUDA runtime
 * call did not return cudaSuccess.
 *
 * status : return value of the CUDA runtime call being checked.
 * what   : short description of the call, used in the error message.
 */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what,
                cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Demo driver: fills a host array with 0..N-1, copies it to the device,
 * runs the `incr` kernel on it, copies the result back and prints one
 * "index: value" line per element.
 *
 * Returns EXIT_SUCCESS on success; exits with EXIT_FAILURE if the host
 * allocation or any CUDA call fails.
 */
int main(void)
{
    float *a_h = NULL;
    float *a_d = NULL;
    const int N = 100;
    const size_t size = N * sizeof(float);

    a_h = (float *)malloc(size);
    if (a_h == NULL) {
        fprintf(stderr, "Failed to allocate %lu bytes of host memory\n",
                (unsigned long)size);
        return EXIT_FAILURE;
    }
    checkCuda(cudaMalloc((void **)&a_d, size), "cudaMalloc");

    for (int i = 0; i < N; ++i) {
        a_h[i] = (float)i;
    }

    checkCuda(cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice),
              "cudaMemcpy (host to device)");

    /* One full warp per block; ceil-div so the last partial block is launched. */
    const int block_size = 32;
    const int n_blocks = (N + block_size - 1) / block_size;
    incr<<<n_blocks, block_size>>>(a_d, N);
    /* A kernel launch returns nothing; bad launch configs surface here. */
    checkCuda(cudaGetLastError(), "incr kernel launch");

    /* Blocking copy: also reports errors raised while the kernel executed. */
    checkCuda(cudaMemcpy(a_h, a_d, size, cudaMemcpyDeviceToHost),
              "cudaMemcpy (device to host)");

    for (int i = 0; i < N; ++i) {
        printf("%d: %3.0f\n", i, a_h[i]);
    }

    free(a_h);
    checkCuda(cudaFree(a_d), "cudaFree");

#ifdef _WIN32
    /* Keep the console window open when started from an IDE on Windows. */
    system("Pause");
#endif
    return EXIT_SUCCESS;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement