Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <time.h>
// Edge length of the square ARRAY_SIZE x ARRAY_SIZE host matrices declared in
// main; main also passes it as the threads-per-block count of the kernel launch.
- const int ARRAY_SIZE = 2;
- // Build command: nvcc -o partone partone.cu
/*
 * matMult - placeholder kernel for the matrix product.
 *
 * Parameters:
 *   d_out  device table of device row pointers for the result matrix.
 *   d_in   device table of device row pointers for the input matrix
 *          (not read while the product below is disabled).
 *
 * Both arguments are indexed twice (d_out[r][c]), so each must point to an
 * array of row pointers that itself lives in device memory. Handing in a
 * flat buffer of doubles makes the first index read matrix data as an
 * address.
 *
 * Current behaviour: stores the marker value 6.0 into d_out[0][0] and
 * touches nothing else.
 *
 * Launch layout: only threadIdx.x is consulted.
 */
__global__ void matMult(double** d_out, double** d_in){
    /*
     * Disabled product, kept for reference (idx = threadIdx.x selects the
     * output row handled by a thread):
     *
     *   for (int i = 0; i < ARRAY_SIZE; i++) {
     *       d_out[idx][i] = 0;
     *       for (int j = 0; j < ARRAY_SIZE; j++) {
     *           d_out[idx][i] += d_in[j][idx] * d_in[j][i];
     *       }
     *   }
     */

    // Every thread would store the identical value to the identical address;
    // one store per block is enough.
    if (threadIdx.x == 0) {
        d_out[0][0] = 6.0;
    }
}
/*
 * Abort with file/line context when a CUDA runtime call does not return
 * cudaSuccess. Without this, a failed allocation, copy or kernel goes
 * unnoticed and the program prints whatever happens to be in h_out.
 */
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cuda_check_status_ = (call);                           \
        if (cuda_check_status_ != cudaSuccess) {                           \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(cuda_check_status_));     \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/*
 * Fills an ARRAY_SIZE x ARRAY_SIZE host matrix, uploads it, runs matMult on
 * one block of ARRAY_SIZE threads, downloads the result and prints its four
 * corner-indexed elements (0,0), (0,1), (1,0), (1,1).
 *
 * matMult takes double** arguments and indexes them as d[r][c], so the device
 * side needs two things per matrix: a flat data buffer holding the elements,
 * and a table of row pointers into that buffer. Both live in device memory.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on any CUDA error.
 */
int main (int argc, char** argv) {
    (void) argc;
    (void) argv;

    const size_t ARRAY_BYTES = (size_t) ARRAY_SIZE * ARRAY_SIZE * sizeof(double);
    const size_t TABLE_BYTES = (size_t) ARRAY_SIZE * sizeof(double*);

    double h_in[ARRAY_SIZE][ARRAY_SIZE];
    double h_out[ARRAY_SIZE][ARRAY_SIZE];

    // Deterministic test pattern. A random fill in [min, max] was sketched
    // here earlier and is disabled; see
    // https://ubuntuforums.org/showthread.php?t=1717717&p=10618266#post10618266
    for (int i = 0; i < ARRAY_SIZE; i++){
        for (int j = 0; j < ARRAY_SIZE; j++){
            h_in[i][j] = i + j;
        }
    }

    // Flat element storage on the device.
    double* d_in_data = NULL;
    double* d_out_data = NULL;
    CUDA_CHECK(cudaMalloc((void**) &d_in_data, ARRAY_BYTES));
    CUDA_CHECK(cudaMalloc((void**) &d_out_data, ARRAY_BYTES));

    // dest, source, bytes, direction
    CUDA_CHECK(cudaMemcpy(d_in_data, h_in, ARRAY_BYTES, cudaMemcpyHostToDevice));
    // All-zero bytes are 0.0 for doubles; elements the kernel does not write
    // then read back as 0 instead of uninitialised memory.
    CUDA_CHECK(cudaMemset(d_out_data, 0, ARRAY_BYTES));

    // Row-pointer tables: assembled on the host from device addresses, then
    // copied to the device so the kernel can dereference them.
    double* h_in_rows[ARRAY_SIZE];
    double* h_out_rows[ARRAY_SIZE];
    for (int r = 0; r < ARRAY_SIZE; r++){
        h_in_rows[r] = d_in_data + (size_t) r * ARRAY_SIZE;
        h_out_rows[r] = d_out_data + (size_t) r * ARRAY_SIZE;
    }

    double** d_in = NULL;
    double** d_out = NULL;
    CUDA_CHECK(cudaMalloc((void**) &d_in, TABLE_BYTES));
    CUDA_CHECK(cudaMalloc((void**) &d_out, TABLE_BYTES));
    CUDA_CHECK(cudaMemcpy(d_in, h_in_rows, TABLE_BYTES, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_out, h_out_rows, TABLE_BYTES, cudaMemcpyHostToDevice));

    // Launch on 1 block of ARRAY_SIZE threads.
    matMult <<< 1, ARRAY_SIZE >>> (d_out, d_in);
    CUDA_CHECK(cudaGetLastError());       // invalid launch configuration
    CUDA_CHECK(cudaDeviceSynchronize());  // faults raised while the kernel ran

    // The elements live in the flat buffer, not in the pointer table.
    CUDA_CHECK(cudaMemcpy(h_out, d_out_data, ARRAY_BYTES, cudaMemcpyDeviceToHost));

    printf("result (0,0): %lf ", h_out[0][0]);
    printf("result (0,1): %lf\n", h_out[0][1]);
    printf("result (1,0): %lf ", h_out[1][0]);
    printf("result (1,1): %lf\n", h_out[1][1]);

    CUDA_CHECK(cudaFree(d_in));
    CUDA_CHECK(cudaFree(d_out));
    CUDA_CHECK(cudaFree(d_in_data));
    CUDA_CHECK(cudaFree(d_out_data));
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement