Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Error-reporting helpers for CUDA runtime calls.
//
// All three macros expand to a single statement (do { ... } while (0)), so they are
// safe to use as the unbraced body of an if/else or a loop, and each evaluates its
// argument exactly once. Local names carry a trailing underscore so they cannot
// shadow an identifier used inside the macro argument.

// Prints a diagnostic on stdout when `err` is not cudaSuccess; does nothing otherwise.
// __LINE__/__FILE__ refer to the place where the macro is expanded.
#define PRINTERROR(err) \
    do { \
        cudaError_t printErrorStatus_ = (err); \
        if (printErrorStatus_ != cudaSuccess) \
            printf("CUDA ERROR IN LINE %d OF FILE %s: %s (%d)\n", __LINE__, __FILE__, \
                   cudaGetErrorString(printErrorStatus_), (int)printErrorStatus_); \
    } while (0)

// Evaluates a CUDA runtime call that returns cudaError_t and reports a failure.
// Execution continues after a failure; the error is only printed.
#define SAFECALL(fun) \
    do { \
        cudaError_t safeCallStatus_ = (fun); \
        PRINTERROR(safeCallStatus_); \
    } while (0)

// Evaluates a kernel launch expression (wrap it in parentheses so the commas inside
// <<<...>>> stay within one macro argument), then reports whatever cudaGetLastError()
// returns. No device synchronisation is performed here.
#define SAFECALL_KERNEL(fun) \
    do { \
        (fun); \
        cudaError_t kernelStatus_ = cudaGetLastError(); \
        PRINTERROR(kernelStatus_); \
    } while (0)
// Device buffers: allocated through matricesDevice by initDeviceMatrices().
float *ADev;
float *BDev;
float *CDev;

// Host buffers: allocated through matricesHost by initHostMatrices().
float *AHost;
float *BHost;
float *CHost;

// Tables holding the ADDRESSES of the pointers above, in A, B, C order, so the
// init/free helpers can loop over the three matrices and update the pointers themselves.
float **matricesHost[3] = { &AHost, &BHost, &CHost };
float **matricesDevice[3] = { &ADev, &BDev, &CDev };
// Allocates the three host matrices referenced by `f` and fills the first two with
// random values.
//
//   f          - array of three addresses of float* variables, used in the order
//                A, B, C; each variable receives a newly allocated buffer.
//   matrixSize - number of rows (= columns); every buffer holds
//                matrixSize * matrixSize floats.
//
// A and B are filled with rand()/RAND_MAX values (A[i] is drawn before B[i]).
// C is zero-filled so that it never holds indeterminate data if it is read before
// it has been written.
//
// If matrixSize is not positive, nothing is allocated and all three pointers are
// NULL on return. If an allocation fails, the failure is reported on stderr, any
// buffers already allocated by this call are released, and all three pointers are
// NULL on return.
void initHostMatrices (float*** f, int matrixSize)
{
    for (int i = 0; i < 3; i++)
    {
        *f[i] = NULL;
    }
    if (matrixSize <= 0)
    {
        return;
    }

    // Widen before multiplying: matrixSize * matrixSize overflows int for sizes above 46340.
    const size_t elementCount = (size_t)matrixSize * (size_t)matrixSize;

    for (int i = 0; i < 3; i++)
    {
        // calloc zero-fills and checks elementCount * sizeof(float) for overflow.
        *f[i] = (float*)calloc(elementCount, sizeof(float));
        if (*f[i] == NULL)
        {
            fprintf(stderr, "initHostMatrices: could not allocate a %d x %d host matrix\n",
                    matrixSize, matrixSize);
            for (int j = 0; j < i; j++)
            {
                free(*f[j]);
                *f[j] = NULL;
            }
            return;
        }
    }

    float* A = *f[0];
    float* B = *f[1];
    for (size_t i = 0; i < elementCount; i++)
    {
        A[i] = ((float)rand())/RAND_MAX;
        B[i] = ((float)rand())/RAND_MAX;
    }
}
// Releases the three host buffers referenced by `f` (array of three addresses of
// float* variables) and resets each variable to NULL.
//
// Resetting the pointers means a repeated call, or a call after a failed/skipped
// allocation that left NULL pointers, is harmless: free(NULL) does nothing.
void freeHostMatrices (float*** f)
{
    for (int i = 0; i < 3; i++)
    {
        free(*f[i]);
        *f[i] = NULL;
    }
}
// Allocates the three device matrices referenced by `f` with cudaMalloc.
//
//   f          - array of three addresses of float* variables; each variable
//                receives a device allocation of matrixSize * matrixSize floats.
//   matrixSize - number of rows (= columns) of each matrix.
//
// Allocation failures are reported through SAFECALL and execution continues.
// Each pointer is set to NULL before its allocation is attempted, so a pointer
// whose cudaMalloc failed can still be handed to cudaFree later.
void initDeviceMatrices (float*** f, int matrixSize)
{
    // Widen before multiplying: matrixSize * matrixSize overflows int for sizes above 46340.
    const size_t byteCount = (size_t)matrixSize * (size_t)matrixSize * sizeof(float);

    for (int i = 0; i < 3; i++)
    {
        *f[i] = NULL;
        SAFECALL(cudaMalloc(f[i], byteCount));
    }
}
// Releases the three device buffers referenced by `f` (array of three addresses of
// float* variables) with cudaFree and resets each variable to NULL.
//
// Failures are reported through SAFECALL. The pointer is reset regardless, so no
// stale device address is left behind and a repeated call passes NULL to cudaFree.
void freeDeviceMatrices (float*** f)
{
    for (int i = 0; i < 3; i++)
    {
        SAFECALL(cudaFree(*f[i]));
        *f[i] = NULL;
    }
}
// Checks the product stored in CHost against a reference computed on the CPU from
// AHost and BHost (all treated as row-major matrixSize x matrixSize matrices).
//
// Prints one row of the matrix per output line: "P " for an element whose
// difference from the reference is within the tolerance in EITHER direction,
// "F " otherwise. Afterwards runs the shell command "pause".
//
// The reference value is accumulated element by element in a local variable, so no
// temporary matrix has to be allocated (and none can leak).
void matrixVerification(int matrixSize)
{
    const double tolerance = 0.001;

    for (int i = 0; i < matrixSize; i++)
    {
        for (int j = 0; j < matrixSize; j++)
        {
            float expected = 0;
            for (int k = 0; k < matrixSize; k++)
                expected += AHost[i*matrixSize + k]*BHost[k*matrixSize + j];

            // Two-sided check: a result that is too large must fail just like one
            // that is too small. A NaN difference fails both comparisons.
            const float difference = expected - CHost[i*matrixSize + j];
            const bool withinTolerance = (difference < tolerance) && (difference > -tolerance);
            printf("%s ", withinTolerance ? "P" : "F");
        }
        printf("\n");
    }
    system("pause");
}
// Test driver: builds random host matrices A and B, copies them to the device,
// launches cuMultiplyMatricesStandard, copies C back and prints the P/F comparison
// against a CPU reference.
int main()
{
    // Different random matrices on every run.
    srand((unsigned int)time(NULL));
    SAFECALL(cudaSetDevice(0));

    int matrixSize = 7;
    // Size in bytes of one matrixSize x matrixSize float matrix.
    const size_t matrixBytes = matrixSize*matrixSize*sizeof(float);

    initHostMatrices(matricesHost, matrixSize);
    initDeviceMatrices(matricesDevice, matrixSize);

    // Upload the two operands.
    SAFECALL(cudaMemcpy(ADev, AHost, matrixBytes, cudaMemcpyHostToDevice));
    SAFECALL(cudaMemcpy(BDev, BHost, matrixBytes, cudaMemcpyHostToDevice));

    // NOTE(review): 2x2 blocks of 2x2 threads is 4x4 threads for a 7x7 matrix.
    // Whether every element of C gets computed depends on the kernel, which is
    // not visible here -- confirm.
    dim3 numBlocks(2, 2);
    dim3 numThreads(2, 2);
    SAFECALL_KERNEL((cuMultiplyMatricesStandard<<<numBlocks, numThreads>>>(ADev, BDev, CDev, matrixSize)));

    // Download the product and compare it with the CPU result.
    SAFECALL(cudaMemcpy(CHost, CDev, matrixBytes, cudaMemcpyDeviceToHost));
    matrixVerification(matrixSize);

    freeHostMatrices(matricesHost);
    freeDeviceMatrices(matricesDevice);
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement