Advertisement
djrichardson

Untitled

Apr 29th, 2014
364
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.16 KB | None | 0 0
  1. #include <cub/cub.cuh>
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <iostream>
  5. using std::cout;
  6. using std::endl;
  7.  
  // Computes the exclusive prefix sum of n floats on the device using
  // cub::DeviceScan::ExclusiveSum.
  //
  //   dev_inData  - device pointer to the n input values (not modified here).
  //   dev_outData - device pointer receiving n outputs.
  //   n           - number of elements to scan.
  //
  // Any failure (size query, temp-storage allocation, the scan itself, or the
  // release of temp storage) prints a diagnostic and terminates via exit(-1).
  void dev_cumsum (const float *dev_inData, float *dev_outData, int n) {
      // The original code strips const before handing the pointer to CUB;
      // that is preserved here. NOTE(review): newer CUB releases may accept
      // const float* directly - confirm before removing the cast.
      float* dev_in = const_cast<float*>(dev_inData);

      void* dev_temp_storage = NULL;
      size_t temp_storage_bytes = 0;

      // First call with a NULL workspace only reports the required size.
      cudaError_t error = cub::DeviceScan::ExclusiveSum(
          dev_temp_storage, temp_storage_bytes, dev_in, dev_outData, n);

      // Set up the working space on the device. If this fails the pointer stays
      // NULL, and a second ExclusiveSum call would silently act as another size
      // query, so the scan must be skipped.
      if (error == cudaSuccess) {
          error = cudaMalloc(&dev_temp_storage, temp_storage_bytes);
      }

      // Run the calculation
      if (error == cudaSuccess) {
          error = cub::DeviceScan::ExclusiveSum(
              dev_temp_storage, temp_storage_bytes, dev_in, dev_outData, n);
      }

      // Always release the workspace (cudaFree(NULL) is a no-op). Its status is
      // kept as well, but an earlier failure takes precedence.
      cudaError_t free_error = cudaFree(dev_temp_storage);
      if (error == cudaSuccess) {
          error = free_error;
      }

      // Abort on error
      if (error != cudaSuccess) {
          cout << "Aborting with error: " << cudaGetErrorString(error)
               << " (" << static_cast<int>(error) << ")" << endl;
          exit(-1);
      }
  }
  25.  
  // Compares the first `length` entries of two float arrays element by element
  // using exact equality. Returns the index of the first differing entry, or -1
  // when all `length` entries match.
  int test (float* host, float* device, unsigned int length) {
      unsigned int pos = 0;

      // Advance while the entries agree; stop at the first mismatch or at the end.
      while (pos < length && !(host[pos] != device[pos])) {
          ++pos;
      }

      if (pos < length) {
          return pos;
      }
      return -1;
  }
  34.  
  // Prints a diagnostic and terminates via exit(-1) when a CUDA runtime call
  // did not return cudaSuccess; `what` names the failing step.
  static void check_cuda (cudaError_t status, const char* what) {
      if (status != cudaSuccess) {
          cout << what << " failed: " << cudaGetErrorString(status) << endl;
          exit(-1);
      }
  }

  // For every length in [72, 1024) fills an array with random digits, computes
  // the exclusive prefix sum on the host and via dev_cumsum on the device, and
  // reports whether the two results agree (and, if not, the first mismatch).
  int main(){
      const unsigned int kFirstLength = 72;
      const unsigned int kEndLength = 1024;   // exclusive upper bound

      // Fixed-capacity host buffers sized for the largest tested length; this
      // replaces the non-standard variable-length arrays. Only the first i
      // entries are used in each iteration.
      float raw_data[kEndLength];
      float simple_calculation[kEndLength];
      float device_calculation[kEndLength];

      for (unsigned int i = kFirstLength; i < kEndLength; i++) {
          // Host reference: exclusive sum, so output[0] is 0 and output[j] is
          // the sum of inputs 0..j-1.
          raw_data[0] = rand()%10;
          simple_calculation[0] = 0;
          for (unsigned int j = 1; j < i; j++) {
              raw_data[j] = rand()%10;
              simple_calculation[j] = raw_data[j-1] + simple_calculation[j-1];
          }

          size_t size = i * sizeof(float);

          // Device buffers are allocated per iteration with exactly `size`
          // bytes, as in the original, so each length is tested against
          // buffers of its own size.
          float* device_input = NULL;
          check_cuda(cudaMalloc(&device_input, size), "cudaMalloc(device_input)");
          check_cuda(cudaMemcpy(device_input, raw_data, size, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host to device)");

          float* device_output = NULL;
          check_cuda(cudaMalloc(&device_output, size), "cudaMalloc(device_output)");

          dev_cumsum(device_input, device_output, i);

          check_cuda(cudaMemcpy(device_calculation, device_output, size, cudaMemcpyDeviceToHost),
                     "cudaMemcpy(device to host)");

          check_cuda(cudaFree(device_input), "cudaFree(device_input)");
          check_cuda(cudaFree(device_output), "cudaFree(device_output)");

          int failIndex = test(simple_calculation, device_calculation, i);

          if (failIndex < 0) {
              cout << i << " succeeded" << endl;
          }
          else {
              cout << i << " failed in position " << failIndex << endl;
          }
      }

      return 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement