Advertisement
Guest User

ZHEGALKIN_2.0

a guest
May 29th, 2017
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.06 KB | None | 0 0
  1. #include "cuda_runtime.h"
  2. #include "device_launch_parameters.h"
  3. #include <stdio.h>
  4. #include <iostream>
  5. #include <fstream>
  6.  
  7. #include <cuda_runtime.h>
  8. #include <cufft.h>
  9. #include <cufftXt.h>
  10.  
  11.  
  12. using namespace std;
  13. #define vectors 100000
  14. #define N L*vectors
  15. #define L 8
  16.  
  17. __global__ void test(long A[N][L])
  18. {
  19.     int b = threadIdx.x + blockIdx.x * blockDim.x;
  20.    
  21.     if (b < vectors) {
  22.         int popa = L * b + 1;
  23.         for (int i = popa; i < popa + L - 1; i++) {
  24.             for (int j = 0; j < L - 1; j++) {
  25.                 A[i][j] = A[i - 1][j] ^ A[i - 1][j + 1];
  26.             }
  27.         }
  28.     }
  29. }
  30.  
  31. int main()
  32. {
  33.     static long A[N][L];
  34.     long(*d_A)[L]; //pointers to arrays of dimension N
  35.  
  36.     cudaEvent_t start, stop;
  37.     cudaEventCreate(&start);
  38.     cudaEventCreate(&stop);
  39.  
  40.     ofstream myfileout;
  41.     myfileout.open("D:\\out.txt");
  42.  
  43.     ofstream myfileinput;
  44.     myfileinput.open("D:\\in.txt");
  45.    
  46.     for (int i = 0; i < N; i++) {
  47.         for (int j = 0; j < L; j++) {
  48.             A[i][j] = 0;
  49.         }
  50.     }
  51.  
  52.     for (int i = 0; i < N; i = i + 8) {
  53.         A[i][0] = 1;
  54.         A[i][2] = 1;
  55.         A[i][4] = 1;
  56.         A[i][7] = 1;
  57.     }
  58.     /*
  59.     for (int i = 0; i < N; i++) {
  60.         if (i % 8 == 0) {
  61.             myfileinput << endl;
  62.         }
  63.         myfileinput << endl;
  64.         for (int j = 0; j < L; j++) {
  65.             myfileinput << A[i][j];
  66.            
  67.         }
  68.     }
  69.     */
  70.     //allocation
  71.     cudaMalloc((void**)&d_A, (N*L)*sizeof(float));
  72.  
  73.     //copying from host to device
  74.     cudaMemcpy(d_A, A, (N*L)*sizeof(float), cudaMemcpyHostToDevice);
  75.  
  76.     // Start record
  77.     cudaEventRecord(start, 0);
  78.     // Kernel invocation
  79.     test << <512, 256>> >(d_A);
  80.  
  81.     cudaEventRecord(stop, 0);
  82.     cudaEventSynchronize(stop);
  83.     float elapsedTime;
  84.     cudaEventElapsedTime(&elapsedTime, start, stop); // that's our time!
  85.    
  86.     //copying from device to host
  87.     cudaMemcpy(A, (d_A), (N*L)*sizeof(float), cudaMemcpyDeviceToHost);
  88.     cout  << "GPU Time [mks] " << elapsedTime*1000 << endl;
  89.     for (int i = N-L; i < N; i++) {
  90.         if (i % 8 == 0) {
  91.             myfileout << endl;
  92.         }
  93.         myfileout << endl;
  94.         for (int j = 0; j < L; j++) {
  95.             myfileout << A[i][j];
  96.  
  97.         }
  98.     }
  99.  
  100.     myfileout.close();
  101.     myfileinput.close();
  102.     cudaEventDestroy(start);
  103.     cudaEventDestroy(stop);
  104.  
  105. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement