Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # (Approximate command-line. Settings inherited from host are not visible below.)
- # (Please see the output window after a build for the full command-line)
- # Driver API (NVCC Compilation Type is .cubin, .gpu, or .ptx)
- set CUDAFE_FLAGS=--sdk_dir "C:\Program Files (x86)\Windows Kits\8.1\"
- "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin\nvcc.exe" --use-local-env --cl-version 2013 -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin" -I"D:\Software\Dropbox\Skola\Master's Courses\DH2323 Computer Graphics and Interaction\Project\CUDA_udacity\CUDA_udacity\include" -G --keep-dir Debug -maxrregcount=0 --machine 32 --compile -cudart static -o Debug\%(Filename)%(Extension).obj "%(FullPath)"
- # Runtime API (NVCC Compilation Type is hybrid object or .c file)
- set CUDAFE_FLAGS=--sdk_dir "C:\Program Files (x86)\Windows Kits\8.1\"
- "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin\nvcc.exe" --use-local-env --cl-version 2013 -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin" -I"D:\Software\Dropbox\Skola\Master's Courses\DH2323 Computer Graphics and Interaction\Project\CUDA_udacity\CUDA_udacity\include" -G --keep-dir Debug -maxrregcount=0 --machine 32 --compile -cudart static -g -Xcompiler "/EHsc /nologo /Od /Zi " -o Debug\%(Filename)%(Extension).obj "%(FullPath)"
- //source code below
- //test.cu
- #include "test.h"
/**
 * Element-wise addition kernel: d_out[i] = d_a[i] + d_b[i].
 *
 * Launch layout: each thread handles the one element selected by threadIdx.x.
 * blockIdx is not used, so launching more than one block makes every block
 * process the same elements again. There is no length parameter and therefore
 * no bounds check: all three arrays must hold at least blockDim.x elements.
 *
 * @param d_out device array receiving the sums
 * @param d_a   first device input array
 * @param d_b   second device input array
 */
template <typename T>
__global__ void add(T* d_out, const T* d_a, const T* d_b)
{
    const int i = threadIdx.x;
    d_out[i] = d_a[i] + d_b[i];
}

// The template body is only visible in this translation unit. Callers that
// include test.h see just the declaration, so the instantiation they launch
// must be emitted here or the link step fails with an unresolved symbol.
// Add one line per element type that is launched from another file.
template __global__ void add<float>(float* d_out, const float* d_a, const float* d_b);
- //test.h
- #include <stdio.h>
- #include <iostream>
- #include <time.h>
- #include <cuda.h>
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
/*
 * Element-wise addition kernel (declaration only; the body is in test.cu).
 * Writes d_a[i] + d_b[i] to d_out[i], where i is the launching thread's
 * threadIdx.x.
 */
template <class T>
__global__ void add(T* d_out,
                    const T* d_a,
                    const T* d_b);
- //main.cu
- #include <stdio.h>
- #include <iostream>
- #include <time.h>
- #include <cuda.h>
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include "test.h"
- using namespace std;
// Reports a failed CUDA runtime call on stderr; returns true when status is cudaSuccess.
static bool cudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/**
 * Fills two host arrays of 1024 floats, adds them on the GPU with the `add`
 * kernel (one block, one thread per element) and copies the result back
 * into h_out.
 *
 * Every CUDA call is checked; the first failure is reported on stderr and
 * the remaining GPU steps are skipped. Device buffers are always released.
 * The program then waits for a key press, as before.
 *
 * @return 0 on success, 1 if any CUDA call or the kernel launch failed.
 */
int main(int argc, char **argv)
{
    const int ARRAY_SIZE = 1024;
    const int ARRAY_BYTES = sizeof(float) * ARRAY_SIZE;

    float h_a[ARRAY_SIZE];
    float h_b[ARRAY_SIZE];
    float h_out[ARRAY_SIZE];
    for (int i = 0; i < ARRAY_SIZE; i++)
    {
        h_a[i] = i / 2.0f;
        h_b[i] = (i + 1) / 3.0f;
    }

    // Start as null so the unconditional cudaFree calls below are harmless
    // when an allocation failed or was never attempted.
    float* d_a = NULL;
    float* d_b = NULL;
    float* d_out = NULL;

    //clock_t t;
    //t = clock();

    // && short-circuits: nothing after the first failing call is executed.
    bool ok = cudaOk(cudaMalloc(&d_a, ARRAY_BYTES), "cudaMalloc(d_a)")
           && cudaOk(cudaMalloc(&d_b, ARRAY_BYTES), "cudaMalloc(d_b)")
           && cudaOk(cudaMalloc(&d_out, ARRAY_BYTES), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_a, h_a, ARRAY_BYTES, cudaMemcpyHostToDevice), "copy h_a to device")
           && cudaOk(cudaMemcpy(d_b, h_b, ARRAY_BYTES, cudaMemcpyHostToDevice), "copy h_b to device");

    if (ok)
    {
        add<<<1, ARRAY_SIZE>>>(d_out, d_a, d_b);
        // A launch does not return a status: a bad configuration shows up in
        // cudaGetLastError(), a fault inside the kernel in the blocking
        // cudaMemcpy that follows.
        ok = cudaOk(cudaGetLastError(), "add kernel launch")
          && cudaOk(cudaMemcpy(h_out, d_out, ARRAY_BYTES, cudaMemcpyDeviceToHost), "copy result to host");
    }

    //cout << clock() - t << endl;

    // Release device memory on both the success and the failure path.
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_out);

    std::cin.get();
    return ok ? 0 : 1;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement