Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <utility>
- #include <cuda_runtime.h>
- using namespace std;
/*
 * Scope guard ("fake destructor") for C-like resources, so cleanup still runs
 * when an exception unwinds the enclosing scope. E.g.:
 *   void* array = malloc(123);
 *   destructor([=]{ free(array); });
 * The callable is invoked exactly once, when the enclosing scope is left.
 * Use at most one destructor(...) per source line: the guard variable's name
 * is derived from __LINE__.
 */
template<typename F> struct destructor_helper{
    F f;         // cleanup callable
    bool armed;  // false once ownership of the cleanup has been moved away

    explicit destructor_helper(F fn): f(std::forward<F>(fn)), armed(true) {}

    // Moving transfers the duty to run the cleanup; the source is disarmed so
    // the callable is never invoked twice (matters whenever the move made by
    // returning the guard from a function is not elided).
    destructor_helper(destructor_helper&& other): f(std::forward<F>(other.f)), armed(other.armed){
        other.armed = false;
    }

    // Copying would duplicate the cleanup call.
    destructor_helper(const destructor_helper&) = delete;
    destructor_helper& operator=(const destructor_helper&) = delete;

    ~destructor_helper(){ if(armed) f(); }
};

// Wraps a callable into a guard. Taking F by value makes deduction decay the
// argument, so lvalue callables are copied and function names become function
// pointers instead of being stored as references.
template<typename F> destructor_helper<F> make_destructor_helper(F f){
    return destructor_helper<F>{std::move(f)};
}

// Two-step expansion so that __LINE__ is expanded before token pasting.
#define destructor_helper_macro_2(f, l) auto destructor_ ## l = make_destructor_helper(f)
#define destructor_helper_macro_1(f, l) destructor_helper_macro_2(f, l)
#define destructor(f) destructor_helper_macro_1(f, __LINE__)
// Debug checkpoint: synchronizes the device (throwing through assertcu on
// error), then logs "cudacheckpoint <file>:<line>" to std::cerr.
// The expansion is a bare brace block, so it also works without a trailing ';'.
#define cudacheckpoint_(src_file, src_line) {                                          \
    cudaDeviceSynchronize() assertcu;                                                  \
    std::cerr << "cudacheckpoint " << src_file << ":" << src_line << std::endl;        \
}
#define cudacheckpoint cudacheckpoint_(__FILE__, __LINE__)
/*
 * Translation of CUDA error codes into exceptions.
 * Write the marker after the call whose result should be checked, e.g.
 *   cudaMemcpy(...) assertcu;
 * Each marker expands to `>> <helper>()`, so the call's return value is passed
 * to the operator>> overload for that helper type.
 * NOTE(review): assertcufft_helper is not defined in the visible part of this
 * file - presumably it lives with the cuFFT wrappers; confirm before use.
 */
#define assertcu >>assertcu_helper()
#define assertcufft >>assertcufft_helper()
- #include <stdexcept>
- #include <string>
// Exception thrown for failed CUDA runtime calls; what() carries the message.
struct cuda_error : std::runtime_error{
    // Message is the text returned by cudaGetErrorString for the given code.
    cuda_error(cudaError code)
        : std::runtime_error(cudaGetErrorString(code))
    {}
protected:
    // Accepts an already-built message; only reachable from derived types.
    cuda_error(std::string&& message)
        : std::runtime_error(std::move(message))
    {}
};
// Tag type: its only purpose is to select the operator>> overload below.
struct assertcu_helper{};

// Checks a CUDA status code: throws cuda_error for anything other than
// cudaSuccess, otherwise returns cudaSuccess (as int).
inline int operator>>(cudaError ret, assertcu_helper&&){
    if(ret != cudaSuccess){
        throw cuda_error(ret);
    }
    return cudaSuccess;
}
// Copies the device symbol `on_device` into a host-side T and returns it.
// The copy is enqueued on `stream`, and the function waits for that stream to
// finish before returning. Any CUDA failure is thrown through assertcu.
// T must be default-constructible (a local T is created to receive the bytes).
template<typename T> T get_device_object(const T& on_device, cudaStream_t stream){
    T host_copy;
    void* destination = (void*)&host_copy;
    const void* symbol = (const void*)&on_device;
    cudaMemcpyFromSymbolAsync(destination, symbol, sizeof(T), 0, cudaMemcpyDeviceToHost, stream) assertcu;
    cudaStreamSynchronize(stream) assertcu;
    return host_copy;
}
// Threads per block used for the kernel launches in this file.
#define block_size 256

// Kernel: every launched thread stores 42 at its flat 1D global index.
// There is no bounds check, so gridDim.x * blockDim.x must not exceed the
// number of ints in the buffer. The index is computed in size_t so it cannot
// wrap in 32-bit arithmetic.
__global__ void fillmem(int* ints){
    const size_t block_start = size_t(blockIdx.x) * blockDim.x;
    ints[block_start + threadIdx.x] = 42;
}
// Device-side result flag: starts out true and is cleared by validate() when
// any inspected element differs from 42. Nothing in this file sets it back.
__device__ bool ok = true;

// Kernel: every launched thread checks the element at its flat 1D global
// index. There is no bounds check, so gridDim.x * blockDim.x must not exceed
// the number of ints in the buffer.
__global__ void validate(const int* ints){
    const size_t index = size_t(blockIdx.x) * blockDim.x + threadIdx.x;
    const bool matches = (ints[index] == 42);
    if(!matches) ok = false;
}
/*
 * Allocates (2 GiB / 4 + offset) ints on the device, zeroes them, fills them
 * with 42 via fillmem, checks them via validate, and prints "<offset> ok" or
 * "<offset> failed".
 *
 * argv[1] (optional): offset, in ints, added to the 2 GiB base size; default 2.
 *
 * Returns 0 when the run completed (whether or not validation passed) and 1
 * when a CUDA call or a precondition failed; the failure text goes to stderr.
 */
int main(int argc, char *argv[]){
    try {
        int offset = 2;
        if (argc == 2) {
            offset = atoi(argv[1]);
        }
        unsigned int total_ints = (((1024ULL * 1024ULL * 1024ULL * 2) / 4)+offset);
        // Byte count in size_t: total_ints * 4 evaluated in 32-bit unsigned
        // arithmetic wraps around once total_ints reaches 2^30.
        const size_t total_bytes = size_t(total_ints) * sizeof(int);

        cudaDeviceProp deviceProp;
        cudaGetDeviceProperties(&deviceProp, 0) assertcu;
        if(deviceProp.major < 3) throw std::runtime_error("required sm >= 3.0"); //for large grid size in x dimension

        // Each cleanup guard is registered only after its resource exists, so
        // a failed create/alloc never leads to destroying or freeing a handle
        // that was not acquired. Guards run in reverse order of declaration.
        cudaStream_t stream = 0;
        cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking) assertcu;
        destructor([&]{ cudaStreamDestroy(stream); });

        int* ints = 0;
        cudaMalloc(&ints, total_bytes) assertcu;
        destructor([&]{ cudaFree(ints); });

        // Declared last, therefore runs first on scope exit: pending device
        // work is drained before the buffer is freed and the stream destroyed.
        destructor(cudaDeviceSynchronize);

        cudaMemsetAsync(ints, 0, total_bytes, stream) assertcu;
        cudaDeviceSynchronize() assertcu;

        // The block count is rounded down, so the last total_ints % block_size
        // elements are neither written nor checked; the kernels have no bounds
        // guard and rely on this.
        const unsigned int blocks = total_ints / block_size;
        fillmem<<<blocks, block_size, 0, stream>>>(ints);
        cudaGetLastError() assertcu;
        validate<<<blocks, block_size, 0, stream>>>(ints);
        cudaGetLastError() assertcu;

        if(!get_device_object(ok, stream)) {
            std::cout << offset << " failed\n";
            //throw logic_error("validation failed");
        } else {
            std::cout << offset << " ok\n";
        }
        return 0;
    } catch (const std::exception& e) {
        // Catching here guarantees the scope guards above have already run,
        // and turns the failure into a message plus a non-zero exit code.
        std::cerr << "error: " << e.what() << std::endl;
        return 1;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement