Advertisement
Guest User

D3D11_CUDA_interop

a guest
Oct 20th, 2023
254
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 34.85 KB | Source Code | 0 0
  1. // Include the basic windows header files and the Direct3D header files
  2. #include <windows.h>
  3. #include <windowsx.h>
  4. #include <d3d11.h>                      // Direct3D functionality
  5. #include <DirectXMath.h>                // Use this since D3DX has been deprecated
  6. #include <iostream>
  7. #include <stdexcept>                    // For exceptions
  8. #include <dxgi.h>
  9. #include <cuda_runtime_api.h>           // Runtime CUDA API offers better code management but less control compared the the CUDA driver API
  10. #include <cuda_d3d11_interop.h>         // Provides facilities for registering and mapping graphics resources (among others) from/to Direct3D to/from CUDA context
  11. #include <helper_cuda.h>
  12. #include "load_sample.h"
  13.  
  14. // Source (Depth stencil): https://www.youtube.com/watch?v=NLic7ipz4xE&list=PLcacUGyBsOIBlGyQQWzp6D1Xn6ZENx9Y2&index=17
  15. // Source (ImGui Setup): https://www.youtube.com/watch?v=Btx_tujnyB4&list=PLcacUGyBsOIBlGyQQWzp6D1Xn6ZENx9Y2&index=35
  16. // Source (ImGui Usage): https://www.youtube.com/watch?v=o5sJClp0HDY&list=PLcacUGyBsOIBlGyQQWzp6D1Xn6ZENx9Y2&index=36
  17. #include <imgui.h>              // ImGUI general
  18. #include <imgui_impl_win32.h>   // ImGUI Windows integration
  19. #include <imgui_impl_dx11.h>    // ImGUI Direct3D integration
  20.  
  21. #define WIN_WIDTH 600
  22. #define WIN_HEIGHT 600
  23.  
  24. // Include the Direct3D Library file
  25. // NOTE: Not required, since CMake links the library
  26. //#pragma comment (lib, "d3d11.lib")
  27.  
  28. // Global declarations
  // These file-scope handles are shared by WinMain, InitD3D and RenderFrame.
  29. IDXGIAdapter* adapter = nullptr;                            // Video adapter
  30. IDXGISwapChain* swapchain = nullptr;                        // Swap chain interface
  31. ID3D11Device* device = nullptr;                             // D3D device interface
  32. ID3D11DeviceContext* device_context = nullptr;              // D3D device context
  33.  
  34. ID3D11RenderTargetView* rtv = nullptr;                      // Render target view for main rendering (scene + CUDA results + ImGUI)
  35. ID3D11RenderTargetView* rtv_cuda = nullptr;                 // Render target view for CUDA content
  36. ID3D11ShaderResourceView* srv_cuda = nullptr;               // Shader view for CUDA
  37. ID3D11DepthStencilView* dsv = nullptr;                      // Buffer (depth)
  38.  
  39. ID3D11Texture2D* texture_rgb = nullptr;                     // Buffer for rendering the final result, CUDA cannot write here. Only D3D11 CopyResource will be used to fill it up
  40. ID3D11Texture2D* texture_cuda_read = nullptr;               // Buffer for storing the scene that will be processed by CUDA, CUDA will read from here
  41. ID3D11Texture2D* texture_cuda_write = nullptr;              // Buffer for storing the processed scene that will be copied to texture_rgb, CUDA will write here
  42. ID3D11Texture2D* texture_depth;                             // Buffer for the depth information of the scene
  43.  
  44. // The following CUDA resource is used for reading the rendered scene
  45. cudaGraphicsResource* cuda_interop_read = nullptr;         
  46. void* cuda_linear_mem_read = nullptr;
  47. size_t cuda_pitch_read = 0;
  48. // The following CUDA resource is used for writing the processed rendered scene
  49. cudaGraphicsResource* cuda_interop_write = nullptr;
  50. void* cuda_linear_mem_write = nullptr;
  51. size_t cuda_pitch_write = 0;
  52. int cuda_device_count = 0;
  53. bool cuda_enable = true;
  54.  
  // CUDA entry point with C linkage; only declared here, its definition is not part of this section.
  // NOTE(review): presumably implemented in a .cu translation unit - confirm.
  55. extern "C"
  56. {
  57.     cudaError_t cuda_texture_2d(void* surface_in, void* surface_out, size_t width, size_t height, size_t pitch_in, size_t pitch_out, float t);
  58. }
  59.  
  60.  
  // Pipeline state objects created in InitD3D
  61. ID3D11SamplerState* sampler_state = nullptr;
  62. ID3D11RasterizerState* raster_state = nullptr;
  63. ID3D11DepthStencilState* stencil_state = nullptr;
  // Device name appended to the window title in WinMain after InitCuda() returns
  64. char devname[256];
  65.  
  66. // Sample
  // Shader, layout and buffer objects filled in by create_cube() (called from WinMain)
  67. ID3D11PixelShader* sample_pixel_shader = nullptr;
  68. ID3D11VertexShader* sample_vertex_shader = nullptr;
  69. ID3D11InputLayout* sample_vertex_layout = nullptr;
  70. ID3D11Buffer* sample_vertex_buffer = nullptr;
  71. ID3D11Buffer* sample_index_buffer = nullptr;
  72. ID3D11Buffer* sample_constant_buffer = nullptr;
  73.  
  74. // Matrices
  // Initialized in InitD3D; matrix_world is recomputed every frame in RenderFrame
  75. DirectX::XMMATRIX       matrix_world;
  76. DirectX::XMMATRIX       matrix_view;
  77. DirectX::XMMATRIX       matrix_projection;
  78.  
  79. // Function prototypes
  80. // CUDA related
  81. bool getAdapter(IDXGIAdapter** adapter, char* devname); // Retrieve video adapter
  82. void InitCuda();            // Retrieve and initialize CUDA device
  83. cudaError_t RunCudaKernels();
  84. //bool register_d3d_texture_as_cuda_texture_resource(ID3D11Texture2D* d3d_texure, cudaGraphicsResource** cu_resource);
  85. //bool unregister_cuda_texture_resource(cudaGraphicsResource** cu_resource);
  86. // Direct3D related
  87. void InitD3D(HWND hWnd);    // Sets up and initializes Direct3D
  88. void RenderFrame(void);     // Renders a single frame
  89. void Clean(void);       // Closes Direct3D and releases memory
  90.  
  91. static long long frame_counter;
  92.  
  93. // ImGui input handler
  94. extern LRESULT ImGui_ImplWin32_WndProcHandler(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
  95.  
  96. // The WindowProc function prototype
  97. LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam);
  98.  
  99. // The entry point for any Windows program
  100. int WINAPI WinMain(HINSTANCE hInstance,
  101.     HINSTANCE hPrevInstance,
  102.     LPSTR lpCmdLine,
  103.     int nCmdShow)
  104. {
  105.     HWND hWnd;
  106.     HRESULT hr;
  107.     WNDCLASSEX wc;
  108.     std::string window_title = "CUDA Direct3D 11 Interop Demo";
  109.  
  110.     std::cout << "Creating and initializing window" << std::endl;
  111.     ZeroMemory(&wc, sizeof(WNDCLASSEX));
  112.  
  113.     wc.cbSize = sizeof(WNDCLASSEX);
  114.     wc.style = CS_HREDRAW | CS_VREDRAW;
  115.     wc.lpfnWndProc = WindowProc;
  116.     wc.hInstance = hInstance;
  117.     wc.hCursor = LoadCursor(NULL, IDC_ARROW);
  118.     wc.hbrBackground = (HBRUSH)COLOR_WINDOW;
  119.     wc.lpszClassName = "WindowClass";
  120.  
  121.     RegisterClassEx(&wc);
  122.  
  123.     RECT wr = { 0, 0, WIN_WIDTH, WIN_HEIGHT };
  124.     //AdjustWindowRect(&wr, WS_OVERLAPPEDWINDOW, FALSE);
  125.     auto wrStyle = (WS_OVERLAPPED | WS_MINIMIZEBOX | WS_SYSMENU | WS_CAPTION | WS_MINIMIZEBOX);
  126.  
  127.     hWnd = CreateWindowEx(NULL,
  128.         "WindowClass",
  129.         window_title.c_str(),
  130.         wrStyle, //WS_OVERLAPPEDWINDOW,
  131.         300,
  132.         300,
  133.         wr.right - wr.left,
  134.         wr.bottom - wr.top,
  135.         NULL,
  136.         NULL,
  137.         hInstance,
  138.         NULL);
  139.  
  140.     std::cout << "Rendering window on screen" << std::endl;
  141.     ShowWindow(hWnd, nCmdShow);
  142.  
  143.     // Set up and initialize CUDA
  144.     try
  145.     {
  146.         std::cout << "Searching for CUDA device" << std::endl;
  147.         InitCuda();
  148.         window_title += std::string(" (") + std::string(devname) + std::string(")");
  149.         SetWindowText(hWnd, window_title.c_str());
  150.     }
  151.     catch (std::exception& e)
  152.     {
  153.         std::cout << e.what() << std::endl;
  154.         return 2;
  155.     }
  156.  
  157.     // Set up and initialize Direct3D
  158.     try
  159.     {
  160.         std::cout << "Attemptint to create and initialize DirectX device" << std::endl;
  161.         InitD3D(hWnd);
  162.     }
  163.     catch (std::exception& e)
  164.     {
  165.         std::cerr << "Initializing D3D11 failed\n" << e.what() << std::endl;
  166.         return 1;
  167.     }
  168.  
  169.     cudaError_t cr = cudaError::cudaSuccess;
  170.     // Register the readable extra texture as a CUDA resource
  171.     // This resource will be mapped as ReadOnly since it will be where we are getting the rendered scene from in order to process it with the CUDA kernels
  172.     cr = cudaGraphicsD3D11RegisterResource(&cuda_interop_read, texture_cuda_read, cudaGraphicsRegisterFlags::cudaGraphicsRegisterFlagsNone); // cudaGraphicsRegisterFlagsReadOnly
  173.     if (cr != cudaSuccess)
  174.     {
  175.         std::string msg = std::string("Unable to register CUDA texture as resource: ") + std::string(cudaGetErrorString(cr));
  176.         throw std::exception(msg.c_str());
  177.     }
  178.  
  179.     // Register the writeable extra texture as a CUDA resource
  180.     // This resource will be mapped as WriteDiscard since it will be where we are writing the CUDA kernel processed data to
  181.     cr = cudaGraphicsD3D11RegisterResource(&cuda_interop_write, texture_cuda_write, cudaGraphicsRegisterFlags::cudaGraphicsRegisterFlagsNone); // cudaGraphicsRegisterFlagsWriteDiscard
  182.     if (cr != cudaSuccess)
  183.     {
  184.         std::string msg = std::string("Unable to register backbuffer  as CUDA resource: ") + std::string(cudaGetErrorString(cr));
  185.         throw std::exception(msg.c_str());
  186.     }
  187.  
  188.     // Specify mapping strategy for the resource to be read/write
  189.     // since the Direct3D rendering will be loaded, processed and
  190.     // written back by CUDA
  191.     cr = cudaGraphicsResourceSetMapFlags(cuda_interop_write, cudaGraphicsMapFlagsWriteDiscard); //cudaGraphicsMapFlagsWriteDiscard udaGraphicsMapFlagsNone
  192.     if (cr != cudaSuccess)
  193.     {
  194.         std::string msg = std::string("Unable to set flags for mapped backbuffer CUDA graphics resources: ") + std::string(cudaGetErrorString(cr));
  195.         throw std::exception(msg.c_str());
  196.     }
  197.     cr = cudaGraphicsResourceSetMapFlags(cuda_interop_read, cudaGraphicsMapFlagsReadOnly); //cudaGraphicsMapFlagsReadOnly cudaGraphicsMapFlagsNone
  198.     if (cr != cudaSuccess)
  199.     {
  200.         std::string msg = std::string("Unable to set flags for mapped CUDA graphics resources: ") + std::string(cudaGetErrorString(cr));
  201.         throw std::exception(msg.c_str());
  202.     }
  203.  
  204.     // The registered resource can only be mapped as a texture. In order to write to it, a CUDA array is required
  205.     RECT rect;
  206.     int width = 0;
  207.     int height = 0;
  208.     if (GetWindowRect(hWnd, &rect))
  209.     {
  210.         width = rect.right - rect.left;
  211.         height = rect.bottom - rect.top;
  212.     }
  213.     // Allocate pitched memory for the backbuffer, which we will write to
  214.     cr = cudaMallocPitch(&cuda_linear_mem_write, &cuda_pitch_write, width * sizeof(float) * 4, height);
  215.     if (cr != cudaSuccess)
  216.     {
  217.         std::string msg = std::string("Unable to allocate pitched backbuffer CUDA memory: ") + std::string(cudaGetErrorString(cr));
  218.         throw std::exception(msg.c_str());
  219.     }
  220.     cr = cudaMemset(cuda_linear_mem_write, 1, cuda_pitch_write * height);
  221.     if (cr != cudaSuccess)
  222.     {
  223.         std::string msg = std::string("Unable to set backbuffer CUDA memory: ") + std::string(cudaGetErrorString(cr));
  224.         throw std::exception(msg.c_str());
  225.     }
  226.     // Allocate pitched memory for the CUDA resource, which we will be reading from
  227.     cr = cudaMallocPitch(&cuda_linear_mem_read, &cuda_pitch_read, width * sizeof(float) * 4, height);
  228.     if (cr != cudaSuccess)
  229.     {
  230.         std::string msg = std::string("Unable to allocate pitched CUDA memory: ") + std::string(cudaGetErrorString(cr));
  231.         throw std::exception(msg.c_str());
  232.     }
  233.     cr = cudaMemset(cuda_linear_mem_read, 1, cuda_pitch_read * height);
  234.     if (cr != cudaSuccess)
  235.     {
  236.         std::string msg = std::string("Unable to set CUDA memory: ") + std::string(cudaGetErrorString(cr));
  237.         throw std::exception(msg.c_str());
  238.     }
  239.     // Setup ImGui
  240.     IMGUI_CHECKVERSION();
  241.     ImGui::CreateContext();
  242.     ImGuiIO& io = ImGui::GetIO();
  243.     ImGui_ImplWin32_Init(hWnd);
  244.     ImGui_ImplDX11_Init(device, device_context);
  245.     ImGui::StyleColorsDark();
  246.  
  247.     // Enter the main loop:
  248.     MSG msg;
  249.  
  250.     // Create cube sample
  251.     hr = create_cube(device, device_context,
  252.         &sample_vertex_layout, &sample_vertex_buffer, &sample_vertex_shader,
  253.         &sample_index_buffer, &sample_constant_buffer, &sample_pixel_shader
  254.     );
  255.  
  256.     if (FAILED(hr))
  257.     {
  258.         throw std::exception("FAILED TO CREATE/LOAD SAMPLE");
  259.     }
  260.  
  261.     while (TRUE)
  262.     {
  263.         // Process window-related messages
  264.         if (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
  265.         {
  266.             TranslateMessage(&msg);
  267.             DispatchMessage(&msg);
  268.  
  269.             if (msg.message == WM_QUIT)
  270.                 break;
  271.         }
  272.  
  273.         // Render the frame
  274.         try
  275.         {
  276.             RenderFrame();
  277.         }
  278.         catch (std::exception& e)
  279.         {
  280.             std::cout << "Rendering frame failed\n" << e.what() << std::endl;
  281.         }
  282.     }
  283.  
  284.     // Clean up DirectX and COM
  285.     Clean();
  286.  
  287.     return msg.wParam;
  288. }
  289.  
  290. // This is the main message handler for the program
  291. LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
  292. {
  293.     if (ImGui_ImplWin32_WndProcHandler(hWnd, message, wParam, lParam))
  294.     {
  295.         return true;
  296.     }
  297.  
  298.     switch (message)
  299.     {
  300.     case WM_DESTROY:
  301.     {
  302.         PostQuitMessage(0);
  303.         return EXIT_SUCCESS;
  304.     } break;
  305.     case WM_KEYDOWN:
  306.     {
  307.         unsigned char keycode = static_cast<unsigned char>(wParam);
  308.         //...
  309.     }
  310.     break;
  311.     }
  312.  
  313.     return DefWindowProc(hWnd, message, wParam, lParam);
  314. }
  315.  
  316.  
  317. // This function initializes and prepares Direct3D for use
  318. void InitD3D(HWND hWnd)
  319. {
  320.     RECT rect;
  321.     int width = 0;
  322.     int height = 0;
  323.     if (GetWindowRect(hWnd, &rect))
  324.     {
  325.         width = rect.right - rect.left;
  326.         height = rect.bottom - rect.top;
  327.     }
  328.  
  329.     if (!width || !height)
  330.     {
  331.         std::cerr << "Initializing Direct3D received window of size 0x0" << std::endl;
  332.         exit(1);
  333.     }
  334.  
  335.     // Store the status of various DX-calls
  336.     HRESULT hr = 0;
  337.  
  338.     // Create a struct to hold information about the swap chain
  339.     DXGI_SWAP_CHAIN_DESC swapchainDesc;
  340.  
  341.     // Clear out the struct for use
  342.     ZeroMemory(&swapchainDesc, sizeof(DXGI_SWAP_CHAIN_DESC));
  343.  
  344.     // Fill the swap chain description struct
  345.     swapchainDesc.BufferCount = 1;                                    // one back buffer
  346.     swapchainDesc.BufferDesc.Width = width;
  347.     swapchainDesc.BufferDesc.Height = height;
  348.     swapchainDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;     // use 32-bit color DXGI_FORMAT_R8G8B8A8_UNORM, 4 channels, 1 byte (uchar) per channel | DXGI_FORMAT_R32G32B32_FLOAT
  349.     swapchainDesc.BufferDesc.RefreshRate.Numerator = 60;
  350.     swapchainDesc.BufferDesc.RefreshRate.Denominator = 1;
  351.     swapchainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;      // how swap chain is to be used
  352.     swapchainDesc.OutputWindow = hWnd;                                // the window to be used
  353.     swapchainDesc.SampleDesc.Count = 1;                               // how many multisamples (1-4, above is not guaranteed for DX11 GPUs)
  354.     swapchainDesc.SampleDesc.Quality = 0;                             // multisample quality level
  355.     swapchainDesc.Windowed = TRUE;                                    // windowed/full-screen mode
  356.  
  357.     // Configure DX feature level
  358.     // https://learn.microsoft.com/en-us/windows/win32/direct3d11/overviews-direct3d-11-devices-downlevel-intro
  359.     D3D_FEATURE_LEVEL featureLevels[] =
  360.     {
  361.         D3D_FEATURE_LEVEL_11_1,
  362.         D3D_FEATURE_LEVEL_11_0,
  363.         D3D_FEATURE_LEVEL_10_1,
  364.         D3D_FEATURE_LEVEL_10_0
  365.     };
  366.     UINT numFeatureLevels = ARRAYSIZE(featureLevels);
  367.     D3D_FEATURE_LEVEL featureLevel;
  368.  
  369.     char devname_dx[128];
  370.     bool device_ok = getAdapter(&adapter, devname_dx);
  371.  
  372.     DXGI_ADAPTER_DESC adapterDesc;
  373.     adapter->GetDesc(&adapterDesc); //devid = 9436, subsysid = 177803304
  374.     std::cout << "DirectX GPU " << devname_dx << " : [BusID " << adapterDesc.DeviceId << " | DevID " << adapterDesc.SubSysId << "]" << std::endl;
  375.  
  376.     // Create a device, device context and swap chain using the information in the scd struct
  377.     // SOURCE https://learn.microsoft.com/en-us/windows/win32/direct3d11/overviews-direct3d-11-devices-initialize
  378.     hr = D3D11CreateDeviceAndSwapChain(
  379.         adapter,                    // graphics adapter, which was already checked for CUDA support
  380.         D3D_DRIVER_TYPE_UNKNOWN,    // If adapter is not NULL, we need to use UNKNOWN, otherwise use D3D_DRIVER_TYPE_HARDWARE
  381.         NULL,                       // software
  382.         NULL,                       // flags  D3D11_CREATE_DEVICE_DEBUG (requires Direct3D SDK Debug Layer -> currently unable to install for unknown reason)
  383.         featureLevels,              // D3D features to enable
  384.         numFeatureLevels,           // D3D feature count
  385.         D3D11_SDK_VERSION,          // D3D SDK version (here 11)
  386.         &swapchainDesc,             // swap chain description
  387.         &swapchain,                 // swap chain
  388.         &device,                    // D3D device
  389.         &featureLevel,              // supported feature level (can also use DXGI_ADAPTER_DESC variable to retrieve the information)
  390.                                     // Read note for pFeatureLevels at https://learn.microsoft.com/en-us/windows/win32/api/d3d11/nf-d3d11-d3d11createdeviceandswapchain#parameters for DX11.1
  391.         &device_context);           // D3D device context
  392.     if (FAILED(hr))
  393.     {
  394.         std::string msg = std::string("[InitD3D] Failed to create DX device and swap chain: ") + std::to_string(hr);
  395.         throw std::exception(msg.c_str());
  396.     }
  397.  
  398.     // Release adapter
  399.     adapter->Release();
  400.     device->GetImmediateContext(&device_context);
  401.  
  402.     // Setup render view
  403.     hr = swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID*)&texture_rgb);
  404.     if (FAILED(hr))
  405.     {
  406.         std::string msg = std::string("[InitD3D] Failed to get address of back buffer: ") + std::to_string(hr);
  407.         throw std::exception(msg.c_str());
  408.     }
  409.  
  410.     hr = device->CreateRenderTargetView(texture_rgb, nullptr, &rtv);
  411.     if (FAILED(hr))
  412.     {
  413.         std::string msg = std::string("[InitD3D] Failed to create render target view (main): ") + std::to_string(hr);
  414.         throw std::exception(msg.c_str());
  415.     }
  416.     // Leave the pointer, release the texture
  417.     //backbuffer->Release();
  418.  
  419.     //Setup texture for rendering the intial scene, which will be loaded in CUDA
  420.     D3D11_TEXTURE2D_DESC texture_cuda_read_desc;
  421.     //texture_rgb->GetDesc(&texture_rgb_cudaDesc);
  422.     ZeroMemory(&texture_cuda_read_desc, sizeof(D3D11_TEXTURE2D_DESC));
  423.     texture_cuda_read_desc.Width = width;
  424.     texture_cuda_read_desc.Height = height;
  425.     texture_cuda_read_desc.MipLevels = 1;
  426.     texture_cuda_read_desc.ArraySize = 1;
  427.     texture_cuda_read_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; //DXGI_FORMAT_R32G32B32A32_FLOAT
  428.     texture_cuda_read_desc.SampleDesc.Count = 1;
  429.     texture_cuda_read_desc.Usage = D3D11_USAGE::D3D11_USAGE_DEFAULT;//D3D11_USAGE_DEFAULT;
  430.     texture_cuda_read_desc.BindFlags = D3D11_BIND_FLAG::D3D11_BIND_RENDER_TARGET | D3D11_BIND_FLAG::D3D11_BIND_SHADER_RESOURCE;
  431.     //texture_rgb_cudaDesc.MiscFlags = D3D11_RESOURCE_MISC_FLAG::D3D11_RESOURCE_MISC_SHARED_NTHANDLE;
  432.     hr = device->CreateTexture2D(&texture_cuda_read_desc, nullptr, &texture_cuda_read);
  433.     if (FAILED(hr))
  434.     {
  435.         std::string msg = std::string("[InitD3D] Failed to create texture (for readining in CUDA): ") + std::to_string(hr);
  436.         throw std::exception(msg.c_str());
  437.     }
  438.     // Create the render target view for CUDA
  439.     D3D11_RENDER_TARGET_VIEW_DESC rtv_cudaDesc;
  440.     rtv_cudaDesc.Format = texture_cuda_read_desc.Format;
  441.     rtv_cudaDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
  442.     rtv_cudaDesc.Texture2D.MipSlice = 0;
  443.     hr = device->CreateRenderTargetView(texture_cuda_read, &rtv_cudaDesc, &rtv_cuda);
  444.     if (FAILED(hr))
  445.     {
  446.         std::string msg = std::string("[InitD3D] Failed to create render target view (CUDA): ") + std::to_string(hr);
  447.         throw std::exception(msg.c_str());
  448.     }
  449.  
  450.     // Setup the description of the shader resource view for CUDA
  451.     D3D11_SHADER_RESOURCE_VIEW_DESC srv_cudaDesc;
  452.     srv_cudaDesc.Format = texture_cuda_read_desc.Format;
  453.     srv_cudaDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
  454.     srv_cudaDesc.Texture2D.MostDetailedMip = 0;
  455.     srv_cudaDesc.Texture2D.MipLevels = 1;
  456.  
  457.     // Create the shader resource view.
  458.     device->CreateShaderResourceView(texture_cuda_read, &srv_cudaDesc, &srv_cuda);
  459.  
  460.     //Setup texture where the results from CUDA will be written to
  461.     // TODO Remove this and use texture_cuda_read_desc in CreateTexture2D() in case everything is the same (bind flags?)
  462.     D3D11_TEXTURE2D_DESC texture_cuda_write_desc;
  463.     ZeroMemory(&texture_cuda_write_desc, sizeof(D3D11_TEXTURE2D_DESC));
  464.     texture_cuda_write_desc.Width = texture_cuda_read_desc.Width;
  465.     texture_cuda_write_desc.Height = texture_cuda_read_desc.Height;
  466.     texture_cuda_write_desc.MipLevels = texture_cuda_read_desc.MipLevels;
  467.     texture_cuda_write_desc.ArraySize = texture_cuda_read_desc.ArraySize;
  468.     texture_cuda_write_desc.Format = texture_cuda_read_desc.Format;
  469.     texture_cuda_write_desc.SampleDesc.Count = texture_cuda_read_desc.SampleDesc.Count;
  470.     texture_cuda_write_desc.Usage = texture_cuda_read_desc.Usage;
  471.     texture_cuda_write_desc.BindFlags = D3D11_BIND_FLAG::D3D11_BIND_RENDER_TARGET | D3D11_BIND_FLAG::D3D11_BIND_SHADER_RESOURCE;
  472.     //texture_rgb_cudaDesc.MiscFlags = D3D11_RESOURCE_MISC_FLAG::D3D11_RESOURCE_MISC_SHARED_NTHANDLE;
  473.     hr = device->CreateTexture2D(&texture_cuda_write_desc, nullptr, &texture_cuda_write);
  474.     if (FAILED(hr))
  475.     {
  476.         std::string msg = std::string("[InitD3D] Failed to create texture (for writing in CUDA): ") + std::to_string(hr);
  477.         throw std::exception(msg.c_str());
  478.     }
  479.  
  480.     // Setup depth stencil view
  481.     D3D11_TEXTURE2D_DESC depthStencilDesc;
  482.     ZeroMemory(&depthStencilDesc, sizeof(D3D11_TEXTURE2D_DESC));
  483.     depthStencilDesc.Width = width;
  484.     depthStencilDesc.Height = height;
  485.     depthStencilDesc.MipLevels = 1;
  486.     depthStencilDesc.ArraySize = 1;
  487.     depthStencilDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT;
  488.     depthStencilDesc.SampleDesc.Count = swapchainDesc.SampleDesc.Count;
  489.     depthStencilDesc.SampleDesc.Quality = swapchainDesc.SampleDesc.Quality;
  490.     depthStencilDesc.Usage = D3D11_USAGE::D3D11_USAGE_DEFAULT;
  491.     depthStencilDesc.BindFlags = D3D11_BIND_FLAG::D3D11_BIND_DEPTH_STENCIL;
  492.     depthStencilDesc.CPUAccessFlags = 0;
  493.     depthStencilDesc.MiscFlags = 0;
  494.     hr = device->CreateTexture2D(&depthStencilDesc, nullptr, &texture_depth);
  495.     if (FAILED(hr))
  496.     {
  497.         std::string msg = std::string("[InitD3D] Failed to create depth buffer: ") + std::to_string(hr);
  498.         throw std::exception(msg.c_str());
  499.     }
  500.     hr = device->CreateDepthStencilView(texture_depth, nullptr, &dsv);
  501.  
  502.     // Set the viewport
  503.     D3D11_VIEWPORT viewport;
  504.     ZeroMemory(&viewport, sizeof(D3D11_VIEWPORT));
  505.  
  506.     viewport.TopLeftX = 0;
  507.     viewport.TopLeftY = 0;
  508.     viewport.Width = WIN_WIDTH;
  509.     viewport.Height = WIN_HEIGHT;
  510.     viewport.MinDepth = 0.0f;   // Closest to camera
  511.     viewport.MaxDepth = 1.0f;   // Furthest away from camera
  512.  
  513.     device_context->RSSetViewports(1, &viewport);
  514.  
  515.     // Initialize the world matrix
  516.     matrix_world = DirectX::XMMatrixIdentity();
  517.  
  518.     // Initialize the view matrix
  519.     DirectX::XMVECTOR Eye = DirectX::XMVectorSet(0.0f, 4.0f, -5.0f, 0.0f);  //0 1 -5 0
  520.     DirectX::XMVECTOR At = DirectX::XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
  521.     DirectX::XMVECTOR Up = DirectX::XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
  522.     matrix_view = DirectX::XMMatrixLookAtLH(Eye, At, Up);
  523.  
  524.     // Initialize the projection matrix
  525.     matrix_projection = DirectX::XMMatrixPerspectiveFovLH(DirectX::XM_PIDIV2, viewport.Width / (FLOAT)viewport.Height, 0.01f, 100.0f);
  526.  
  527.     // Sampler state
  528.     {
  529.         D3D11_SAMPLER_DESC samplerStateDesc;
  530.         samplerStateDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
  531.         samplerStateDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
  532.         samplerStateDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
  533.         samplerStateDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
  534.         samplerStateDesc.MinLOD = 0;
  535.         samplerStateDesc.MaxLOD = 8;
  536.         samplerStateDesc.MipLODBias = 0;
  537.         samplerStateDesc.MaxAnisotropy = 1;
  538.  
  539.         hr = device->CreateSamplerState(&samplerStateDesc, &sampler_state);
  540.         if (FAILED(hr))
  541.         {
  542.             std::string msg = std::string("[InitD3D] Failed to create sampler state: ") + std::to_string(hr);
  543.             throw std::exception(msg.c_str());
  544.             //throw std::exception("[InitD3D] Failed to create sampler state");
  545.         }
  546.         device_context->PSSetSamplers(0, 1, &sampler_state);
  547.     }
  548.  
  549.     // Rasterizer state
  550.     {
  551.         D3D11_RASTERIZER_DESC rasterizerStateDesc;
  552.         rasterizerStateDesc.FillMode = D3D11_FILL_SOLID;    //D3D11_FILL_SOLID      D3D11_FILL_WIREFRAME
  553.         rasterizerStateDesc.CullMode = D3D11_CULL_BACK;
  554.         rasterizerStateDesc.FrontCounterClockwise = false;
  555.         rasterizerStateDesc.DepthBias = false;
  556.         rasterizerStateDesc.DepthBiasClamp = 0;
  557.         rasterizerStateDesc.SlopeScaledDepthBias = 0;
  558.         rasterizerStateDesc.DepthClipEnable = false;
  559.         rasterizerStateDesc.ScissorEnable = false;
  560.         rasterizerStateDesc.MultisampleEnable = false;
  561.         rasterizerStateDesc.AntialiasedLineEnable = false;
  562.  
  563.         hr = device->CreateRasterizerState(&rasterizerStateDesc, &raster_state);
  564.         if (FAILED(hr))
  565.         {
  566.             std::string msg = std::string("[InitD3D] Failed to create raster state: ") + std::to_string(hr);
  567.             throw std::exception(msg.c_str());
  568.         }
  569.         device_context->RSSetState(raster_state);
  570.     }
  571.  
  572.     // Depth stencil state
  573.     {
  574.         D3D11_DEPTH_STENCIL_DESC stencilDesc;
  575.         ZeroMemory(&stencilDesc, sizeof(stencilDesc));
  576.  
  577.         stencilDesc.DepthEnable = true;
  578.         stencilDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
  579.         stencilDesc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL;
  580.  
  581.         hr = device->CreateDepthStencilState(&stencilDesc, &stencil_state);
  582.         if (FAILED(hr))
  583.         {
  584.             std::string msg = std::string("[InitD3D] Failed to depth stencil state: ") + std::to_string(hr);
  585.             throw std::exception(msg.c_str());
  586.         }
  587.         device_context->OMSetDepthStencilState(stencil_state, 0);
  588.     }
  589. }
  590.  
  591. // This is the function used to render a single frame
  592. void RenderFrame(void)
  593. {
  594.     // Store the status of various DX-calls
  595.     HRESULT hr = 0;
  596.  
  597.     // The background color
  598.     float color[4] = { 0.0f, 0.2f, 0.4f, 1.0f };
  599.  
  600.     // Update our time
  601.     static float t = 0.0f;
  602.     static ULONGLONG timeStart = 0;
  603.     ULONGLONG timeCur = GetTickCount64();
  604.     if (timeStart == 0)
  605.         timeStart = timeCur;
  606.     t = (timeCur - timeStart) / 1000.0f;
  607.  
  608.     // Animate the cube
  609.     static float translationOffset[3] = { 0.0f, 0.0f, 0.0f };
  610.     matrix_world = DirectX::XMMatrixRotationY(t) * DirectX::XMMatrixTranslation(translationOffset[0], translationOffset[1], translationOffset[2]);
  611.  
  612.     // Update variables
  613.     ConstantBuffer cb;
  614.     cb.mWorld = DirectX::XMMatrixTranspose(matrix_world);
  615.     cb.mView = DirectX::XMMatrixTranspose(matrix_view);
  616.     cb.mProjection = DirectX::XMMatrixTranspose(matrix_projection);
  617.     device_context->UpdateSubresource(sample_constant_buffer, 0, nullptr, &cb, 0, 0);
  618.  
  619.     // Clear the main view, which will be then populated render the results from CUDA and the ImGUI
  620.     // TODO Possible can be removed if CopyResource (copy CUDA result to main RTV) indeed clears the buffer
  621.     //device_context->OMSetRenderTargets(1, &rtv, dsv);
  622.     //device_context->ClearRenderTargetView(rtv, color);
  623.     //device_context->ClearDepthStencilView(dsv, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0f, 0);
  624.  
  625.     if (cuda_enable)
  626.     {
  627.         // Change to the CUDA render target view
  628.         device_context->OMSetRenderTargets(1, &rtv_cuda, dsv);
  629.  
  630.         // Clear the buffer
  631.         device_context->ClearRenderTargetView(rtv_cuda, color);
  632.         device_context->ClearDepthStencilView(dsv, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0f, 0);
  633.  
  634.         // Render the scene (no ImGUI) to the current RTV
  635.         device_context->VSSetShader(sample_vertex_shader, nullptr, 0);
  636.         device_context->VSSetConstantBuffers(0, 1, &sample_constant_buffer);
  637.         device_context->PSSetShader(sample_pixel_shader, nullptr, 0);
  638.         device_context->DrawIndexed(36, 0, 0);        // 36 vertices needed for 12 triangles in a triangle list
  639.  
  640.         // TODO Change RTV
  641.         //device_context->OMSetRenderTargets(1, &rtv, dsv);
  642.  
  643.         /*
  644.         // For debugging and perhaps later for actual use
  645.         // By using staging texture and mapping it, one can then obtain direct
  646.         // access to the pData (pixel data, e.g. RGBA)
  647.         device_context->CopySubresourceRegion(texture_rgb_cuda, 0, 0, 0, 0, texture_rgb, 0, nullptr);
  648.         D3D11_MAPPED_SUBRESOURCE* texture_rgb_cuda_data = nullptr;
  649.         hr = device_context->Map(texture_rgb_cuda, 0, D3D11_MAP::D3D11_MAP_READ_WRITE, 0, texture_rgb_cuda_data);
  650.         if (FAILED(hr))
  651.         {
  652.             std::string msg = std::string("[RenderFrame] Failed to map texture sub-resource: ") + std::to_string(hr);
  653.             throw std::exception(msg.c_str());
  654.         }
  655.         device_context->Unmap(texture_rgb_cuda, 0);
  656.         */
  657.  
  658.         // Map the interop CUDA resources (MapFlags for each resource have already been set before this)
  659.         cudaError_t cr;
  660.         cudaStream_t stream = nullptr;
  661.         const int cuda_mapped_resources_count = 2;
  662.         cudaGraphicsResource* cuda_mapped_resources[cuda_mapped_resources_count] = {
  663.             cuda_interop_read,
  664.             cuda_interop_write
  665.         };
  666.  
  667.         cr = cudaGraphicsMapResources(cuda_mapped_resources_count, cuda_mapped_resources, stream);
  668.         if (cr != cudaSuccess)
  669.         {
  670.             std::string msg = std::string("Unable to map CUDA graphics resources: ") + std::string(cudaGetErrorString(cr));
  671.             throw std::exception(msg.c_str());
  672.         }
  673.  
  674.         // Run CUDA kernels
  675.         cr = RunCudaKernels();
  676.         if (cr != cudaSuccess)
  677.         {
  678.             std::string msg = std::string("Unable to run CUDA kernels: ") + std::string(cudaGetErrorString(cr));
  679.         }
  680.  
  681.         // Unmap resource
  682.         cr = cudaGraphicsUnmapResources(cuda_mapped_resources_count, cuda_mapped_resources, stream);
  683.         if (cr != cudaSuccess)
  684.         {
  685.             std::string msg = std::string("Unable to unmap CUDA graphics resources: ") + std::string(cudaGetErrorString(cr));
  686.             throw std::exception(msg.c_str());
  687.         }
  688.  
  689.         //device_context->CopyResource(texture_rgb, texture_rgb_cuda);
  690.         //device_context->UpdateSubresource(texture_rgb, 0, nullptr, &texture_rgb_cuda, 0, 0);
  691.  
  692.         cr = cudaDeviceSynchronize();
  693.         if (cr != cudaSuccess)
  694.         {
  695.             std::string msg = std::string("Unable to synchronize GPU and CPU: ") + std::string(cudaGetErrorString(cr));
  696.             throw std::exception(msg.c_str());
  697.         }
  698.  
  699.         device_context->CopyResource(texture_rgb, texture_cuda_write);
  700.     }
  701.     else
  702.     {
  703.         // We render the scene normally if CUDA processing has been disabled via the ImGUI
  704.         device_context->OMSetRenderTargets(1, &rtv, dsv);
  705.         device_context->ClearRenderTargetView(rtv, color);
  706.         device_context->ClearDepthStencilView(dsv, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0f, 0);
  707.  
  708.         // Render a triangle strip
  709.         device_context->VSSetShader(sample_vertex_shader, nullptr, 0);
  710.         device_context->VSSetConstantBuffers(0, 1, &sample_constant_buffer);
  711.         device_context->PSSetShader(sample_pixel_shader, nullptr, 0);
  712.         device_context->DrawIndexed(36, 0, 0);        // 36 vertices needed for 12 triangles in a triangle list
  713.     }
  714.  
  715.     // Switch back to the main RTV to add the ImGUI components
  716.     device_context->OMSetRenderTargets(1, &rtv, dsv);
  717.  
  718.     // Render ImGUI UI elements
  719.     // Start frame
  720.     ImGui_ImplDX11_NewFrame();
  721.     ImGui_ImplWin32_NewFrame();
  722.  
  723.     // Create frame
  724.     ImGui::NewFrame();
  725.     ImGui::Begin("Controls / Debug Information");
  726.     ImGui::Text((cuda_device_count ? std::string("Found " + std::to_string(cuda_device_count) + " CUDA " + (cuda_device_count == 1 ? "device" : "devices")).c_str() : "No CUDA devices found!"));
  727.     ImGui::Text(devname);
  728.     ImGui::DragFloat3("Trans X/Y/Z", translationOffset, 0.1f, -5.0f, 5.0f);
  729.     std::string cb_label_cuda_enabled = std::string("CUDA ");
  730.     cb_label_cuda_enabled += (cuda_enable ? "enabled" : "disabled");
  731.     ImGui::Checkbox(cb_label_cuda_enabled.c_str(), &cuda_enable);
  732.     ImGui::End();
  733.     // Assemble together the ImGui draw data
  734.     ImGui::Render();
  735.     // Render ImGui
  736.     ImGui_ImplDX11_RenderDrawData(ImGui::GetDrawData());
  737.  
  738.     // Present the main render target
  739.     // Because the maximum frame latency is set to 1,
  740.     // the render loop will generally be throttled to
  741.     // the screen refresh rate, typically around
  742.     // 60 Hz, by sleeping the application on Present
  743.     // until the screen is refreshed.
  744.     hr = swapchain->Present(1, 0);
  745.     if (FAILED(hr))
  746.     {
  747.         std::string msg = std::string("[RenderFrame] Failed to swap back with front buffer: ") + std::to_string(hr);
  748.         throw std::exception(msg.c_str());
  749.     }
  750. }
  751.  
  752.  
  753. // Clean up CUDA, Direct3D and COM objects
  754. void Clean(void)
  755. {
  756.     cudaError_t cr = cudaSuccess;
  757.  
  758.     // Unregister CUDA resources
  759.     if (cuda_interop_write)
  760.     {
  761.         cr = cudaGraphicsUnregisterResource(cuda_interop_write);
  762.         printLastCudaError("cudaGraphicsUnregisterResource failed");
  763.  
  764.         if (cr != cudaSuccess)
  765.         {
  766.             std::string msg = std::string("[Clean] Failed to unregister CUDA resource.\n") + std::string(cudaGetErrorString(cr));
  767.             throw std::exception(msg.c_str());
  768.         }
  769.     }
  770.  
  771.     // Close and release all existing COM objects
  772.     if (sample_constant_buffer)
  773.         sample_constant_buffer->Release();
  774.     if (sample_vertex_buffer)
  775.         sample_vertex_buffer->Release();
  776.     if (sample_index_buffer)
  777.         sample_index_buffer->Release();
  778.     if (sample_vertex_layout)
  779.         sample_vertex_layout->Release();
  780.     if (sample_vertex_shader)
  781.         sample_vertex_shader->Release();
  782.     if (sample_pixel_shader)
  783.         sample_pixel_shader->Release();
  784.  
  785.     if (rtv)
  786.         rtv->Release();
  787.     if (rtv_cuda)
  788.         rtv_cuda->Release();
  789.     if (srv_cuda)
  790.         srv_cuda->Release();
  791.  
  792.     if (swapchain)
  793.         swapchain->Release();
  794.     if (device_context)
  795.         device_context->Release();
  796.     if (device)
  797.         device->Release();
  798.  
  799.     texture_rgb->Release();
  800.     texture_cuda_read->Release();
  801.     texture_depth->Release();
  802. }
  803.  
  804.  
  805. bool getAdapter(IDXGIAdapter** adapter, char* devname)
  806. {
  807.     HRESULT hr = S_OK;
  808.     cudaError_t cr = cudaSuccess;
  809.     UINT adapter_idx = 0;
  810.     IDXGIFactory* pFactory;
  811.  
  812.     hr = CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)(&pFactory));
  813.  
  814.     if (FAILED(hr))
  815.     {
  816.         std::string msg = std::string("[getAdapter] Unable to create DXGI factory: ") + std::to_string(hr);
  817.         throw std::exception(msg.c_str());
  818.         return false;
  819.     }
  820.  
  821.     for (; !(*adapter); ++adapter_idx) {
  822.         // Get a candidate DXGI adapter
  823.         IDXGIAdapter* pAdapter = NULL;
  824.         hr = pFactory->EnumAdapters(adapter_idx, &pAdapter);
  825.  
  826.         if (FAILED(hr))
  827.         {
  828.             break;  // no compatible adapters found
  829.         }
  830.  
  831.         // Query to see if there exists a corresponding compute device
  832.         int cuDevice;
  833.         cr = cudaD3D11GetDevice(&cuDevice, pAdapter);
  834.  
  835.         if (cr == cudaSuccess) {
  836.             // If so, mark it as the one against which to create our d3d10 device
  837.             (*adapter) = pAdapter;
  838.             (*adapter)->AddRef();
  839.             ++adapter_idx;
  840.             break;
  841.         }
  842.  
  843.         pAdapter->Release();
  844.     }
  845.  
  846.     std::cout << "Found " << (int)adapter_idx << " D3D11 adapater(s)" << std::endl;
  847.  
  848.     pFactory->Release();
  849.  
  850.     if (!adapter) {
  851.         std::cerr << "Unable to find a D3D11 adapater with compute capability" << std::endl;
  852.         return false;
  853.     }
  854.  
  855.     DXGI_ADAPTER_DESC adapterDesc;
  856.     (*adapter)->GetDesc(&adapterDesc);
  857.     //wcstombs(devname, adapterDesc.Description, 128);
  858.  
  859.     std::cout << "Found a D3D11 adapater with compute capability" << std::endl;
  860.  
  861.     return true;
  862. }
  863.  
  864.  
  865. // This function initializes and prepares CUDA for use
  866. void InitCuda()
  867. {
  868.     int nGraphicsGPU = 0;
  869.     bool bFoundGraphics = false;
  870.  
  871.     // This function call returns 0 if there are no CUDA capable devices.
  872.     cudaError_t cr = cudaGetDeviceCount(&cuda_device_count);
  873.  
  874.     if (cr != cudaSuccess)
  875.     {
  876.         std::string msg = std::string("[InitCuda] Failed retrieve number of CUDA devices: ") + std::string(cudaGetErrorString(cr));
  877.         throw std::exception(msg.c_str());
  878.     }
  879.  
  880.     if (cuda_device_count == 0) {
  881.         cuda_enable = false;
  882.     }
  883.     else{
  884.         std::cout << "Found " << cuda_device_count << " CUDA capable device(s)" << std::endl;
  885.     }
  886.  
  887.     // Get CUDA device properties
  888.     cudaDeviceProp deviceProp;
  889.  
  890.     for (int dev = 0; dev < cuda_device_count; ++dev) {
  891.         cr = cudaGetDeviceProperties(&deviceProp, dev);
  892.         strcpy_s(devname, deviceProp.name);
  893.         std::cout << "CUDA GPU " << dev << " \"" << devname << "\" : [BusID " << deviceProp.pciBusID << " | DevID " << deviceProp.pciDeviceID << "]" << std::endl;
  894.  
  895.         if (cr != cudaSuccess)
  896.         {
  897.             std::string msg = std::string("[InitCuda] Failed to retrieve properties of CUDA device: ") + std::string(cudaGetErrorString(cr));
  898.             throw std::exception(msg.c_str());
  899.         }
  900.  
  901.         break;
  902.     }
  903. }
  904.  
  905. cudaError_t RunCudaKernels()
  906. {
  907.     static float t = 0.0f;
  908.     cudaError_t cr = cudaSuccess;
  909.  
  910.     /*
  911.     cudaSurfaceObject_t cuda_so_read, cuda_so_write;
  912.     cudaResourceDesc cuda_so_read_desc, cuda_so_write_desc;
  913.     cuda_so_read_desc.resType = cudaResourceTypeArray;
  914.     cuda_so_read_desc.res.array = ;
  915.     */
  916.     //cr = cudaCreateSurfaceObject(&cuda_so_read, cuda_so_read_desc);
  917.  
  918.     // populate the 2d texture
  919.     cudaArray* cu_arr_read, * cu_arr_write;
  920.     cr = cudaGraphicsSubResourceGetMappedArray(&cu_arr_read, cuda_interop_read, 0, 0);  // cuda_interop_read
  921.     if (cr != cudaSuccess)
  922.     {
  923.         return cr;
  924.     }
  925.     cr = cudaGraphicsSubResourceGetMappedArray(&cu_arr_write, cuda_interop_write, 0, 0);            // cuda_interop_write
  926.     if (cr != cudaSuccess)
  927.     {
  928.         return cr;
  929.     }
  930.  
  931.     // kick off the kernel and send the staging buffer cudaLinearMemory as an argument to allow the kernel to write to it
  932.     cr = cuda_texture_2d(
  933.         cuda_linear_mem_read, cuda_linear_mem_write,
  934.         WIN_WIDTH, WIN_HEIGHT,
  935.         cuda_pitch_read, cuda_pitch_write,
  936.         t
  937.     );
  938.     if (cr != cudaSuccess)
  939.     {
  940.         return cr;
  941.     }
  942.  
  943.     // then we want to copy cudaLinearMemory to the D3D texture, via its mapped form : cudaArray
  944.     cr = cudaMemcpy2DToArray(
  945.         cu_arr_write,                               // dst array
  946.         0, 0,                                       // offset
  947.         cuda_linear_mem_write, cuda_pitch_write,    // src cuda_linear_mem_read
  948.         WIN_WIDTH * 4 * sizeof(UINT8), WIN_HEIGHT,  // extent       // sizeof(...) is based on DXGI_FORMAT of texture, e.g. DXGI_FORMAT_R8G8B8A8_UNORM is uint8, while DXGI_FORMAT_R32G32B32A32_FLOAT
  949.         cudaMemcpyDeviceToDevice);                  // copying will occur only on the GPU
  950.     if (cr != cudaSuccess)
  951.     {
  952.         return cr;
  953.     }
  954.  
  955.     t += 0.1f;
  956.  
  957.     return cr;
  958. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement