Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Include the basic windows header files and the Direct3D header files
#include <windows.h>
#include <windowsx.h>
#include <d3d11.h> // Direct3D functionality
#include <DirectXMath.h> // Use this since D3DX has been deprecated
#include <dxgi.h>
#include <iostream>
#include <stdexcept> // For exceptions
#include <string> // std::string / std::to_string
#include <cuda_runtime_api.h> // Runtime CUDA API offers better code management but less control compared the the CUDA driver API
#include <cuda_d3d11_interop.h> // Provides facilities for registering and mapping graphics resources (among others) from/to Direct3D to/from CUDA context
#include <helper_cuda.h>
#include "load_sample.h"
// Source (Depth stencil): https://www.youtube.com/watch?v=NLic7ipz4xE&list=PLcacUGyBsOIBlGyQQWzp6D1Xn6ZENx9Y2&index=17
// Source (ImGui Setup): https://www.youtube.com/watch?v=Btx_tujnyB4&list=PLcacUGyBsOIBlGyQQWzp6D1Xn6ZENx9Y2&index=35
// Source (ImGui Usage): https://www.youtube.com/watch?v=o5sJClp0HDY&list=PLcacUGyBsOIBlGyQQWzp6D1Xn6ZENx9Y2&index=36
#include <imgui.h> // ImGUI general
#include <imgui_impl_win32.h> // ImGUI Windows integration
#include <imgui_impl_dx11.h> // ImGUI Direct3D integration
- #define WIN_WIDTH 600
- #define WIN_HEIGHT 600
- // Include the Direct3D Library file
- // NOTE: No required, since CMake links the library
- //#pragma comment (lib, "d3d11.lib")
- // Global declarations
- IDXGIAdapter* adapter = nullptr; // Video adapter
- IDXGISwapChain* swapchain = nullptr; // Swap chain interface
- ID3D11Device* device = nullptr; // D3D device interface
- ID3D11DeviceContext* device_context = nullptr; // D3D device context
- ID3D11RenderTargetView* rtv = nullptr; // Render target view for main rendering (scen + CUDA results + ImGUI)
- ID3D11RenderTargetView* rtv_cuda = nullptr; // Render target view for CUDA content
- ID3D11ShaderResourceView* srv_cuda = nullptr; // Shader view for CUDA
- ID3D11DepthStencilView* dsv = nullptr; // Buffer (depth)
- ID3D11Texture2D* texture_rgb = nullptr; // Buffer for rendering the final result, CUDA cannot write here. Only D3D11 CopyResource will be used to fill it up
- ID3D11Texture2D* texture_cuda_read = nullptr; // Buffer for storing the scene that will be processed by CUDA, CUDA will read from here
- ID3D11Texture2D* texture_cuda_write = nullptr; // Buffer for storing the processed scene what will be copied to texture_rgb, CUDA will write here
- ID3D11Texture2D* texture_depth; // Buffer for the depth information of the scene
- // The following CUDA resource is used for reading the rendered scene
- cudaGraphicsResource* cuda_interop_read = nullptr;
- void* cuda_linear_mem_read = nullptr;
- size_t cuda_pitch_read = 0;
- // The following CUDA resource is used for writing the processed rendered scene
- cudaGraphicsResource* cuda_interop_write = nullptr;
- void* cuda_linear_mem_write = nullptr;
- size_t cuda_pitch_write = 0;
- int cuda_device_count = 0;
- bool cuda_enable = true;
- extern "C"
- {
- cudaError_t cuda_texture_2d(void* surface_in, void* surface_out, size_t width, size_t height, size_t pitch_in, size_t pitch_out, float t);
- }
- ID3D11SamplerState* sampler_state = nullptr;
- ID3D11RasterizerState* raster_state = nullptr;
- ID3D11DepthStencilState* stencil_state = nullptr;
- char devname[256];
- // Sample
- ID3D11PixelShader* sample_pixel_shader = nullptr;
- ID3D11VertexShader* sample_vertex_shader = nullptr;
- ID3D11InputLayout* sample_vertex_layout = nullptr;
- ID3D11Buffer* sample_vertex_buffer = nullptr;
- ID3D11Buffer* sample_index_buffer = nullptr;
- ID3D11Buffer* sample_constant_buffer = nullptr;
- // Matrices
- DirectX::XMMATRIX matrix_world;
- DirectX::XMMATRIX matrix_view;
- DirectX::XMMATRIX matrix_projection;
- // Function prototypes
- // CUDA related
- bool getAdapter(IDXGIAdapter** adapter, char* devname); // Retrieve video adapter
- void InitCuda(); // Retrieve and initialize CUDA device
- cudaError_t RunCudaKernels();
- //bool register_d3d_texture_as_cuda_texture_resource(ID3D11Texture2D* d3d_texure, cudaGraphicsResource** cu_resource);
- //bool unregister_cuda_texture_resource(cudaGraphicsResource** cu_resource);
- // Direct3D related
- void InitD3D(HWND hWnd); // Sets up and initializes Direct3D
- void RenderFrame(void); // Renders a single frame
- void Clean(void); // Closes Direct3D and releases memory
- static long long frame_counter;
- // ImGui input handler
- extern LRESULT ImGui_ImplWin32_WndProcHandler(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
- // The WindowProc function prototype
- LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam);
- // The entry point for any Windows program
- int WINAPI WinMain(HINSTANCE hInstance,
- HINSTANCE hPrevInstance,
- LPSTR lpCmdLine,
- int nCmdShow)
- {
- HWND hWnd;
- HRESULT hr;
- WNDCLASSEX wc;
- std::string window_title = "CUDA Direct3D 11 Interop Demo";
- std::cout << "Creating and initializing window" << std::endl;
- ZeroMemory(&wc, sizeof(WNDCLASSEX));
- wc.cbSize = sizeof(WNDCLASSEX);
- wc.style = CS_HREDRAW | CS_VREDRAW;
- wc.lpfnWndProc = WindowProc;
- wc.hInstance = hInstance;
- wc.hCursor = LoadCursor(NULL, IDC_ARROW);
- wc.hbrBackground = (HBRUSH)COLOR_WINDOW;
- wc.lpszClassName = "WindowClass";
- RegisterClassEx(&wc);
- RECT wr = { 0, 0, WIN_WIDTH, WIN_HEIGHT };
- //AdjustWindowRect(&wr, WS_OVERLAPPEDWINDOW, FALSE);
- auto wrStyle = (WS_OVERLAPPED | WS_MINIMIZEBOX | WS_SYSMENU | WS_CAPTION | WS_MINIMIZEBOX);
- hWnd = CreateWindowEx(NULL,
- "WindowClass",
- window_title.c_str(),
- wrStyle, //WS_OVERLAPPEDWINDOW,
- 300,
- 300,
- wr.right - wr.left,
- wr.bottom - wr.top,
- NULL,
- NULL,
- hInstance,
- NULL);
- std::cout << "Rendering window on screen" << std::endl;
- ShowWindow(hWnd, nCmdShow);
- // Set up and initialize CUDA
- try
- {
- std::cout << "Searching for CUDA device" << std::endl;
- InitCuda();
- window_title += std::string(" (") + std::string(devname) + std::string(")");
- SetWindowText(hWnd, window_title.c_str());
- }
- catch (std::exception& e)
- {
- std::cout << e.what() << std::endl;
- return 2;
- }
- // Set up and initialize Direct3D
- try
- {
- std::cout << "Attemptint to create and initialize DirectX device" << std::endl;
- InitD3D(hWnd);
- }
- catch (std::exception& e)
- {
- std::cerr << "Initializing D3D11 failed\n" << e.what() << std::endl;
- return 1;
- }
- cudaError_t cr = cudaError::cudaSuccess;
- // Register the readable extra texture as a CUDA resource
- // This resource will be mapped as ReadOnly since it will be where we are getting the rendered scene from in order to process it with the CUDA kernels
- cr = cudaGraphicsD3D11RegisterResource(&cuda_interop_read, texture_cuda_read, cudaGraphicsRegisterFlags::cudaGraphicsRegisterFlagsNone); // cudaGraphicsRegisterFlagsReadOnly
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("Unable to register CUDA texture as resource: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- // Register the writeable extra texture as a CUDA resource
- // This resource will be mapped as WriteDiscard since it will be where we are writing the CUDA kernel processed data to
- cr = cudaGraphicsD3D11RegisterResource(&cuda_interop_write, texture_cuda_write, cudaGraphicsRegisterFlags::cudaGraphicsRegisterFlagsNone); // cudaGraphicsRegisterFlagsWriteDiscard
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("Unable to register backbuffer as CUDA resource: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- // Specify mapping strategy for the resource to be read/write
- // since the Direct3D rendering will be loaded, processed and
- // written back by CUDA
- cr = cudaGraphicsResourceSetMapFlags(cuda_interop_write, cudaGraphicsMapFlagsWriteDiscard); //cudaGraphicsMapFlagsWriteDiscard udaGraphicsMapFlagsNone
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("Unable to set flags for mapped backbuffer CUDA graphics resources: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- cr = cudaGraphicsResourceSetMapFlags(cuda_interop_read, cudaGraphicsMapFlagsReadOnly); //cudaGraphicsMapFlagsReadOnly cudaGraphicsMapFlagsNone
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("Unable to set flags for mapped CUDA graphics resources: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- // The registered resource can only be mapped as a texture. In order to write to it, a CUDA array is required
- RECT rect;
- int width = 0;
- int height = 0;
- if (GetWindowRect(hWnd, &rect))
- {
- width = rect.right - rect.left;
- height = rect.bottom - rect.top;
- }
- // Allocate pitched memory for the backbuffer, which we will write to
- cr = cudaMallocPitch(&cuda_linear_mem_write, &cuda_pitch_write, width * sizeof(float) * 4, height);
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("Unable to allocate pitched backbuffer CUDA memory: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- cr = cudaMemset(cuda_linear_mem_write, 1, cuda_pitch_write * height);
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("Unable to set backbuffer CUDA memory: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- // Allocate pitched memory for the CUDA resource, which we will be reading from
- cr = cudaMallocPitch(&cuda_linear_mem_read, &cuda_pitch_read, width * sizeof(float) * 4, height);
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("Unable to allocate pitched CUDA memory: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- cr = cudaMemset(cuda_linear_mem_read, 1, cuda_pitch_read * height);
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("Unable to set CUDA memory: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- // Setup ImGui
- IMGUI_CHECKVERSION();
- ImGui::CreateContext();
- ImGuiIO& io = ImGui::GetIO();
- ImGui_ImplWin32_Init(hWnd);
- ImGui_ImplDX11_Init(device, device_context);
- ImGui::StyleColorsDark();
- // Enter the main loop:
- MSG msg;
- // Create cube sample
- hr = create_cube(device, device_context,
- &sample_vertex_layout, &sample_vertex_buffer, &sample_vertex_shader,
- &sample_index_buffer, &sample_constant_buffer, &sample_pixel_shader
- );
- if (FAILED(hr))
- {
- throw std::exception("FAILED TO CREATE/LOAD SAMPLE");
- }
- while (TRUE)
- {
- // Process window-related messages
- if (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
- {
- TranslateMessage(&msg);
- DispatchMessage(&msg);
- if (msg.message == WM_QUIT)
- break;
- }
- // Render the frame
- try
- {
- RenderFrame();
- }
- catch (std::exception& e)
- {
- std::cout << "Rendering frame failed\n" << e.what() << std::endl;
- }
- }
- // Clean up DirectX and COM
- Clean();
- return msg.wParam;
- }
- // This is the main message handler for the program
- LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
- {
- if (ImGui_ImplWin32_WndProcHandler(hWnd, message, wParam, lParam))
- {
- return true;
- }
- switch (message)
- {
- case WM_DESTROY:
- {
- PostQuitMessage(0);
- return EXIT_SUCCESS;
- } break;
- case WM_KEYDOWN:
- {
- unsigned char keycode = static_cast<unsigned char>(wParam);
- //...
- }
- break;
- }
- return DefWindowProc(hWnd, message, wParam, lParam);
- }
- // This function initializes and prepares Direct3D for use
- void InitD3D(HWND hWnd)
- {
- RECT rect;
- int width = 0;
- int height = 0;
- if (GetWindowRect(hWnd, &rect))
- {
- width = rect.right - rect.left;
- height = rect.bottom - rect.top;
- }
- if (!width || !height)
- {
- std::cerr << "Initializing Direct3D received window of size 0x0" << std::endl;
- exit(1);
- }
- // Store the status of various DX-calls
- HRESULT hr = 0;
- // Create a struct to hold information about the swap chain
- DXGI_SWAP_CHAIN_DESC swapchainDesc;
- // Clear out the struct for use
- ZeroMemory(&swapchainDesc, sizeof(DXGI_SWAP_CHAIN_DESC));
- // Fill the swap chain description struct
- swapchainDesc.BufferCount = 1; // one back buffer
- swapchainDesc.BufferDesc.Width = width;
- swapchainDesc.BufferDesc.Height = height;
- swapchainDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; // use 32-bit color DXGI_FORMAT_R8G8B8A8_UNORM, 4 channels, 1 byte (uchar) per channel | DXGI_FORMAT_R32G32B32_FLOAT
- swapchainDesc.BufferDesc.RefreshRate.Numerator = 60;
- swapchainDesc.BufferDesc.RefreshRate.Denominator = 1;
- swapchainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; // how swap chain is to be used
- swapchainDesc.OutputWindow = hWnd; // the window to be used
- swapchainDesc.SampleDesc.Count = 1; // how many multisamples (1-4, above is not guaranteed for DX11 GPUs)
- swapchainDesc.SampleDesc.Quality = 0; // multisample quality level
- swapchainDesc.Windowed = TRUE; // windowed/full-screen mode
- // Configure DX feature level
- // https://learn.microsoft.com/en-us/windows/win32/direct3d11/overviews-direct3d-11-devices-downlevel-intro
- D3D_FEATURE_LEVEL featureLevels[] =
- {
- D3D_FEATURE_LEVEL_11_1,
- D3D_FEATURE_LEVEL_11_0,
- D3D_FEATURE_LEVEL_10_1,
- D3D_FEATURE_LEVEL_10_0
- };
- UINT numFeatureLevels = ARRAYSIZE(featureLevels);
- D3D_FEATURE_LEVEL featureLevel;
- char devname_dx[128];
- bool device_ok = getAdapter(&adapter, devname_dx);
- DXGI_ADAPTER_DESC adapterDesc;
- adapter->GetDesc(&adapterDesc); //devid = 9436, subsysid = 177803304
- std::cout << "DirectX GPU " << devname_dx << " : [BusID " << adapterDesc.DeviceId << " | DevID " << adapterDesc.SubSysId << "]" << std::endl;
- // Create a device, device context and swap chain using the information in the scd struct
- // SOURCE https://learn.microsoft.com/en-us/windows/win32/direct3d11/overviews-direct3d-11-devices-initialize
- hr = D3D11CreateDeviceAndSwapChain(
- adapter, // graphics adapter, which was already checked for CUDA support
- D3D_DRIVER_TYPE_UNKNOWN, // If adapter is not NULL, we need to use UNKNOWN, otherwise use D3D_DRIVER_TYPE_HARDWARE
- NULL, // software
- NULL, // flags D3D11_CREATE_DEVICE_DEBUG (requires Direct3D SDK Debug Layer -> currently unable to install for unknown reason)
- featureLevels, // D3D features to enable
- numFeatureLevels, // D3D feature count
- D3D11_SDK_VERSION, // D3D SDK version (here 11)
- &swapchainDesc, // swap chain description
- &swapchain, // swap chain
- &device, // D3D device
- &featureLevel, // supported feature level (can also use DXGI_ADAPTER_DESC variable to retrieve the information)
- // Read note for pFeatureLevels at https://learn.microsoft.com/en-us/windows/win32/api/d3d11/nf-d3d11-d3d11createdeviceandswapchain#parameters for DX11.1
- &device_context); // D3D device context
- if (FAILED(hr))
- {
- std::string msg = std::string("[InitD3D] Failed to create DX device and swap chain: ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- }
- // Release adapter
- adapter->Release();
- device->GetImmediateContext(&device_context);
- // Setup render view
- hr = swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID*)&texture_rgb);
- if (FAILED(hr))
- {
- std::string msg = std::string("[InitD3D] Failed to get address of back buffer: ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- }
- hr = device->CreateRenderTargetView(texture_rgb, nullptr, &rtv);
- if (FAILED(hr))
- {
- std::string msg = std::string("[InitD3D] Failed to create render target view (main): ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- }
- // Leave the pointer, release the texture
- //backbuffer->Release();
- //Setup texture for rendering the intial scene, which will be loaded in CUDA
- D3D11_TEXTURE2D_DESC texture_cuda_read_desc;
- //texture_rgb->GetDesc(&texture_rgb_cudaDesc);
- ZeroMemory(&texture_cuda_read_desc, sizeof(D3D11_TEXTURE2D_DESC));
- texture_cuda_read_desc.Width = width;
- texture_cuda_read_desc.Height = height;
- texture_cuda_read_desc.MipLevels = 1;
- texture_cuda_read_desc.ArraySize = 1;
- texture_cuda_read_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; //DXGI_FORMAT_R32G32B32A32_FLOAT
- texture_cuda_read_desc.SampleDesc.Count = 1;
- texture_cuda_read_desc.Usage = D3D11_USAGE::D3D11_USAGE_DEFAULT;//D3D11_USAGE_DEFAULT;
- texture_cuda_read_desc.BindFlags = D3D11_BIND_FLAG::D3D11_BIND_RENDER_TARGET | D3D11_BIND_FLAG::D3D11_BIND_SHADER_RESOURCE;
- //texture_rgb_cudaDesc.MiscFlags = D3D11_RESOURCE_MISC_FLAG::D3D11_RESOURCE_MISC_SHARED_NTHANDLE;
- hr = device->CreateTexture2D(&texture_cuda_read_desc, nullptr, &texture_cuda_read);
- if (FAILED(hr))
- {
- std::string msg = std::string("[InitD3D] Failed to create texture (for readining in CUDA): ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- }
- // Create the render target view for CUDA
- D3D11_RENDER_TARGET_VIEW_DESC rtv_cudaDesc;
- rtv_cudaDesc.Format = texture_cuda_read_desc.Format;
- rtv_cudaDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
- rtv_cudaDesc.Texture2D.MipSlice = 0;
- hr = device->CreateRenderTargetView(texture_cuda_read, &rtv_cudaDesc, &rtv_cuda);
- if (FAILED(hr))
- {
- std::string msg = std::string("[InitD3D] Failed to create render target view (CUDA): ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- }
- // Setup the description of the shader resource view for CUDA
- D3D11_SHADER_RESOURCE_VIEW_DESC srv_cudaDesc;
- srv_cudaDesc.Format = texture_cuda_read_desc.Format;
- srv_cudaDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
- srv_cudaDesc.Texture2D.MostDetailedMip = 0;
- srv_cudaDesc.Texture2D.MipLevels = 1;
- // Create the shader resource view.
- device->CreateShaderResourceView(texture_cuda_read, &srv_cudaDesc, &srv_cuda);
- //Setup texture where the results from CUDA will be written to
- // TODO Remove this and use texture_cuda_read_desc in CreateTexture2D() in case everything is the same (bind flags?)
- D3D11_TEXTURE2D_DESC texture_cuda_write_desc;
- ZeroMemory(&texture_cuda_write_desc, sizeof(D3D11_TEXTURE2D_DESC));
- texture_cuda_write_desc.Width = texture_cuda_read_desc.Width;
- texture_cuda_write_desc.Height = texture_cuda_read_desc.Height;
- texture_cuda_write_desc.MipLevels = texture_cuda_read_desc.MipLevels;
- texture_cuda_write_desc.ArraySize = texture_cuda_read_desc.ArraySize;
- texture_cuda_write_desc.Format = texture_cuda_read_desc.Format;
- texture_cuda_write_desc.SampleDesc.Count = texture_cuda_read_desc.SampleDesc.Count;
- texture_cuda_write_desc.Usage = texture_cuda_read_desc.Usage;
- texture_cuda_write_desc.BindFlags = D3D11_BIND_FLAG::D3D11_BIND_RENDER_TARGET | D3D11_BIND_FLAG::D3D11_BIND_SHADER_RESOURCE;
- //texture_rgb_cudaDesc.MiscFlags = D3D11_RESOURCE_MISC_FLAG::D3D11_RESOURCE_MISC_SHARED_NTHANDLE;
- hr = device->CreateTexture2D(&texture_cuda_write_desc, nullptr, &texture_cuda_write);
- if (FAILED(hr))
- {
- std::string msg = std::string("[InitD3D] Failed to create texture (for writing in CUDA): ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- }
- // Setup depth stencil view
- D3D11_TEXTURE2D_DESC depthStencilDesc;
- ZeroMemory(&depthStencilDesc, sizeof(D3D11_TEXTURE2D_DESC));
- depthStencilDesc.Width = width;
- depthStencilDesc.Height = height;
- depthStencilDesc.MipLevels = 1;
- depthStencilDesc.ArraySize = 1;
- depthStencilDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT;
- depthStencilDesc.SampleDesc.Count = swapchainDesc.SampleDesc.Count;
- depthStencilDesc.SampleDesc.Quality = swapchainDesc.SampleDesc.Quality;
- depthStencilDesc.Usage = D3D11_USAGE::D3D11_USAGE_DEFAULT;
- depthStencilDesc.BindFlags = D3D11_BIND_FLAG::D3D11_BIND_DEPTH_STENCIL;
- depthStencilDesc.CPUAccessFlags = 0;
- depthStencilDesc.MiscFlags = 0;
- hr = device->CreateTexture2D(&depthStencilDesc, nullptr, &texture_depth);
- if (FAILED(hr))
- {
- std::string msg = std::string("[InitD3D] Failed to create depth buffer: ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- }
- hr = device->CreateDepthStencilView(texture_depth, nullptr, &dsv);
- // Set the viewport
- D3D11_VIEWPORT viewport;
- ZeroMemory(&viewport, sizeof(D3D11_VIEWPORT));
- viewport.TopLeftX = 0;
- viewport.TopLeftY = 0;
- viewport.Width = WIN_WIDTH;
- viewport.Height = WIN_HEIGHT;
- viewport.MinDepth = 0.0f; // Closest to camera
- viewport.MaxDepth = 1.0f; // Furthest away from camera
- device_context->RSSetViewports(1, &viewport);
- // Initialize the world matrix
- matrix_world = DirectX::XMMatrixIdentity();
- // Initialize the view matrix
- DirectX::XMVECTOR Eye = DirectX::XMVectorSet(0.0f, 4.0f, -5.0f, 0.0f); //0 1 -5 0
- DirectX::XMVECTOR At = DirectX::XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
- DirectX::XMVECTOR Up = DirectX::XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
- matrix_view = DirectX::XMMatrixLookAtLH(Eye, At, Up);
- // Initialize the projection matrix
- matrix_projection = DirectX::XMMatrixPerspectiveFovLH(DirectX::XM_PIDIV2, viewport.Width / (FLOAT)viewport.Height, 0.01f, 100.0f);
- // Sampler state
- {
- D3D11_SAMPLER_DESC samplerStateDesc;
- samplerStateDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
- samplerStateDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
- samplerStateDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
- samplerStateDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
- samplerStateDesc.MinLOD = 0;
- samplerStateDesc.MaxLOD = 8;
- samplerStateDesc.MipLODBias = 0;
- samplerStateDesc.MaxAnisotropy = 1;
- hr = device->CreateSamplerState(&samplerStateDesc, &sampler_state);
- if (FAILED(hr))
- {
- std::string msg = std::string("[InitD3D] Failed to create sampler state: ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- //throw std::exception("[InitD3D] Failed to create sampler state");
- }
- device_context->PSSetSamplers(0, 1, &sampler_state);
- }
- // Rasterizer state
- {
- D3D11_RASTERIZER_DESC rasterizerStateDesc;
- rasterizerStateDesc.FillMode = D3D11_FILL_SOLID; //D3D11_FILL_SOLID D3D11_FILL_WIREFRAME
- rasterizerStateDesc.CullMode = D3D11_CULL_BACK;
- rasterizerStateDesc.FrontCounterClockwise = false;
- rasterizerStateDesc.DepthBias = false;
- rasterizerStateDesc.DepthBiasClamp = 0;
- rasterizerStateDesc.SlopeScaledDepthBias = 0;
- rasterizerStateDesc.DepthClipEnable = false;
- rasterizerStateDesc.ScissorEnable = false;
- rasterizerStateDesc.MultisampleEnable = false;
- rasterizerStateDesc.AntialiasedLineEnable = false;
- hr = device->CreateRasterizerState(&rasterizerStateDesc, &raster_state);
- if (FAILED(hr))
- {
- std::string msg = std::string("[InitD3D] Failed to create raster state: ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- }
- device_context->RSSetState(raster_state);
- }
- // Depth stencil state
- {
- D3D11_DEPTH_STENCIL_DESC stencilDesc;
- ZeroMemory(&stencilDesc, sizeof(stencilDesc));
- stencilDesc.DepthEnable = true;
- stencilDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
- stencilDesc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL;
- hr = device->CreateDepthStencilState(&stencilDesc, &stencil_state);
- if (FAILED(hr))
- {
- std::string msg = std::string("[InitD3D] Failed to depth stencil state: ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- }
- device_context->OMSetDepthStencilState(stencil_state, 0);
- }
- }
- // This is the function used to render a single frame
- void RenderFrame(void)
- {
- // Store the status of various DX-calls
- HRESULT hr = 0;
- // The background color
- float color[4] = { 0.0f, 0.2f, 0.4f, 1.0f };
- // Update our time
- static float t = 0.0f;
- static ULONGLONG timeStart = 0;
- ULONGLONG timeCur = GetTickCount64();
- if (timeStart == 0)
- timeStart = timeCur;
- t = (timeCur - timeStart) / 1000.0f;
- // Animate the cube
- static float translationOffset[3] = { 0.0f, 0.0f, 0.0f };
- matrix_world = DirectX::XMMatrixRotationY(t) * DirectX::XMMatrixTranslation(translationOffset[0], translationOffset[1], translationOffset[2]);
- // Update variables
- ConstantBuffer cb;
- cb.mWorld = DirectX::XMMatrixTranspose(matrix_world);
- cb.mView = DirectX::XMMatrixTranspose(matrix_view);
- cb.mProjection = DirectX::XMMatrixTranspose(matrix_projection);
- device_context->UpdateSubresource(sample_constant_buffer, 0, nullptr, &cb, 0, 0);
- // Clear the main view, which will be then populated render the results from CUDA and the ImGUI
- // TODO Possible can be removed if CopyResource (copy CUDA result to main RTV) indeed clears the buffer
- //device_context->OMSetRenderTargets(1, &rtv, dsv);
- //device_context->ClearRenderTargetView(rtv, color);
- //device_context->ClearDepthStencilView(dsv, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0f, 0);
- if (cuda_enable)
- {
- // Change to the CUDA render target view
- device_context->OMSetRenderTargets(1, &rtv_cuda, dsv);
- // Clear the buffer
- device_context->ClearRenderTargetView(rtv_cuda, color);
- device_context->ClearDepthStencilView(dsv, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0f, 0);
- // Render the scene (no ImGUI) to the current RTV
- device_context->VSSetShader(sample_vertex_shader, nullptr, 0);
- device_context->VSSetConstantBuffers(0, 1, &sample_constant_buffer);
- device_context->PSSetShader(sample_pixel_shader, nullptr, 0);
- device_context->DrawIndexed(36, 0, 0); // 36 vertices needed for 12 triangles in a triangle list
- // TODO Change RTV
- //device_context->OMSetRenderTargets(1, &rtv, dsv);
- /*
- // For debugging and perhaps later for actual use
- // By using staging texture and mapping it, one can then obtain direct
- // access to the pData (pixel data, e.g. RGBA)
- device_context->CopySubresourceRegion(texture_rgb_cuda, 0, 0, 0, 0, texture_rgb, 0, nullptr);
- D3D11_MAPPED_SUBRESOURCE* texture_rgb_cuda_data = nullptr;
- hr = device_context->Map(texture_rgb_cuda, 0, D3D11_MAP::D3D11_MAP_READ_WRITE, 0, texture_rgb_cuda_data);
- if (FAILED(hr))
- {
- std::string msg = std::string("[RenderFrame] Failed to map texture sub-resource: ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- }
- device_context->Unmap(texture_rgb_cuda, 0);
- */
- // Map the interop CUDA resources (MapFlags for each resource have already been set before this)
- cudaError_t cr;
- cudaStream_t stream = nullptr;
- const int cuda_mapped_resources_count = 2;
- cudaGraphicsResource* cuda_mapped_resources[cuda_mapped_resources_count] = {
- cuda_interop_read,
- cuda_interop_write
- };
- cr = cudaGraphicsMapResources(cuda_mapped_resources_count, cuda_mapped_resources, stream);
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("Unable to map CUDA graphics resources: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- // Run CUDA kernels
- cr = RunCudaKernels();
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("Unable to run CUDA kernels: ") + std::string(cudaGetErrorString(cr));
- }
- // Unmap resource
- cr = cudaGraphicsUnmapResources(cuda_mapped_resources_count, cuda_mapped_resources, stream);
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("Unable to unmap CUDA graphics resources: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- //device_context->CopyResource(texture_rgb, texture_rgb_cuda);
- //device_context->UpdateSubresource(texture_rgb, 0, nullptr, &texture_rgb_cuda, 0, 0);
- cr = cudaDeviceSynchronize();
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("Unable to synchronize GPU and CPU: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- device_context->CopyResource(texture_rgb, texture_cuda_write);
- }
- else
- {
- // We render the scene normally if CUDA processing has been disabled via the ImGUI
- device_context->OMSetRenderTargets(1, &rtv, dsv);
- device_context->ClearRenderTargetView(rtv, color);
- device_context->ClearDepthStencilView(dsv, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0f, 0);
- // Render a triangle strip
- device_context->VSSetShader(sample_vertex_shader, nullptr, 0);
- device_context->VSSetConstantBuffers(0, 1, &sample_constant_buffer);
- device_context->PSSetShader(sample_pixel_shader, nullptr, 0);
- device_context->DrawIndexed(36, 0, 0); // 36 vertices needed for 12 triangles in a triangle list
- }
- // Switch back to the main RTV to add the ImGUI components
- device_context->OMSetRenderTargets(1, &rtv, dsv);
- // Render ImGUI UI elements
- // Start frame
- ImGui_ImplDX11_NewFrame();
- ImGui_ImplWin32_NewFrame();
- // Create frame
- ImGui::NewFrame();
- ImGui::Begin("Controls / Debug Information");
- ImGui::Text((cuda_device_count ? std::string("Found " + std::to_string(cuda_device_count) + " CUDA " + (cuda_device_count == 1 ? "device" : "devices")).c_str() : "No CUDA devices found!"));
- ImGui::Text(devname);
- ImGui::DragFloat3("Trans X/Y/Z", translationOffset, 0.1f, -5.0f, 5.0f);
- std::string cb_label_cuda_enabled = std::string("CUDA ");
- cb_label_cuda_enabled += (cuda_enable ? "enabled" : "disabled");
- ImGui::Checkbox(cb_label_cuda_enabled.c_str(), &cuda_enable);
- ImGui::End();
- // Assemble together the ImGui draw data
- ImGui::Render();
- // Render ImGui
- ImGui_ImplDX11_RenderDrawData(ImGui::GetDrawData());
- // Present the main render target
- // Because the maximum frame latency is set to 1,
- // the render loop will generally be throttled to
- // the screen refresh rate, typically around
- // 60 Hz, by sleeping the application on Present
- // until the screen is refreshed.
- hr = swapchain->Present(1, 0);
- if (FAILED(hr))
- {
- std::string msg = std::string("[RenderFrame] Failed to swap back with front buffer: ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- }
- }
- // Clean up CUDA, Direct3D and COM objects
- void Clean(void)
- {
- cudaError_t cr = cudaSuccess;
- // Unregister CUDA resources
- if (cuda_interop_write)
- {
- cr = cudaGraphicsUnregisterResource(cuda_interop_write);
- printLastCudaError("cudaGraphicsUnregisterResource failed");
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("[Clean] Failed to unregister CUDA resource.\n") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- }
- // Close and release all existing COM objects
- if (sample_constant_buffer)
- sample_constant_buffer->Release();
- if (sample_vertex_buffer)
- sample_vertex_buffer->Release();
- if (sample_index_buffer)
- sample_index_buffer->Release();
- if (sample_vertex_layout)
- sample_vertex_layout->Release();
- if (sample_vertex_shader)
- sample_vertex_shader->Release();
- if (sample_pixel_shader)
- sample_pixel_shader->Release();
- if (rtv)
- rtv->Release();
- if (rtv_cuda)
- rtv_cuda->Release();
- if (srv_cuda)
- srv_cuda->Release();
- if (swapchain)
- swapchain->Release();
- if (device_context)
- device_context->Release();
- if (device)
- device->Release();
- texture_rgb->Release();
- texture_cuda_read->Release();
- texture_depth->Release();
- }
- bool getAdapter(IDXGIAdapter** adapter, char* devname)
- {
- HRESULT hr = S_OK;
- cudaError_t cr = cudaSuccess;
- UINT adapter_idx = 0;
- IDXGIFactory* pFactory;
- hr = CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)(&pFactory));
- if (FAILED(hr))
- {
- std::string msg = std::string("[getAdapter] Unable to create DXGI factory: ") + std::to_string(hr);
- throw std::exception(msg.c_str());
- return false;
- }
- for (; !(*adapter); ++adapter_idx) {
- // Get a candidate DXGI adapter
- IDXGIAdapter* pAdapter = NULL;
- hr = pFactory->EnumAdapters(adapter_idx, &pAdapter);
- if (FAILED(hr))
- {
- break; // no compatible adapters found
- }
- // Query to see if there exists a corresponding compute device
- int cuDevice;
- cr = cudaD3D11GetDevice(&cuDevice, pAdapter);
- if (cr == cudaSuccess) {
- // If so, mark it as the one against which to create our d3d10 device
- (*adapter) = pAdapter;
- (*adapter)->AddRef();
- ++adapter_idx;
- break;
- }
- pAdapter->Release();
- }
- std::cout << "Found " << (int)adapter_idx << " D3D11 adapater(s)" << std::endl;
- pFactory->Release();
- if (!adapter) {
- std::cerr << "Unable to find a D3D11 adapater with compute capability" << std::endl;
- return false;
- }
- DXGI_ADAPTER_DESC adapterDesc;
- (*adapter)->GetDesc(&adapterDesc);
- //wcstombs(devname, adapterDesc.Description, 128);
- std::cout << "Found a D3D11 adapater with compute capability" << std::endl;
- return true;
- }
- // This function initializes and prepares CUDA for use
- void InitCuda()
- {
- int nGraphicsGPU = 0;
- bool bFoundGraphics = false;
- // This function call returns 0 if there are no CUDA capable devices.
- cudaError_t cr = cudaGetDeviceCount(&cuda_device_count);
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("[InitCuda] Failed retrieve number of CUDA devices: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- if (cuda_device_count == 0) {
- cuda_enable = false;
- }
- else{
- std::cout << "Found " << cuda_device_count << " CUDA capable device(s)" << std::endl;
- }
- // Get CUDA device properties
- cudaDeviceProp deviceProp;
- for (int dev = 0; dev < cuda_device_count; ++dev) {
- cr = cudaGetDeviceProperties(&deviceProp, dev);
- strcpy_s(devname, deviceProp.name);
- std::cout << "CUDA GPU " << dev << " \"" << devname << "\" : [BusID " << deviceProp.pciBusID << " | DevID " << deviceProp.pciDeviceID << "]" << std::endl;
- if (cr != cudaSuccess)
- {
- std::string msg = std::string("[InitCuda] Failed to retrieve properties of CUDA device: ") + std::string(cudaGetErrorString(cr));
- throw std::exception(msg.c_str());
- }
- break;
- }
- }
- cudaError_t RunCudaKernels()
- {
- static float t = 0.0f;
- cudaError_t cr = cudaSuccess;
- /*
- cudaSurfaceObject_t cuda_so_read, cuda_so_write;
- cudaResourceDesc cuda_so_read_desc, cuda_so_write_desc;
- cuda_so_read_desc.resType = cudaResourceTypeArray;
- cuda_so_read_desc.res.array = ;
- */
- //cr = cudaCreateSurfaceObject(&cuda_so_read, cuda_so_read_desc);
- // populate the 2d texture
- cudaArray* cu_arr_read, * cu_arr_write;
- cr = cudaGraphicsSubResourceGetMappedArray(&cu_arr_read, cuda_interop_read, 0, 0); // cuda_interop_read
- if (cr != cudaSuccess)
- {
- return cr;
- }
- cr = cudaGraphicsSubResourceGetMappedArray(&cu_arr_write, cuda_interop_write, 0, 0); // cuda_interop_write
- if (cr != cudaSuccess)
- {
- return cr;
- }
- // kick off the kernel and send the staging buffer cudaLinearMemory as an argument to allow the kernel to write to it
- cr = cuda_texture_2d(
- cuda_linear_mem_read, cuda_linear_mem_write,
- WIN_WIDTH, WIN_HEIGHT,
- cuda_pitch_read, cuda_pitch_write,
- t
- );
- if (cr != cudaSuccess)
- {
- return cr;
- }
- // then we want to copy cudaLinearMemory to the D3D texture, via its mapped form : cudaArray
- cr = cudaMemcpy2DToArray(
- cu_arr_write, // dst array
- 0, 0, // offset
- cuda_linear_mem_write, cuda_pitch_write, // src cuda_linear_mem_read
- WIN_WIDTH * 4 * sizeof(UINT8), WIN_HEIGHT, // extent // sizeof(...) is based on DXGI_FORMAT of texture, e.g. DXGI_FORMAT_R8G8B8A8_UNORM is uint8, while DXGI_FORMAT_R32G32B32A32_FLOAT
- cudaMemcpyDeviceToDevice); // copying will occur only on the GPU
- if (cr != cudaSuccess)
- {
- return cr;
- }
- t += 0.1f;
- return cr;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement