/* * Minimum test case I could come up with. No return values checked, no error handling, * and assumes the exact capabilities of my setup (detailed below). And yeah, it's still * pretty big. Yay Vulkan verbosity. * * Problem: * When using more than two different queues (one after another, no multi-threading * needed) for the presenting of swapchain images, things grind to a halt. I first * encountered this using multiple swapchains with multiple surfaces, using one unique * queue per swapchain. But the same trouble occurs when using only a single swapchain. * The first six frames render properly, no matter the combination of swapchain size * and number of queues used. * * See the comments for the two defines QUEUECOUNT and SWAPCHAINSIZE, and change their * values to test the breaking cases. Changing presentation modes has had no effect * other than immediate mode finishing the first six frames more quickly than FIFO mode, * before stalling. * * Problem found on a GTX 780, with nVidia drivers 364.19 for Linux, Linux kernel 4.5.4, * on X11 with Xfce as display manager. And a 4Ghz Intel i7 CPU, in case it's a timing * issue for the first few presented images. Compiled with GCC 5.3.0. * * Build depends on SDL2, X11, and Vulkan. * * Compiled with: * g++ -std=c++14 -O3 -I/path/to/vulkan/include/ -lSDL2 -lvulkan -o broken main.cpp * * Different optimization levels had no effect. * * Warning: When left to its own devices with 3 or more queues, this program makes my * machine completely unresponsive. Use something like the Linux 'timeout' tool to send * a SIGKILL after a minute or two, or you're gonna end up power cycling. */ #include #include "SDL2/SDL.h" #include "SDL2/SDL_syswm.h" #define VK_USE_PLATFORM_XLIB_KHR #include "vulkan/vulkan.h" /* Works fine with a queue count of 1 or 2, breaks at 3 or anything higher. */ #define QUEUECOUNT 3 /* * Failure behavior seems affected by the number of images in the swapchain. * At 2 images, the program hangs while waiting for the command buffer fence. * At 3 or more images, the program exits with a device lost error on command submit. */ #define SWAPCHAINSIZE 2 // Global all the things. VkDevice renderDevice; VkSwapchainKHR swapchains[1]; VkSemaphore swapchainSemaphores[1]; VkSemaphore signalSemaphores[1]; VkFence fences[1]; VkCommandBuffer commandBuffer[1]; // Prototypes so you don't have to scroll all the way down to main. You're welcome. bool shouldTerminate(); void init(); SDL_Window* initSDL(); VkInstance initVulkanInstance(); VkSurfaceKHR initVulkanSurface(VkInstance instance, SDL_Window* sdlwindow); void initVulkanRenderDevice(VkInstance instance); void initVulkanSwapchain(VkSurfaceKHR surface); void initVulkanCommandBuffers(); void initVulkanSynchronizationPrimitives(); /* Look at me, I'm important! */ int main(int, const char**) { init(); // Bunch of variable filling, blah blah, skip to the loop below this. uint32_t imageIndex = 0, roundRobin = 0, frameCount = 0; VkClearColorValue clearColor = {{0.6, 0.2, 0.2, 1.0}}; VkImage imageArray[SWAPCHAINSIZE]; uint32_t imageCount = SWAPCHAINSIZE; vkGetSwapchainImagesKHR(renderDevice, swapchains[0], &imageCount, imageArray); VkPipelineStageFlags stages[1] = {VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT}; VkCommandBufferBeginInfo bufferBeginInfo; bufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; bufferBeginInfo.pNext = NULL; bufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; bufferBeginInfo.pInheritanceInfo = NULL; VkImageSubresourceRange subRanges; subRanges.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; subRanges.baseMipLevel = 0; subRanges.levelCount = 1; subRanges.baseArrayLayer = 0; subRanges.layerCount = 1; VkImageMemoryBarrier imageBarriers[1]; imageBarriers[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; imageBarriers[0].pNext = NULL; imageBarriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imageBarriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imageBarriers[0].image = imageArray[imageIndex]; imageBarriers[0].subresourceRange = subRanges; VkSubmitInfo submitInfo; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.pNext = NULL; submitInfo.waitSemaphoreCount = 1; submitInfo.pWaitSemaphores = swapchainSemaphores; submitInfo.pWaitDstStageMask = stages; submitInfo.commandBufferCount = 1; submitInfo.signalSemaphoreCount = 1; submitInfo.pSignalSemaphores = signalSemaphores; VkPresentInfoKHR presentInfo; presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; presentInfo.pNext = NULL; presentInfo.waitSemaphoreCount = 1; presentInfo.pWaitSemaphores = signalSemaphores; presentInfo.swapchainCount = 1; presentInfo.pSwapchains = swapchains; presentInfo.pResults = NULL; VkQueue queues[16]; // Prefetch handles for all queues. for (int i = 0; i < 16; ++i) vkGetDeviceQueue(renderDevice, 0, i, &queues[i]); // Run until escape is pressed, the window is closed, VK_ERROR_DEVICE_LOST is encountered, // or, well, forever, if we hang on the fence. while (!shouldTerminate()) { // Reset the buffer, fetch the image (which never blocks), and record the simple commands to clear the image. vkResetCommandBuffer(commandBuffer[0], 0); vkAcquireNextImageKHR(renderDevice, swapchains[0], 0, swapchainSemaphores[0], VK_NULL_HANDLE, &imageIndex); vkBeginCommandBuffer(commandBuffer[0], &bufferBeginInfo); imageBarriers[0].srcAccessMask = 0; imageBarriers[0].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; imageBarriers[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; imageBarriers[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; vkCmdPipelineBarrier(commandBuffer[0], VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 0, NULL, 1, imageBarriers); vkCmdClearColorImage(commandBuffer[0], imageArray[imageIndex], VK_IMAGE_LAYOUT_GENERAL, &clearColor, 1, &subRanges); imageBarriers[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; imageBarriers[0].dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; imageBarriers[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; imageBarriers[0].newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; vkCmdPipelineBarrier(commandBuffer[0], VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, NULL, 0, NULL, 1, imageBarriers); vkEndCommandBuffer(commandBuffer[0]); submitInfo.pCommandBuffers = commandBuffer; VkQueue queue = queues[roundRobin]; if (vkQueueSubmit(queue, 1, &submitInfo, fences[0]) == VK_ERROR_DEVICE_LOST) { std::cout << "Failed.\nvkQueueSubmit() returned VK_ERROR_DEVICE_LOST\n" << std::endl; abort(); } // When this hangs, it hangs. The GPU seems to lock right up, so be sure to have a watchdog that // kills the program after some sane period. std::cout << "Waiting for fence... " << std::flush; while (vkWaitForFences(renderDevice, 1, fences, VK_TRUE, 100) == VK_TIMEOUT); std::cout << "Done.\n"; vkResetFences(renderDevice, 1, fences); uint32_t imageIndices[1] = {imageIndex}; presentInfo.pImageIndices = imageIndices; // This is the most common point of stalling when using more than two queues for presentation. std::cout << "Waiting for present... " << std::flush; if (vkQueuePresentKHR(queue, &presentInfo) == VK_ERROR_DEVICE_LOST) { std::cout << "Failed.\nvkQueuePresentKHR() returned VK_ERROR_DEVICE_LOST\n" << std::endl;; abort(); } std::cout << "Done.\n"; std::cout << "Waiting for queue idle... " << std::flush; vkQueueWaitIdle(queue); std::cout << "Done.\n"; ++frameCount; std::cout << "Finished frame " << frameCount << " on queue " << roundRobin << " for image with index " << imageIndex << "\n" << std::endl; ++roundRobin; if (roundRobin >= QUEUECOUNT) roundRobin = 0; } return 0; } bool shouldTerminate() { SDL_Event event; while (SDL_PollEvent(&event)) { switch (event.type) { case SDL_KEYUP: if (event.key.keysym.sym == SDLK_ESCAPE) return true; break; case SDL_WINDOWEVENT: if (event.window.event == SDL_WINDOWEVENT_CLOSE) return true; break; default: break; } } return false; } void init() { SDL_Window* sdlwindow = initSDL(); VkInstance instance = initVulkanInstance(); VkSurfaceKHR surface = initVulkanSurface(instance, sdlwindow); initVulkanRenderDevice(instance); initVulkanSwapchain(surface); initVulkanCommandBuffers(); initVulkanSynchronizationPrimitives(); } SDL_Window* initSDL() { SDL_Init(SDL_INIT_EVENTS | SDL_INIT_VIDEO); SDL_Window* sdlwindow = SDL_CreateWindow("Broken", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 640, 480, 0); return sdlwindow; } VkInstance initVulkanInstance() { VkApplicationInfo appInfo = {}; appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; appInfo.pNext = NULL; appInfo.pApplicationName = NULL; appInfo.applicationVersion = 0; appInfo.pEngineName = NULL; appInfo.engineVersion = 0; appInfo.apiVersion = VK_API_VERSION_1_0; const char* extensions[2] = {VK_KHR_SURFACE_EXTENSION_NAME, VK_KHR_XLIB_SURFACE_EXTENSION_NAME}; VkInstanceCreateInfo instanceCreateInfo = {}; instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; instanceCreateInfo.pNext = NULL; instanceCreateInfo.flags = 0; instanceCreateInfo.pApplicationInfo = &appInfo; instanceCreateInfo.enabledLayerCount = 0; instanceCreateInfo.ppEnabledLayerNames = NULL; instanceCreateInfo.enabledExtensionCount = 2; instanceCreateInfo.ppEnabledExtensionNames = extensions; VkInstance instance; vkCreateInstance(&instanceCreateInfo, nullptr, &instance); return instance; } VkSurfaceKHR initVulkanSurface(VkInstance instance, SDL_Window* sdlwindow) { SDL_SysWMinfo info; SDL_GetVersion(&info.version); SDL_GetWindowWMInfo(sdlwindow, &info); Display* x11display = info.info.x11.display; Window x11window = info.info.x11.window; VkXlibSurfaceCreateInfoKHR surfaceInfo; surfaceInfo.sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR; surfaceInfo.pNext = NULL; surfaceInfo.flags = 0; surfaceInfo.dpy = x11display; surfaceInfo.window = x11window; VkSurfaceKHR surface; vkCreateXlibSurfaceKHR(instance, &surfaceInfo, NULL, &surface); return surface; } void initVulkanRenderDevice(VkInstance instance) { uint32_t deviceCount = 0; vkEnumeratePhysicalDevices(instance, &deviceCount, NULL); VkPhysicalDevice devices[deviceCount]; vkEnumeratePhysicalDevices(instance, &deviceCount, devices); float queuePriorities[1] = {1.0}; VkDeviceQueueCreateInfo queueInfos[1]; queueInfos[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queueInfos[0].pNext = NULL; queueInfos[0].flags = 0; queueInfos[0].queueFamilyIndex = 0; queueInfos[0].queueCount = 16; queueInfos[0].pQueuePriorities = queuePriorities; VkPhysicalDeviceFeatures features; features.robustBufferAccess = VK_FALSE; features.fullDrawIndexUint32 = VK_FALSE; features.imageCubeArray = VK_FALSE; features.independentBlend = VK_FALSE; features.geometryShader = VK_TRUE; features.tessellationShader = VK_TRUE; features.sampleRateShading = VK_FALSE; features.dualSrcBlend = VK_FALSE; features.logicOp = VK_FALSE; features.multiDrawIndirect = VK_FALSE; features.drawIndirectFirstInstance = VK_FALSE; features.depthClamp = VK_FALSE; features.depthBiasClamp = VK_FALSE; features.fillModeNonSolid = VK_FALSE; features.depthBounds = VK_FALSE; features.wideLines = VK_FALSE; features.largePoints = VK_FALSE; features.alphaToOne = VK_FALSE; features.multiViewport = VK_TRUE; features.samplerAnisotropy = VK_TRUE; features.textureCompressionETC2 = VK_FALSE; features.textureCompressionASTC_LDR = VK_FALSE; features.textureCompressionBC = VK_FALSE; features.occlusionQueryPrecise = VK_FALSE; features.pipelineStatisticsQuery = VK_FALSE; features.vertexPipelineStoresAndAtomics = VK_FALSE; features.fragmentStoresAndAtomics = VK_FALSE; features.shaderTessellationAndGeometryPointSize = VK_FALSE; features.shaderImageGatherExtended = VK_FALSE; features.shaderStorageImageExtendedFormats = VK_FALSE; features.shaderStorageImageMultisample = VK_FALSE; features.shaderStorageImageReadWithoutFormat = VK_FALSE; features.shaderStorageImageWriteWithoutFormat = VK_FALSE; features.shaderUniformBufferArrayDynamicIndexing = VK_FALSE; features.shaderSampledImageArrayDynamicIndexing = VK_FALSE; features.shaderStorageBufferArrayDynamicIndexing = VK_FALSE; features.shaderStorageImageArrayDynamicIndexing = VK_FALSE; features.shaderClipDistance = VK_FALSE; features.shaderCullDistance = VK_FALSE; features.shaderFloat64 = VK_FALSE; features.shaderInt64 = VK_FALSE; features.shaderInt16 = VK_FALSE; features.shaderResourceResidency = VK_FALSE; features.shaderResourceMinLod = VK_FALSE; features.sparseBinding = VK_FALSE; features.sparseResidencyBuffer = VK_FALSE; features.sparseResidencyImage2D = VK_FALSE; features.sparseResidencyImage3D = VK_FALSE; features.sparseResidency2Samples = VK_FALSE; features.sparseResidency4Samples = VK_FALSE; features.sparseResidency8Samples = VK_FALSE; features.sparseResidency16Samples = VK_FALSE; features.sparseResidencyAliased = VK_FALSE; features.variableMultisampleRate = VK_FALSE; features.inheritedQueries = VK_FALSE; const char* extensions[1] = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; VkDeviceCreateInfo deviceInfo; deviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; deviceInfo.pNext = NULL; deviceInfo.flags = 0; deviceInfo.queueCreateInfoCount = 1; deviceInfo.pQueueCreateInfos = queueInfos; deviceInfo.enabledLayerCount = 0; deviceInfo.ppEnabledLayerNames = NULL; deviceInfo.enabledExtensionCount = 1; deviceInfo.ppEnabledExtensionNames = extensions; deviceInfo.pEnabledFeatures = &features; vkCreateDevice(devices[0], &deviceInfo, NULL, &renderDevice); } void initVulkanSwapchain(VkSurfaceKHR surface) { VkSwapchainCreateInfoKHR chainInfo; chainInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; chainInfo.pNext = NULL; chainInfo.flags = 0; chainInfo.surface = surface; chainInfo.minImageCount = SWAPCHAINSIZE; chainInfo.imageFormat = VK_FORMAT_B8G8R8A8_UNORM; chainInfo.imageColorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR; chainInfo.imageExtent = {640, 480}; chainInfo.imageArrayLayers = 1; chainInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; chainInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; chainInfo.queueFamilyIndexCount = 0; chainInfo.pQueueFamilyIndices = NULL; chainInfo.preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; chainInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; // chainInfo.presentMode = VK_PRESENT_MODE_FIFO_KHR; chainInfo.presentMode = VK_PRESENT_MODE_IMMEDIATE_KHR; // Doesn't influence the results. chainInfo.clipped = VK_FALSE; chainInfo.oldSwapchain = NULL; vkCreateSwapchainKHR(renderDevice, &chainInfo, NULL, &swapchains[0]); } void initVulkanCommandBuffers() { VkCommandPoolCreateInfo poolInfo; poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; poolInfo.pNext = NULL; poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; poolInfo.queueFamilyIndex = 0; VkCommandPool commandPool; vkCreateCommandPool(renderDevice, &poolInfo, NULL, &commandPool); VkCommandBufferAllocateInfo bufferInfo; bufferInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; bufferInfo.pNext = NULL; bufferInfo.commandPool = commandPool; bufferInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; bufferInfo.commandBufferCount = 1; vkAllocateCommandBuffers(renderDevice, &bufferInfo, commandBuffer); } void initVulkanSynchronizationPrimitives() { VkSemaphoreCreateInfo semaphoreInfo; semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; semaphoreInfo.pNext = NULL; semaphoreInfo.flags = 0; vkCreateSemaphore(renderDevice, &semaphoreInfo, NULL, &swapchainSemaphores[0]); vkCreateSemaphore(renderDevice, &semaphoreInfo, NULL, &signalSemaphores[0]); VkFenceCreateInfo fenceInfo[1]; fenceInfo[0].sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; fenceInfo[0].pNext = NULL; fenceInfo[0].flags = 0; vkCreateFence(renderDevice, fenceInfo, NULL, &fences[0]); }