From 17101e9bb935b9b877bd5624e8ba1ad1976c01fa Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Tue, 25 Jul 2023 13:44:34 -0700 Subject: [PATCH 01/45] Fix graphics queue assignment Copy-paste error --- src/core/renderer_vk/renderer_vk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 4ec70412..92ce12e4 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -513,7 +513,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { VULKAN_HPP_DEFAULT_DISPATCHER.init(device.get()); presentQueue = device->getQueue(presentQueueFamily, 0); - graphicsQueue = device->getQueue(presentQueueFamily, 0); + graphicsQueue = device->getQueue(graphicsQueueFamily, 0); computeQueue = device->getQueue(computeQueueFamily, 0); transferQueue = device->getQueue(transferQueueFamily, 0); From d0832ca558c66358092a97083a082c1eb0302151 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Tue, 25 Jul 2023 13:48:07 -0700 Subject: [PATCH 02/45] Fix support for headless vulkan context --- include/renderer_vk/renderer_vk.hpp | 14 +- src/core/renderer_vk/renderer_vk.cpp | 337 +++++++++++++++------------ 2 files changed, 192 insertions(+), 159 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 59d8cdae..6ebbcb7e 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -32,9 +32,14 @@ class RendererVK final : public Renderer { std::vector swapchainImages = {}; std::vector swapchainImageViews = {}; - // Per-swapchain-image data - // Each vector is `swapchainImageCount` in size - std::vector presentCommandBuffers = {}; + // This value is the degree of parallelism to allow multiple frames to be in-flight + // aka: "double-buffer"/"triple-buffering" + // Todo: make this a configuration option + static constexpr usize frameBufferingCount = 3; + + // Frame-buffering data + // Each vector 
is `frameBufferingCount` in size + std::vector frameCommandBuffers = {}; std::vector swapImageFreeSemaphore = {}; std::vector renderFinishedSemaphore = {}; std::vector frameFinishedFences = {}; @@ -42,7 +47,8 @@ class RendererVK final : public Renderer { // Recreate the swapchain, possibly re-using the old one in the case of a resize vk::Result recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D swapchainExtent); - u64 currentFrame = 0; + u64 frameBufferingIndex = 0; + public: RendererVK(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); ~RendererVK() override; diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 92ce12e4..4b6956d5 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -155,48 +155,6 @@ vk::Result RendererVK::recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D sw Helpers::panic("Error creating acquiring swapchain images: %s\n", vk::to_string(getResult.result).c_str()); } - // Swapchain Command buffer(s) - vk::CommandBufferAllocateInfo commandBuffersInfo = {}; - commandBuffersInfo.commandPool = commandPool.get(); - commandBuffersInfo.level = vk::CommandBufferLevel::ePrimary; - commandBuffersInfo.commandBufferCount = swapchainImageCount; - - if (auto allocateResult = device->allocateCommandBuffersUnique(commandBuffersInfo); allocateResult.result == vk::Result::eSuccess) { - presentCommandBuffers = std::move(allocateResult.value); - } else { - Helpers::panic("Error allocating command buffer: %s\n", vk::to_string(allocateResult.result).c_str()); - } - - // Swapchain synchronization primitives - vk::FenceCreateInfo fenceInfo = {}; - fenceInfo.flags = vk::FenceCreateFlagBits::eSignaled; - - vk::SemaphoreCreateInfo semaphoreInfo = {}; - - swapImageFreeSemaphore.resize(swapchainImageCount); - renderFinishedSemaphore.resize(swapchainImageCount); - frameFinishedFences.resize(swapchainImageCount); - - for (usize i = 0; i < swapchainImageCount; i++) 
{ - if (auto createResult = device->createSemaphoreUnique(semaphoreInfo); createResult.result == vk::Result::eSuccess) { - swapImageFreeSemaphore[i] = std::move(createResult.value); - } else { - Helpers::panic("Error creating 'present-ready' semaphore: %s\n", vk::to_string(createResult.result).c_str()); - } - - if (auto createResult = device->createSemaphoreUnique(semaphoreInfo); createResult.result == vk::Result::eSuccess) { - renderFinishedSemaphore[i] = std::move(createResult.value); - } else { - Helpers::panic("Error creating 'post-render' semaphore: %s\n", vk::to_string(createResult.result).c_str()); - } - - if (auto createResult = device->createFenceUnique(fenceInfo); createResult.result == vk::Result::eSuccess) { - frameFinishedFences[i] = std::move(createResult.value); - } else { - Helpers::panic("Error creating 'present-ready' semaphore: %s\n", vk::to_string(createResult.result).c_str()); - } - } - return vk::Result::eSuccess; } @@ -208,133 +166,143 @@ RendererVK::~RendererVK() {} void RendererVK::reset() {} void RendererVK::display() { - // Block, on the CPU, to ensure that this swapchain-frame is ready for more work - if (auto waitResult = device->waitForFences({frameFinishedFences[currentFrame].get()}, true, std::numeric_limits::max()); + // Block, on the CPU, to ensure that this frame-buffering-frame is ready for more work + if (auto waitResult = device->waitForFences({frameFinishedFences[frameBufferingIndex].get()}, true, std::numeric_limits::max()); waitResult != vk::Result::eSuccess) { Helpers::panic("Error waiting on swapchain fence: %s\n", vk::to_string(waitResult).c_str()); } - u32 swapchainImageIndex = std::numeric_limits::max(); - if (const auto acquireResult = - device->acquireNextImageKHR(swapchain.get(), std::numeric_limits::max(), swapImageFreeSemaphore[currentFrame].get(), {}); - acquireResult.result == vk::Result::eSuccess) { - swapchainImageIndex = acquireResult.value; - } else { - switch (acquireResult.result) { - case 
vk::Result::eSuboptimalKHR: - case vk::Result::eErrorOutOfDateKHR: { - // Surface resized - vk::Extent2D swapchainExtent; - { - int windowWidth, windowHeight; - // Block until we have a valid surface-area to present to - // Usually this is because the window has been minimized - // Todo: We should still be rendering even without a valid swapchain - do { - SDL_Vulkan_GetDrawableSize(targetWindow, &windowWidth, &windowHeight); - } while (!windowWidth || !windowHeight); - swapchainExtent.width = windowWidth; - swapchainExtent.height = windowHeight; + // Get the next available swapchain image, and signal the semaphore when it's ready + static constexpr u32 swapchainImageInvalid = std::numeric_limits::max(); + u32 swapchainImageIndex = swapchainImageInvalid; + if (swapchain) { + if (const auto acquireResult = + device->acquireNextImageKHR(swapchain.get(), std::numeric_limits::max(), swapImageFreeSemaphore[frameBufferingIndex].get(), {}); + acquireResult.result == vk::Result::eSuccess) { + swapchainImageIndex = acquireResult.value; + } else { + switch (acquireResult.result) { + case vk::Result::eSuboptimalKHR: + case vk::Result::eErrorOutOfDateKHR: { + // Surface resized + vk::Extent2D swapchainExtent; + { + int windowWidth, windowHeight; + // Block until we have a valid surface-area to present to + // Usually this is because the window has been minimized + // Todo: We should still be rendering even without a valid swapchain + do { + SDL_Vulkan_GetDrawableSize(targetWindow, &windowWidth, &windowHeight); + } while (!windowWidth || !windowHeight); + swapchainExtent.width = windowWidth; + swapchainExtent.height = windowHeight; + } + recreateSwapchain(surface.get(), swapchainExtent); + break; + } + default: { + Helpers::panic("Error acquiring next swapchain image: %s\n", vk::to_string(acquireResult.result).c_str()); } - recreateSwapchain(surface.get(), swapchainExtent); - break; - } - default: { - Helpers::panic("Error acquiring next swapchain image: %s\n", 
vk::to_string(acquireResult.result).c_str()); } } } - vk::UniqueCommandBuffer& presentCommandBuffer = presentCommandBuffers.at(currentFrame); + vk::UniqueCommandBuffer& frameCommandBuffer = frameCommandBuffers.at(frameBufferingIndex); vk::CommandBufferBeginInfo beginInfo = {}; beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; - if (const vk::Result beginResult = presentCommandBuffer->begin(beginInfo); beginResult != vk::Result::eSuccess) { + if (const vk::Result beginResult = frameCommandBuffer->begin(beginInfo); beginResult != vk::Result::eSuccess) { Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); } { - static const std::array presentScopeColor = {{1.0f, 0.0f, 1.0f, 1.0f}}; + static const std::array frameScopeColor = {{1.0f, 0.0f, 1.0f, 1.0f}}; - Vulkan::DebugLabelScope debugScope(presentCommandBuffer.get(), presentScopeColor, "Present"); + Vulkan::DebugLabelScope debugScope(frameCommandBuffer.get(), frameScopeColor, "Frame"); // Prepare for color-clear - presentCommandBuffer->pipelineBarrier( - vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, - {vk::ImageMemoryBarrier( - vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, - vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - )} - ); + if (swapchainImageIndex != swapchainImageInvalid) { + frameCommandBuffer->pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + {vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], + 
vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + )} + ); - presentCommandBuffer->clearColorImage( - swapchainImages[swapchainImageIndex], vk::ImageLayout::eTransferDstOptimal, presentScopeColor, - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ); - - // Prepare for present - presentCommandBuffer->pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, - {vk::ImageMemoryBarrier( - vk::AccessFlagBits::eNone, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, - vk::ImageLayout::ePresentSrcKHR, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], + static const std::array clearColor = {{0.0f, 0.0f, 0.0f, 1.0f}}; + frameCommandBuffer->clearColorImage( + swapchainImages[swapchainImageIndex], vk::ImageLayout::eTransferDstOptimal, clearColor, vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - )} - ); + ); + + // Prepare for present + frameCommandBuffer->pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, + {vk::ImageMemoryBarrier( + vk::AccessFlagBits::eNone, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::ePresentSrcKHR, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + )} + ); + } } - if (const vk::Result endResult = presentCommandBuffer->end(); endResult != vk::Result::eSuccess) { + if (const vk::Result endResult = frameCommandBuffer->end(); endResult != vk::Result::eSuccess) { Helpers::panic("Error ending command buffer recording: %s\n", vk::to_string(endResult).c_str()); } vk::SubmitInfo submitInfo = {}; // Wait for any previous uses of the image image to finish presenting - 
submitInfo.setWaitSemaphores(swapImageFreeSemaphore[currentFrame].get()); + if (swapchainImageIndex != swapchainImageInvalid) { + submitInfo.setWaitSemaphores(swapImageFreeSemaphore[frameBufferingIndex].get()); + static const vk::PipelineStageFlags waitStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput; + submitInfo.setWaitDstStageMask(waitStageMask); + } // Signal when finished - submitInfo.setSignalSemaphores(renderFinishedSemaphore[currentFrame].get()); + submitInfo.setSignalSemaphores(renderFinishedSemaphore[frameBufferingIndex].get()); - static const vk::PipelineStageFlags waitStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput; - submitInfo.setWaitDstStageMask(waitStageMask); + submitInfo.setCommandBuffers(frameCommandBuffer.get()); - submitInfo.setCommandBuffers(presentCommandBuffer.get()); + device->resetFences({frameFinishedFences[frameBufferingIndex].get()}); - device->resetFences({frameFinishedFences[currentFrame].get()}); - - if (const vk::Result submitResult = graphicsQueue.submit({submitInfo}, frameFinishedFences[currentFrame].get()); + if (const vk::Result submitResult = graphicsQueue.submit({submitInfo}, frameFinishedFences[frameBufferingIndex].get()); submitResult != vk::Result::eSuccess) { Helpers::panic("Error submitting to graphics queue: %s\n", vk::to_string(submitResult).c_str()); } - vk::PresentInfoKHR presentInfo = {}; - presentInfo.setWaitSemaphores(renderFinishedSemaphore[currentFrame].get()); - presentInfo.setSwapchains(swapchain.get()); - presentInfo.setImageIndices(swapchainImageIndex); + if (swapchainImageIndex != swapchainImageInvalid) { + vk::PresentInfoKHR presentInfo = {}; + presentInfo.setWaitSemaphores(renderFinishedSemaphore[frameBufferingIndex].get()); + presentInfo.setSwapchains(swapchain.get()); + presentInfo.setImageIndices(swapchainImageIndex); - if (const auto presentResult = presentQueue.presentKHR(presentInfo); presentResult == vk::Result::eSuccess) { - } else { - switch (presentResult) { - case 
vk::Result::eSuboptimalKHR: - case vk::Result::eErrorOutOfDateKHR: { - // Surface resized - vk::Extent2D swapchainExtent; - { - int windowWidth, windowHeight; - SDL_Vulkan_GetDrawableSize(targetWindow, &windowWidth, &windowHeight); - swapchainExtent.width = windowWidth; - swapchainExtent.height = windowHeight; + if (const auto presentResult = presentQueue.presentKHR(presentInfo); presentResult == vk::Result::eSuccess) { + } else { + switch (presentResult) { + case vk::Result::eSuboptimalKHR: + case vk::Result::eErrorOutOfDateKHR: { + // Surface resized + vk::Extent2D swapchainExtent; + { + int windowWidth, windowHeight; + SDL_Vulkan_GetDrawableSize(targetWindow, &windowWidth, &windowHeight); + swapchainExtent.width = windowWidth; + swapchainExtent.height = windowHeight; + } + recreateSwapchain(surface.get(), swapchainExtent); + break; + } + default: { + Helpers::panic("Error presenting swapchain image: %s\n", vk::to_string(presentResult).c_str()); } - recreateSwapchain(surface.get(), swapchainExtent); - break; - } - default: { - Helpers::panic("Error presenting swapchain image: %s\n", vk::to_string(presentResult).c_str()); } } } - currentFrame = ((currentFrame + 1) % swapchainImageCount); + frameBufferingIndex = ((frameBufferingIndex + 1) % frameBufferingCount); } void RendererVK::initGraphicsContext(SDL_Window* window) { @@ -365,11 +333,11 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { }; // Get any additional extensions that SDL wants as well - { + if (targetWindow) { unsigned int extensionCount = 0; - SDL_Vulkan_GetInstanceExtensions(window, &extensionCount, nullptr); + SDL_Vulkan_GetInstanceExtensions(targetWindow, &extensionCount, nullptr); std::vector sdlInstanceExtensions(extensionCount); - SDL_Vulkan_GetInstanceExtensions(window, &extensionCount, sdlInstanceExtensions.data()); + SDL_Vulkan_GetInstanceExtensions(targetWindow, &extensionCount, sdlInstanceExtensions.data()); instanceExtensions.insert(instanceExtensions.end(), 
sdlInstanceExtensions.begin(), sdlInstanceExtensions.end()); } @@ -411,10 +379,12 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { } // Create surface - if (VkSurfaceKHR newSurface; SDL_Vulkan_CreateSurface(window, instance.get(), &newSurface)) { - surface.reset(newSurface); - } else { - Helpers::warn("Error creating Vulkan surface"); + if (window) { + if (VkSurfaceKHR newSurface; SDL_Vulkan_CreateSurface(window, instance.get(), &newSurface)) { + surface.reset(newSurface); + } else { + Helpers::warn("Error creating Vulkan surface"); + } } // Pick physical device @@ -423,18 +393,20 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { std::vector::iterator partitionEnd = physicalDevices.end(); // Prefer GPUs that can access the surface - const auto surfaceSupport = [this](const vk::PhysicalDevice& physicalDevice) -> bool { - const usize queueCount = physicalDevice.getQueueFamilyProperties().size(); - for (usize queueIndex = 0; queueIndex < queueCount; ++queueIndex) { - if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueIndex, surface.get()); - supportResult.result == vk::Result::eSuccess) { - return supportResult.value; + if (surface) { + const auto surfaceSupport = [this](const vk::PhysicalDevice& physicalDevice) -> bool { + const usize queueCount = physicalDevice.getQueueFamilyProperties().size(); + for (usize queueIndex = 0; queueIndex < queueCount; ++queueIndex) { + if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueIndex, surface.get()); + supportResult.result == vk::Result::eSuccess) { + return supportResult.value; + } } - } - return false; - }; + return false; + }; - partitionEnd = std::stable_partition(physicalDevices.begin(), partitionEnd, surfaceSupport); + partitionEnd = std::stable_partition(physicalDevices.begin(), partitionEnd, surfaceSupport); + } // Prefer Discrete GPUs const auto isDiscrete = [](const vk::PhysicalDevice& physicalDevice) -> bool { @@ -454,26 +426,32 @@ void 
RendererVK::initGraphicsContext(SDL_Window* window) { std::vector deviceQueueInfos; { const std::vector queueFamilyProperties = physicalDevice.getQueueFamilyProperties(); - + std::unordered_set queueFamilyRequests; // Get present queue family - for (usize queueFamilyIndex = 0; queueFamilyIndex < queueFamilyProperties.size(); ++queueFamilyIndex) { - if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueFamilyIndex, surface.get()); - supportResult.result == vk::Result::eSuccess) { - if (supportResult.value) { - presentQueueFamily = queueFamilyIndex; - break; + if (surface) { + for (usize queueFamilyIndex = 0; queueFamilyIndex < queueFamilyProperties.size(); ++queueFamilyIndex) { + if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueFamilyIndex, surface.get()); + supportResult.result == vk::Result::eSuccess) { + if (supportResult.value) { + presentQueueFamily = queueFamilyIndex; + break; + } } } + queueFamilyRequests.emplace(presentQueueFamily); } static const float queuePriority = 1.0f; graphicsQueueFamily = findQueueFamily(queueFamilyProperties, vk::QueueFlagBits::eGraphics); + queueFamilyRequests.emplace(graphicsQueueFamily); computeQueueFamily = findQueueFamily(queueFamilyProperties, vk::QueueFlagBits::eCompute); + queueFamilyRequests.emplace(computeQueueFamily); transferQueueFamily = findQueueFamily(queueFamilyProperties, vk::QueueFlagBits::eTransfer); + queueFamilyRequests.emplace(transferQueueFamily); // Requests a singular queue for each unique queue-family - const std::unordered_set queueFamilyRequests = {presentQueueFamily, graphicsQueueFamily, computeQueueFamily, transferQueueFamily}; + for (const u32 queueFamilyIndex : queueFamilyRequests) { deviceQueueInfos.emplace_back(vk::DeviceQueueCreateInfo({}, queueFamilyIndex, 1, &queuePriority)); } @@ -482,15 +460,31 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { // Create Device vk::DeviceCreateInfo deviceInfo = {}; - static const char* deviceExtensions[] = { - 
VK_KHR_SWAPCHAIN_EXTENSION_NAME, + // Device extensions + std::vector deviceExtensions = { #if defined(__APPLE__) "VK_KHR_portability_subset", #endif // VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME }; - deviceInfo.ppEnabledExtensionNames = deviceExtensions; - deviceInfo.enabledExtensionCount = std::size(deviceExtensions); + + std::unordered_set physicalDeviceExtensions; + if (const auto enumerateResult = physicalDevice.enumerateDeviceExtensionProperties(); enumerateResult.result == vk::Result::eSuccess) { + for (const auto& extension : enumerateResult.value) { + physicalDeviceExtensions.insert(extension.extensionName); + } + } else { + Helpers::panic("Error enumerating physical devices extensions: %s\n", vk::to_string(enumerateResult.result).c_str()); + } + + // Opertional extensions + + // Optionally enable the swapchain, to support "headless" rendering + if (physicalDeviceExtensions.contains(VK_KHR_SWAPCHAIN_EXTENSION_NAME)) { + deviceExtensions.emplace_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + } + + deviceInfo.setPEnabledExtensionNames(deviceExtensions); vk::StructureChain deviceFeatureChain = {}; @@ -528,6 +522,39 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { } // Create swapchain + if (targetWindow && surface) { + vk::Extent2D swapchainExtent; + { + int windowWidth, windowHeight; + SDL_Vulkan_GetDrawableSize(window, &windowWidth, &windowHeight); + swapchainExtent.width = windowWidth; + swapchainExtent.height = windowHeight; + } + recreateSwapchain(surface.get(), swapchainExtent); + } + + // Create frame-buffering data + // Frame-buffering Command buffer(s) + vk::CommandBufferAllocateInfo commandBuffersInfo = {}; + commandBuffersInfo.commandPool = commandPool.get(); + commandBuffersInfo.level = vk::CommandBufferLevel::ePrimary; + commandBuffersInfo.commandBufferCount = frameBufferingCount; + + if (auto allocateResult = device->allocateCommandBuffersUnique(commandBuffersInfo); allocateResult.result == vk::Result::eSuccess) { + frameCommandBuffers = 
std::move(allocateResult.value); + } else { + Helpers::panic("Error allocating command buffer: %s\n", vk::to_string(allocateResult.result).c_str()); + } + + // Frame-buffering synchronization primitives + vk::FenceCreateInfo fenceInfo = {}; + fenceInfo.flags = vk::FenceCreateFlagBits::eSignaled; + + vk::SemaphoreCreateInfo semaphoreInfo = {}; + + swapImageFreeSemaphore.resize(frameBufferingCount); + renderFinishedSemaphore.resize(frameBufferingCount); + frameFinishedFences.resize(frameBufferingCount); vk::Extent2D swapchainExtent; { int windowWidth, windowHeight; From e3699fe8f8ac799e3cf4208224ee8c3e273e4c66 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Tue, 25 Jul 2023 22:00:37 -0700 Subject: [PATCH 03/45] Allocate and present separate top/bottom screen framebuffer images Instead of operating directly on the swapchain images, we have our own top/bottom framebuffer images that will be rendered to independent of having an available swapchain. The images are blitted into the swapchain images, allowing for resizing too! 
--- CMakeLists.txt | 4 +- include/renderer_vk/renderer_vk.hpp | 5 + include/renderer_vk/vk_memory.hpp | 36 ++++++ src/core/renderer_vk/renderer_vk.cpp | 150 +++++++++++++++++++++-- src/core/renderer_vk/vk_memory.cpp | 174 +++++++++++++++++++++++++++ 5 files changed, 354 insertions(+), 15 deletions(-) create mode 100644 include/renderer_vk/vk_memory.hpp create mode 100644 src/core/renderer_vk/vk_memory.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 14262e65..f915444c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,11 +239,11 @@ if(ENABLE_VULKAN) ) set(RENDERER_VK_INCLUDE_FILES include/renderer_vk/renderer_vk.hpp - include/renderer_vk/vulkan_api.hpp include/renderer_vk/vk_debug.hpp + include/renderer_vk/vulkan_api.hpp include/renderer_vk/vk_debug.hpp include/renderer_vk/vk_memory.hpp ) set(RENDERER_VK_SOURCE_FILES src/core/renderer_vk/renderer_vk.cpp - src/core/renderer_vk/vulkan_api.cpp src/core/renderer_vk/vk_debug.cpp + src/core/renderer_vk/vulkan_api.cpp src/core/renderer_vk/vk_debug.cpp src/core/renderer_vk/vk_memory.cpp ) set(HEADER_FILES ${HEADER_FILES} ${RENDERER_VK_INCLUDE_FILES}) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 6ebbcb7e..9545a4d9 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -37,6 +37,8 @@ class RendererVK final : public Renderer { // Todo: make this a configuration option static constexpr usize frameBufferingCount = 3; + vk::UniqueDeviceMemory framebufferMemory = {}; + // Frame-buffering data // Each vector is `frameBufferingCount` in size std::vector frameCommandBuffers = {}; @@ -44,6 +46,9 @@ class RendererVK final : public Renderer { std::vector renderFinishedSemaphore = {}; std::vector frameFinishedFences = {}; + std::vector topScreenImages = {}; + std::vector bottomScreenImages = {}; + // Recreate the swapchain, possibly re-using the old one in the case of a resize vk::Result recreateSwapchain(vk::SurfaceKHR surface, 
vk::Extent2D swapchainExtent); diff --git a/include/renderer_vk/vk_memory.hpp b/include/renderer_vk/vk_memory.hpp new file mode 100644 index 00000000..8bf3f25d --- /dev/null +++ b/include/renderer_vk/vk_memory.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include + +#include "helpers.hpp" +#include "vulkan_api.hpp" + +namespace Vulkan { + + // Will try to find a memory type that is suitable for the given requirements. + // Returns -1 if no suitable memory type was found. + s32 findMemoryTypeIndex( + vk::PhysicalDevice physicalDevice, u32 memoryTypeMask, vk::MemoryPropertyFlags memoryProperties, + vk::MemoryPropertyFlags memoryExcludeProperties = vk::MemoryPropertyFlagBits::eProtected + ); + + // Given an array of valid Vulkan image-handles or buffer-handles, these + // functions will allocate a single block of device-memory for all of them + // and bind them consecutively. + // There may be a case that all the buffers or images cannot be allocated + // to the same device memory due to their required memory-type. 
+ std::tuple commitImageHeap( + vk::Device device, vk::PhysicalDevice physicalDevice, const std::span images, + vk::MemoryPropertyFlags memoryProperties = vk::MemoryPropertyFlagBits::eDeviceLocal, + vk::MemoryPropertyFlags memoryExcludeProperties = vk::MemoryPropertyFlagBits::eProtected + ); + + std::tuple commitBufferHeap( + vk::Device device, vk::PhysicalDevice physicalDevice, const std::span buffers, + vk::MemoryPropertyFlags memoryProperties = vk::MemoryPropertyFlagBits::eDeviceLocal, + vk::MemoryPropertyFlags memoryExcludeProperties = vk::MemoryPropertyFlagBits::eProtected + ); + +} // namespace Vulkan \ No newline at end of file diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 4b6956d5..310fdec3 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -1,12 +1,14 @@ #include "renderer_vk/renderer_vk.hpp" #include +#include #include #include #include "SDL_vulkan.h" #include "helpers.hpp" #include "renderer_vk/vk_debug.hpp" +#include "renderer_vk/vk_memory.hpp" // Finds the first queue family that satisfies `queueMask` and excludes `queueExcludeMask` bits // Returns -1 if not found @@ -221,15 +223,27 @@ void RendererVK::display() { Vulkan::DebugLabelScope debugScope(frameCommandBuffer.get(), frameScopeColor, "Frame"); - // Prepare for color-clear if (swapchainImageIndex != swapchainImageInvalid) { + // Prepare images for color-clear frameCommandBuffer->pipelineBarrier( vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, - {vk::ImageMemoryBarrier( - vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, - vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - )} + { + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryRead, 
vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + topScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + bottomScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } ); static const std::array clearColor = {{0.0f, 0.0f, 0.0f, 1.0f}}; @@ -238,6 +252,56 @@ void RendererVK::display() { vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ); + //// Simulated rendering work, just clear the screens and get them ready to blit(transfer-src layout) + { + static const std::array topClearColor = {{1.0f, 0.0f, 0.0f, 1.0f}}; + static const std::array bottomClearColor = {{0.0f, 1.0f, 0.0f, 1.0f}}; + frameCommandBuffer->clearColorImage( + topScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferDstOptimal, topClearColor, + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ); + frameCommandBuffer->clearColorImage( + bottomScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferDstOptimal, bottomClearColor, + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ); + frameCommandBuffer->pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + { + 
vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + topScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + bottomScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } + ); + } + + // Blip top/bottom screen onto swapchain image + { + static const vk::ImageBlit topScreenBlit( + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240, 1}}, + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240, 1}} + ); + static const vk::ImageBlit bottomScreenBlit( + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{320, 240, 1}}, + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), + {vk::Offset3D{(400 / 2) - (320 / 2), 240, 0}, vk::Offset3D{(400 / 2) + (320 / 2), 240 + 240, 1}} + ); + frameCommandBuffer->blitImage( + topScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferSrcOptimal, swapchainImages[swapchainImageIndex], + vk::ImageLayout::eTransferDstOptimal, {topScreenBlit}, vk::Filter::eNearest + ); + frameCommandBuffer->blitImage( + bottomScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferSrcOptimal, swapchainImages[swapchainImageIndex], + vk::ImageLayout::eTransferDstOptimal, {bottomScreenBlit}, vk::Filter::eNearest + ); + } + // Prepare for present frameCommandBuffer->pipelineBarrier( vk::PipelineStageFlagBits::eTransfer, 
vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, @@ -555,14 +619,74 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { swapImageFreeSemaphore.resize(frameBufferingCount); renderFinishedSemaphore.resize(frameBufferingCount); frameFinishedFences.resize(frameBufferingCount); - vk::Extent2D swapchainExtent; - { - int windowWidth, windowHeight; - SDL_Vulkan_GetDrawableSize(window, &windowWidth, &windowHeight); - swapchainExtent.width = windowWidth; - swapchainExtent.height = windowHeight; + + vk::ImageCreateInfo topScreenInfo = {}; + topScreenInfo.setImageType(vk::ImageType::e2D); + topScreenInfo.setFormat(vk::Format::eR8G8B8A8Unorm); + topScreenInfo.setExtent(vk::Extent3D(400, 240, 1)); + topScreenInfo.setMipLevels(1); + topScreenInfo.setArrayLayers(2); // Two image layers, for 3D mode + topScreenInfo.setSamples(vk::SampleCountFlagBits::e1); + topScreenInfo.setTiling(vk::ImageTiling::eOptimal); + topScreenInfo.setUsage( + vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eInputAttachment | vk::ImageUsageFlagBits::eTransferSrc | + vk::ImageUsageFlagBits::eTransferDst + ); + topScreenInfo.setSharingMode(vk::SharingMode::eExclusive); + topScreenInfo.setInitialLayout(vk::ImageLayout::eUndefined); + + vk::ImageCreateInfo bottomScreenInfo = topScreenInfo; + bottomScreenInfo.setExtent(vk::Extent3D(320, 240, 1)); + bottomScreenInfo.setArrayLayers(1); + + topScreenImages.resize(frameBufferingCount); + bottomScreenImages.resize(frameBufferingCount); + + for (usize i = 0; i < frameBufferingCount; i++) { + if (auto createResult = device->createSemaphoreUnique(semaphoreInfo); createResult.result == vk::Result::eSuccess) { + swapImageFreeSemaphore[i] = std::move(createResult.value); + } else { + Helpers::panic("Error creating 'present-ready' semaphore: %s\n", vk::to_string(createResult.result).c_str()); + } + + if (auto createResult = device->createSemaphoreUnique(semaphoreInfo); createResult.result == 
vk::Result::eSuccess) { + renderFinishedSemaphore[i] = std::move(createResult.value); + } else { + Helpers::panic("Error creating 'post-render' semaphore: %s\n", vk::to_string(createResult.result).c_str()); + } + + if (auto createResult = device->createFenceUnique(fenceInfo); createResult.result == vk::Result::eSuccess) { + frameFinishedFences[i] = std::move(createResult.value); + } else { + Helpers::panic("Error creating 'present-ready' semaphore: %s\n", vk::to_string(createResult.result).c_str()); + } + + if (auto createResult = device->createImageUnique(topScreenInfo); createResult.result == vk::Result::eSuccess) { + topScreenImages[i] = std::move(createResult.value); + } else { + Helpers::panic("Error creating top-screen image: %s\n", vk::to_string(createResult.result).c_str()); + } + + if (auto createResult = device->createImageUnique(bottomScreenInfo); createResult.result == vk::Result::eSuccess) { + bottomScreenImages[i] = std::move(createResult.value); + } else { + Helpers::panic("Error creating bottom-screen image: %s\n", vk::to_string(createResult.result).c_str()); + } + } + + // Commit memory to all of our images + { + const auto getImage = [](const vk::UniqueImage& image) -> vk::Image { return image.get(); }; + std::vector images; + std::ranges::transform(topScreenImages, std::back_inserter(images), getImage); + std::ranges::transform(bottomScreenImages, std::back_inserter(images), getImage); + + if (auto [result, imageMemory] = Vulkan::commitImageHeap(device.get(), physicalDevice, images); result == vk::Result::eSuccess) { + framebufferMemory = std::move(imageMemory); + } else { + Helpers::panic("Error allocating framebuffer memory: %s\n", vk::to_string(result).c_str()); + } } - recreateSwapchain(surface.get(), swapchainExtent); } void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {} diff --git a/src/core/renderer_vk/vk_memory.cpp b/src/core/renderer_vk/vk_memory.cpp new file mode 100644 index 00000000..c9087719 --- 
/dev/null +++ b/src/core/renderer_vk/vk_memory.cpp @@ -0,0 +1,174 @@ +#include "renderer_vk/vk_memory.hpp" + +namespace Vulkan { + + static constexpr vk::DeviceSize alignUp(vk::DeviceSize value, std::size_t size) { + const vk::DeviceSize mod = static_cast(value % size); + value -= mod; + return static_cast(mod == vk::DeviceSize{0} ? value : value + size); + } + + // Given a speculative heap-allocation, defined by its current size and + // memory-type bits, appends a memory requirements structure to it, updating + // both the size and the required memory-type-bits. Returns the offset within + // the heap for the current MemoryRequirements Todo: Sun Apr 23 13:28:25 PDT + // 2023 Rather than using a running-size of the heap, look at all of the memory + // requests and optimally create a packing for all of the offset and alignment + // requirements. Such as by satisfying all of the largest alignments first, and + // then the smallest, to reduce padding + static vk::DeviceSize commitMemoryRequestToHeap( + const vk::MemoryRequirements& curMemoryRequirements, vk::DeviceSize& curHeapEnd, u32& curMemoryTypeBits, vk::DeviceSize sizeAlignment + ) { + // Accumulate a mask of all the memory types that satisfies each of the + // handles + curMemoryTypeBits &= curMemoryRequirements.memoryTypeBits; + + // Pad up the memory sizes so they are not considered aliasing + const vk::DeviceSize curMemoryOffset = alignUp(curHeapEnd, curMemoryRequirements.alignment); + // Pad the size by the required size-alignment. 
+ // Intended for BufferImageGranularity + const vk::DeviceSize curMemorySize = alignUp(curMemoryRequirements.size, sizeAlignment); + + curHeapEnd = (curMemoryOffset + curMemorySize); + return curMemoryOffset; + } + + s32 findMemoryTypeIndex( + vk::PhysicalDevice physicalDevice, u32 memoryTypeMask, vk::MemoryPropertyFlags memoryProperties, + vk::MemoryPropertyFlags memoryExcludeProperties + ) { + const vk::PhysicalDeviceMemoryProperties deviceMemoryProperties = physicalDevice.getMemoryProperties(); + // Iterate the physical device's memory types until we find a match + for (std::size_t i = 0; i < deviceMemoryProperties.memoryTypeCount; i++) { + if( + // Is within memory type mask + (((memoryTypeMask >> i) & 0b1) == 0b1) && + // Has property flags + (deviceMemoryProperties.memoryTypes[i].propertyFlags + & memoryProperties) + == memoryProperties + && + // None of the excluded properties are enabled + !(deviceMemoryProperties.memoryTypes[i].propertyFlags + & memoryExcludeProperties) ) + { + return static_cast(i); + } + } + + return -1; + } + + std::tuple commitImageHeap( + vk::Device device, vk::PhysicalDevice physicalDevice, const std::span images, vk::MemoryPropertyFlags memoryProperties, + vk::MemoryPropertyFlags memoryExcludeProperties + ) { + vk::MemoryAllocateInfo imageHeapAllocInfo = {}; + u32 imageHeapMemoryTypeBits = 0xFFFFFFFF; + std::vector imageHeapBinds; + + const vk::DeviceSize bufferImageGranularity = physicalDevice.getProperties().limits.bufferImageGranularity; + + for (const vk::Image& curImage : images) { + const vk::DeviceSize curBindOffset = commitMemoryRequestToHeap( + device.getImageMemoryRequirements(curImage), imageHeapAllocInfo.allocationSize, imageHeapMemoryTypeBits, bufferImageGranularity + ); + + if (imageHeapMemoryTypeBits == 0) { + // No possible memory heap for all of the images to share + return std::make_tuple(vk::Result::eErrorOutOfDeviceMemory, vk::UniqueDeviceMemory()); + } + + // Put nullptr for the device memory for now + 
imageHeapBinds.emplace_back(vk::BindImageMemoryInfo{curImage, nullptr, curBindOffset}); + } + + const s32 memoryTypeIndex = findMemoryTypeIndex(physicalDevice, imageHeapMemoryTypeBits, memoryProperties, memoryExcludeProperties); + + if (memoryTypeIndex < 0) { + // Unable to find a memory heap that satisfies all the images + return std::make_tuple(vk::Result::eErrorOutOfDeviceMemory, vk::UniqueDeviceMemory()); + } + + imageHeapAllocInfo.memoryTypeIndex = memoryTypeIndex; + + vk::UniqueDeviceMemory imageHeapMemory = {}; + + if (auto allocResult = device.allocateMemoryUnique(imageHeapAllocInfo); allocResult.result == vk::Result::eSuccess) { + imageHeapMemory = std::move(allocResult.value); + } else { + return std::make_tuple(allocResult.result, vk::UniqueDeviceMemory()); + } + + // Assign the device memory to the bindings + for (vk::BindImageMemoryInfo& curBind : imageHeapBinds) { + curBind.memory = imageHeapMemory.get(); + } + + // Now bind them all in one call + if (const vk::Result bindResult = device.bindImageMemory2(imageHeapBinds); bindResult == vk::Result::eSuccess) { + // Binding memory succeeded + } else { + return std::make_tuple(bindResult, vk::UniqueDeviceMemory()); + } + + return std::make_tuple(vk::Result::eSuccess, std::move(imageHeapMemory)); + } + + std::tuple commitBufferHeap( + vk::Device device, vk::PhysicalDevice physicalDevice, const std::span buffers, vk::MemoryPropertyFlags memoryProperties, + vk::MemoryPropertyFlags memoryExcludeProperties + ) { + vk::MemoryAllocateInfo bufferHeapAllocInfo = {}; + u32 bufferHeapMemoryTypeBits = 0xFFFFFFFF; + std::vector bufferHeapBinds; + + const vk::DeviceSize bufferImageGranularity = physicalDevice.getProperties().limits.bufferImageGranularity; + + for (const vk::Buffer& curBuffer : buffers) { + const vk::DeviceSize curBindOffset = commitMemoryRequestToHeap( + device.getBufferMemoryRequirements(curBuffer), bufferHeapAllocInfo.allocationSize, bufferHeapMemoryTypeBits, bufferImageGranularity + ); + + if 
(bufferHeapMemoryTypeBits == 0) { + // No possible memory heap for all of the buffers to share + return std::make_tuple(vk::Result::eErrorOutOfDeviceMemory, vk::UniqueDeviceMemory()); + } + + // Put nullptr for the device memory for now + bufferHeapBinds.emplace_back(vk::BindBufferMemoryInfo{curBuffer, nullptr, curBindOffset}); + } + + const s32 memoryTypeIndex = findMemoryTypeIndex(physicalDevice, bufferHeapMemoryTypeBits, memoryProperties, memoryExcludeProperties); + + if (memoryTypeIndex < 0) { + // Unable to find a memory heap that satisfies all the buffers + return std::make_tuple(vk::Result::eErrorOutOfDeviceMemory, vk::UniqueDeviceMemory()); + } + + bufferHeapAllocInfo.memoryTypeIndex = memoryTypeIndex; + + vk::UniqueDeviceMemory bufferHeapMemory = {}; + + if (auto allocResult = device.allocateMemoryUnique(bufferHeapAllocInfo); allocResult.result == vk::Result::eSuccess) { + bufferHeapMemory = std::move(allocResult.value); + } else { + return std::make_tuple(allocResult.result, vk::UniqueDeviceMemory()); + } + + // Assign the device memory to the bindings + for (vk::BindBufferMemoryInfo& curBind : bufferHeapBinds) { + curBind.memory = bufferHeapMemory.get(); + } + + // Now bind them all in one call + if (const vk::Result bindResult = device.bindBufferMemory2(bufferHeapBinds); bindResult == vk::Result::eSuccess) { + // Binding memory succeeded + } else { + return std::make_tuple(bindResult, vk::UniqueDeviceMemory()); + } + + return std::make_tuple(vk::Result::eSuccess, std::move(bufferHeapMemory)); + } + +} // namespace Vulkan \ No newline at end of file From e87db99a97e703bb4b581bd5b6afb47de59b91fe Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Tue, 25 Jul 2023 22:53:47 -0700 Subject: [PATCH 04/45] Remove ownership of SDL's vulkan-surface This surface is managed by SDL itself, so there is no need to keep it in a Unique handle for us to delete. Fixes the bug where vulkan crashes during shutdown. 
--- include/renderer_vk/renderer_vk.hpp | 2 +- src/core/renderer_vk/renderer_vk.cpp | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 9545a4d9..708e196d 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -10,7 +10,7 @@ class RendererVK final : public Renderer { vk::UniqueInstance instance = {}; vk::UniqueDebugUtilsMessengerEXT debugMessenger = {}; - vk::UniqueSurfaceKHR surface = {}; + vk::SurfaceKHR surface = {}; vk::PhysicalDevice physicalDevice = {}; diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 310fdec3..3161bf58 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -199,7 +199,7 @@ void RendererVK::display() { swapchainExtent.width = windowWidth; swapchainExtent.height = windowHeight; } - recreateSwapchain(surface.get(), swapchainExtent); + recreateSwapchain(surface, swapchainExtent); break; } default: { @@ -356,7 +356,7 @@ void RendererVK::display() { swapchainExtent.width = windowWidth; swapchainExtent.height = windowHeight; } - recreateSwapchain(surface.get(), swapchainExtent); + recreateSwapchain(surface, swapchainExtent); break; } default: { @@ -445,7 +445,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { // Create surface if (window) { if (VkSurfaceKHR newSurface; SDL_Vulkan_CreateSurface(window, instance.get(), &newSurface)) { - surface.reset(newSurface); + surface = newSurface; } else { Helpers::warn("Error creating Vulkan surface"); } @@ -461,8 +461,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { const auto surfaceSupport = [this](const vk::PhysicalDevice& physicalDevice) -> bool { const usize queueCount = physicalDevice.getQueueFamilyProperties().size(); for (usize queueIndex = 0; queueIndex < queueCount; ++queueIndex) { - if (auto supportResult = 
physicalDevice.getSurfaceSupportKHR(queueIndex, surface.get()); - supportResult.result == vk::Result::eSuccess) { + if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueIndex, surface); supportResult.result == vk::Result::eSuccess) { return supportResult.value; } } @@ -494,7 +493,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { // Get present queue family if (surface) { for (usize queueFamilyIndex = 0; queueFamilyIndex < queueFamilyProperties.size(); ++queueFamilyIndex) { - if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueFamilyIndex, surface.get()); + if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueFamilyIndex, surface); supportResult.result == vk::Result::eSuccess) { if (supportResult.value) { presentQueueFamily = queueFamilyIndex; @@ -594,7 +593,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { swapchainExtent.width = windowWidth; swapchainExtent.height = windowHeight; } - recreateSwapchain(surface.get(), swapchainExtent); + recreateSwapchain(surface, swapchainExtent); } // Create frame-buffering data From ac1f7bc521fe35f7198c6073a4f29ac7c1c7c9d5 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 27 Jul 2023 06:54:44 -0700 Subject: [PATCH 05/45] Remove dependency on C++20 ranges AppleClang does not support this unfortunately --- src/core/renderer_vk/renderer_vk.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 3161bf58..7edeb25c 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -1,7 +1,6 @@ #include "renderer_vk/renderer_vk.hpp" #include -#include #include #include @@ -677,8 +676,8 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { { const auto getImage = [](const vk::UniqueImage& image) -> vk::Image { return image.get(); }; std::vector images; - std::ranges::transform(topScreenImages, std::back_inserter(images), getImage); - 
std::ranges::transform(bottomScreenImages, std::back_inserter(images), getImage); + std::transform(topScreenImages.begin(), topScreenImages.end(), std::back_inserter(images), getImage); + std::transform(bottomScreenImages.begin(), bottomScreenImages.end(), std::back_inserter(images), getImage); if (auto [result, imageMemory] = Vulkan::commitImageHeap(device.get(), physicalDevice, images); result == vk::Result::eSuccess) { framebufferMemory = std::move(imageMemory); From 4976671ef085ba17130a761c75cdb41ddc86fe2d Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 27 Jul 2023 06:58:33 -0700 Subject: [PATCH 06/45] Rename `vulkan_api.hpp` to `vk_api.hpp` Consistent with other vulkan header patterns. This header in particular is to configure our Vulkan API usage. --- CMakeLists.txt | 4 ++-- include/renderer_vk/renderer_vk.hpp | 2 +- include/renderer_vk/{vulkan_api.hpp => vk_api.hpp} | 0 include/renderer_vk/vk_debug.hpp | 2 +- include/renderer_vk/vk_memory.hpp | 2 +- src/core/renderer_vk/vk_api.cpp | 3 +++ src/core/renderer_vk/vulkan_api.cpp | 3 --- 7 files changed, 8 insertions(+), 8 deletions(-) rename include/renderer_vk/{vulkan_api.hpp => vk_api.hpp} (100%) create mode 100644 src/core/renderer_vk/vk_api.cpp delete mode 100644 src/core/renderer_vk/vulkan_api.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f915444c..c49fa564 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,11 +239,11 @@ if(ENABLE_VULKAN) ) set(RENDERER_VK_INCLUDE_FILES include/renderer_vk/renderer_vk.hpp - include/renderer_vk/vulkan_api.hpp include/renderer_vk/vk_debug.hpp include/renderer_vk/vk_memory.hpp + include/renderer_vk/vk_api.hpp include/renderer_vk/vk_debug.hpp include/renderer_vk/vk_memory.hpp ) set(RENDERER_VK_SOURCE_FILES src/core/renderer_vk/renderer_vk.cpp - src/core/renderer_vk/vulkan_api.cpp src/core/renderer_vk/vk_debug.cpp src/core/renderer_vk/vk_memory.cpp + src/core/renderer_vk/vk_api.cpp src/core/renderer_vk/vk_debug.cpp src/core/renderer_vk/vk_memory.cpp ) 
set(HEADER_FILES ${HEADER_FILES} ${RENDERER_VK_INCLUDE_FILES}) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 708e196d..b20c1789 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -1,5 +1,5 @@ #include "renderer.hpp" -#include "vulkan_api.hpp" +#include "vk_api.hpp" class GPU; diff --git a/include/renderer_vk/vulkan_api.hpp b/include/renderer_vk/vk_api.hpp similarity index 100% rename from include/renderer_vk/vulkan_api.hpp rename to include/renderer_vk/vk_api.hpp diff --git a/include/renderer_vk/vk_debug.hpp b/include/renderer_vk/vk_debug.hpp index afc367dc..ed712269 100644 --- a/include/renderer_vk/vk_debug.hpp +++ b/include/renderer_vk/vk_debug.hpp @@ -4,7 +4,7 @@ #include #include -#include "vulkan_api.hpp" +#include "vk_api.hpp" namespace Vulkan { diff --git a/include/renderer_vk/vk_memory.hpp b/include/renderer_vk/vk_memory.hpp index 8bf3f25d..a84a5720 100644 --- a/include/renderer_vk/vk_memory.hpp +++ b/include/renderer_vk/vk_memory.hpp @@ -5,7 +5,7 @@ #include #include "helpers.hpp" -#include "vulkan_api.hpp" +#include "vk_api.hpp" namespace Vulkan { diff --git a/src/core/renderer_vk/vk_api.cpp b/src/core/renderer_vk/vk_api.cpp new file mode 100644 index 00000000..4f879dc2 --- /dev/null +++ b/src/core/renderer_vk/vk_api.cpp @@ -0,0 +1,3 @@ +#include "renderer_vk/vk_api.hpp" + +VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE; \ No newline at end of file diff --git a/src/core/renderer_vk/vulkan_api.cpp b/src/core/renderer_vk/vulkan_api.cpp deleted file mode 100644 index c207eea7..00000000 --- a/src/core/renderer_vk/vulkan_api.cpp +++ /dev/null @@ -1,3 +0,0 @@ -#include "renderer_vk/vulkan_api.hpp" - -VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE; \ No newline at end of file From d19b8cf364c8bbc1ec6dfe15effb2c8776ae8550 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 27 Jul 2023 07:32:10 -0700 Subject: [PATCH 07/45] Separate frame-work from presentation --- 
include/renderer_vk/renderer_vk.hpp | 2 +- src/core/renderer_vk/renderer_vk.cpp | 202 ++++++++++++++------------- 2 files changed, 103 insertions(+), 101 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index b20c1789..e7dfdf0c 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -10,7 +10,7 @@ class RendererVK final : public Renderer { vk::UniqueInstance instance = {}; vk::UniqueDebugUtilsMessengerEXT debugMessenger = {}; - vk::SurfaceKHR surface = {}; + vk::SurfaceKHR swapchainSurface = {}; vk::PhysicalDevice physicalDevice = {}; diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 7edeb25c..2c887cc8 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -167,7 +167,7 @@ RendererVK::~RendererVK() {} void RendererVK::reset() {} void RendererVK::display() { - // Block, on the CPU, to ensure that this frame-buffering-frame is ready for more work + // Block, on the CPU, to ensure that this buffered-frame is ready for more work if (auto waitResult = device->waitForFences({frameFinishedFences[frameBufferingIndex].get()}, true, std::numeric_limits::max()); waitResult != vk::Result::eSuccess) { Helpers::panic("Error waiting on swapchain fence: %s\n", vk::to_string(waitResult).c_str()); @@ -198,7 +198,7 @@ void RendererVK::display() { swapchainExtent.width = windowWidth; swapchainExtent.height = windowHeight; } - recreateSwapchain(surface, swapchainExtent); + recreateSwapchain(swapchainSurface, swapchainExtent); break; } default: { @@ -208,7 +208,7 @@ void RendererVK::display() { } } - vk::UniqueCommandBuffer& frameCommandBuffer = frameCommandBuffers.at(frameBufferingIndex); + const vk::UniqueCommandBuffer& frameCommandBuffer = frameCommandBuffers[frameBufferingIndex]; vk::CommandBufferBeginInfo beginInfo = {}; beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; @@ -217,100 +217,102 
@@ void RendererVK::display() { Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); } + //// Render Frame(Simulated - just clear the images to different colors for now) { static const std::array frameScopeColor = {{1.0f, 0.0f, 1.0f, 1.0f}}; Vulkan::DebugLabelScope debugScope(frameCommandBuffer.get(), frameScopeColor, "Frame"); - if (swapchainImageIndex != swapchainImageInvalid) { - // Prepare images for color-clear - frameCommandBuffer->pipelineBarrier( - vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, - { - vk::ImageMemoryBarrier( - vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, - vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ), - vk::ImageMemoryBarrier( - vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, - vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - topScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ), - vk::ImageMemoryBarrier( - vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, - vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - bottomScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ), - } - ); + // Prepare images for color-clear + frameCommandBuffer->pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + { + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, + 
vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + topScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + bottomScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } + ); + static const std::array topClearColor = {{1.0f, 0.0f, 0.0f, 1.0f}}; + static const std::array bottomClearColor = {{0.0f, 1.0f, 0.0f, 1.0f}}; + frameCommandBuffer->clearColorImage( + topScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferDstOptimal, topClearColor, + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ); + frameCommandBuffer->clearColorImage( + bottomScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferDstOptimal, bottomClearColor, + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ); + } - static const std::array clearColor = {{0.0f, 0.0f, 0.0f, 1.0f}}; - frameCommandBuffer->clearColorImage( - swapchainImages[swapchainImageIndex], vk::ImageLayout::eTransferDstOptimal, clearColor, + //// Present + if (swapchainImageIndex != swapchainImageInvalid) { + static const std::array presentScopeColor = {{1.0f, 1.0f, 0.0f, 1.0f}}; + Vulkan::DebugLabelScope debugScope(frameCommandBuffer.get(), presentScopeColor, "Present"); + + // Prepare swapchain image for color-clear/blit-dst, prepare top/bottom screen for blit-src + frameCommandBuffer->pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + { + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, + vk::ImageLayout::eTransferDstOptimal, 
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + topScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + bottomScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } + ); + + static const std::array clearColor = {{0.0f, 0.0f, 0.0f, 1.0f}}; + frameCommandBuffer->clearColorImage( + swapchainImages[swapchainImageIndex], vk::ImageLayout::eTransferDstOptimal, clearColor, + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ); + + // Blit top/bottom screen into swapchain image + + static const vk::ImageBlit topScreenBlit( + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240, 1}}, + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240, 1}} + ); + static const vk::ImageBlit bottomScreenBlit( + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{320, 240, 1}}, + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), + {vk::Offset3D{(400 / 2) - (320 / 2), 240, 0}, vk::Offset3D{(400 / 2) + (320 / 2), 240 + 240, 1}} + ); + frameCommandBuffer->blitImage( + topScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferSrcOptimal, swapchainImages[swapchainImageIndex], + 
vk::ImageLayout::eTransferDstOptimal, {topScreenBlit}, vk::Filter::eNearest + ); + frameCommandBuffer->blitImage( + bottomScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferSrcOptimal, swapchainImages[swapchainImageIndex], + vk::ImageLayout::eTransferDstOptimal, {bottomScreenBlit}, vk::Filter::eNearest + ); + + // Prepare swapchain image for present + frameCommandBuffer->pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, + {vk::ImageMemoryBarrier( + vk::AccessFlagBits::eNone, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::ePresentSrcKHR, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ); - - //// Simulated rendering work, just clear the screens and get them ready to blit(transfer-src layout) - { - static const std::array topClearColor = {{1.0f, 0.0f, 0.0f, 1.0f}}; - static const std::array bottomClearColor = {{0.0f, 1.0f, 0.0f, 1.0f}}; - frameCommandBuffer->clearColorImage( - topScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferDstOptimal, topClearColor, - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ); - frameCommandBuffer->clearColorImage( - bottomScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferDstOptimal, bottomClearColor, - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ); - frameCommandBuffer->pipelineBarrier( - vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, - { - vk::ImageMemoryBarrier( - vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eTransferDstOptimal, - vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - topScreenImages[frameBufferingIndex].get(), 
vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ), - vk::ImageMemoryBarrier( - vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eTransferDstOptimal, - vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - bottomScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ), - } - ); - } - - // Blip top/bottom screen onto swapchain image - { - static const vk::ImageBlit topScreenBlit( - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240, 1}}, - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240, 1}} - ); - static const vk::ImageBlit bottomScreenBlit( - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{320, 240, 1}}, - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), - {vk::Offset3D{(400 / 2) - (320 / 2), 240, 0}, vk::Offset3D{(400 / 2) + (320 / 2), 240 + 240, 1}} - ); - frameCommandBuffer->blitImage( - topScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferSrcOptimal, swapchainImages[swapchainImageIndex], - vk::ImageLayout::eTransferDstOptimal, {topScreenBlit}, vk::Filter::eNearest - ); - frameCommandBuffer->blitImage( - bottomScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferSrcOptimal, swapchainImages[swapchainImageIndex], - vk::ImageLayout::eTransferDstOptimal, {bottomScreenBlit}, vk::Filter::eNearest - ); - } - - // Prepare for present - frameCommandBuffer->pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, - {vk::ImageMemoryBarrier( - vk::AccessFlagBits::eNone, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, - vk::ImageLayout::ePresentSrcKHR, VK_QUEUE_FAMILY_IGNORED, 
VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - )} - ); - } + )} + ); } if (const vk::Result endResult = frameCommandBuffer->end(); endResult != vk::Result::eSuccess) { @@ -355,7 +357,7 @@ void RendererVK::display() { swapchainExtent.width = windowWidth; swapchainExtent.height = windowHeight; } - recreateSwapchain(surface, swapchainExtent); + recreateSwapchain(swapchainSurface, swapchainExtent); break; } default: { @@ -444,7 +446,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { // Create surface if (window) { if (VkSurfaceKHR newSurface; SDL_Vulkan_CreateSurface(window, instance.get(), &newSurface)) { - surface = newSurface; + swapchainSurface = newSurface; } else { Helpers::warn("Error creating Vulkan surface"); } @@ -456,11 +458,12 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { std::vector::iterator partitionEnd = physicalDevices.end(); // Prefer GPUs that can access the surface - if (surface) { + if (swapchainSurface) { const auto surfaceSupport = [this](const vk::PhysicalDevice& physicalDevice) -> bool { const usize queueCount = physicalDevice.getQueueFamilyProperties().size(); for (usize queueIndex = 0; queueIndex < queueCount; ++queueIndex) { - if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueIndex, surface); supportResult.result == vk::Result::eSuccess) { + if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueIndex, swapchainSurface); + supportResult.result == vk::Result::eSuccess) { return supportResult.value; } } @@ -490,9 +493,9 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { const std::vector queueFamilyProperties = physicalDevice.getQueueFamilyProperties(); std::unordered_set queueFamilyRequests; // Get present queue family - if (surface) { + if (swapchainSurface) { for (usize queueFamilyIndex = 0; queueFamilyIndex < queueFamilyProperties.size(); ++queueFamilyIndex) { - if (auto supportResult 
= physicalDevice.getSurfaceSupportKHR(queueFamilyIndex, surface); + if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueFamilyIndex, swapchainSurface); supportResult.result == vk::Result::eSuccess) { if (supportResult.value) { presentQueueFamily = queueFamilyIndex; @@ -584,7 +587,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { } // Create swapchain - if (targetWindow && surface) { + if (targetWindow && swapchainSurface) { vk::Extent2D swapchainExtent; { int windowWidth, windowHeight; @@ -592,7 +595,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { swapchainExtent.width = windowWidth; swapchainExtent.height = windowHeight; } - recreateSwapchain(surface, swapchainExtent); + recreateSwapchain(swapchainSurface, swapchainExtent); } // Create frame-buffering data @@ -623,7 +626,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { topScreenInfo.setFormat(vk::Format::eR8G8B8A8Unorm); topScreenInfo.setExtent(vk::Extent3D(400, 240, 1)); topScreenInfo.setMipLevels(1); - topScreenInfo.setArrayLayers(2); // Two image layers, for 3D mode + topScreenInfo.setArrayLayers(1); topScreenInfo.setSamples(vk::SampleCountFlagBits::e1); topScreenInfo.setTiling(vk::ImageTiling::eOptimal); topScreenInfo.setUsage( @@ -635,7 +638,6 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { vk::ImageCreateInfo bottomScreenInfo = topScreenInfo; bottomScreenInfo.setExtent(vk::Extent3D(320, 240, 1)); - bottomScreenInfo.setArrayLayers(1); topScreenImages.resize(frameBufferingCount); bottomScreenImages.resize(frameBufferingCount); From bf8bb5d45913cc85d0b76cef65edf03c0dbece86 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 27 Jul 2023 10:05:43 -0700 Subject: [PATCH 08/45] Add `vk_pica` translation unit Intended for conversions and utils between PICA definitions and Vulkan. 
--- CMakeLists.txt | 2 ++ include/renderer_vk/vk_pica.hpp | 12 ++++++++++++ src/core/renderer_vk/vk_pica.cpp | 25 +++++++++++++++++++++++++ 3 files changed, 39 insertions(+) create mode 100644 include/renderer_vk/vk_pica.hpp create mode 100644 src/core/renderer_vk/vk_pica.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c49fa564..6d15bbe9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -240,10 +240,12 @@ if(ENABLE_VULKAN) set(RENDERER_VK_INCLUDE_FILES include/renderer_vk/renderer_vk.hpp include/renderer_vk/vk_api.hpp include/renderer_vk/vk_debug.hpp include/renderer_vk/vk_memory.hpp + include/renderer_vk/vk_pica.hpp ) set(RENDERER_VK_SOURCE_FILES src/core/renderer_vk/renderer_vk.cpp src/core/renderer_vk/vk_api.cpp src/core/renderer_vk/vk_debug.cpp src/core/renderer_vk/vk_memory.cpp + src/core/renderer_vk/vk_pica.cpp ) set(HEADER_FILES ${HEADER_FILES} ${RENDERER_VK_INCLUDE_FILES}) diff --git a/include/renderer_vk/vk_pica.hpp b/include/renderer_vk/vk_pica.hpp new file mode 100644 index 00000000..affd3aa8 --- /dev/null +++ b/include/renderer_vk/vk_pica.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include "PICA/gpu.hpp" +#include "helpers.hpp" +#include "vk_api.hpp" + +namespace Vulkan { + + vk::Format colorFormatToVulkan(PICA::ColorFmt colorFormat); + vk::Format depthFormatToVulkan(PICA::DepthFmt depthFormat); + +} // namespace Vulkan \ No newline at end of file diff --git a/src/core/renderer_vk/vk_pica.cpp b/src/core/renderer_vk/vk_pica.cpp new file mode 100644 index 00000000..8e72b51c --- /dev/null +++ b/src/core/renderer_vk/vk_pica.cpp @@ -0,0 +1,25 @@ +#include "renderer_vk/vk_pica.hpp" + +namespace Vulkan { + + vk::Format colorFormatToVulkan(PICA::ColorFmt colorFormat) { + switch (colorFormat) { + case PICA::ColorFmt::RGBA8: return vk::Format::eR8G8B8A8Unorm; + case PICA::ColorFmt::RGB8: return vk::Format::eR8G8B8Unorm; + case PICA::ColorFmt::RGBA5551: return vk::Format::eR5G5B5A1UnormPack16; + case PICA::ColorFmt::RGB565: return 
vk::Format::eR5G6B5UnormPack16; + case PICA::ColorFmt::RGBA4: return vk::Format::eR4G4B4A4UnormPack16; + } + return vk::Format::eUndefined; + } + vk::Format depthFormatToVulkan(PICA::DepthFmt depthFormat) { + switch (depthFormat) { + case PICA::DepthFmt::Depth16: return vk::Format::eD16Unorm; + case PICA::DepthFmt::Unknown1: return vk::Format::eUndefined; + case PICA::DepthFmt::Depth24: return vk::Format::eX8D24UnormPack32; + case PICA::DepthFmt::Depth24Stencil8: return vk::Format::eD24UnormS8Uint; + } + return vk::Format::eUndefined; + } + +} // namespace Vulkan \ No newline at end of file From 37902cd9d6c50a592c1653c94a552294fa5b3716 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 27 Jul 2023 10:13:53 -0700 Subject: [PATCH 09/45] Implement a renderpass cache. Technically we can generate every possible render-pass up-front based on the possible combinations of ColorFmt and DepthFmt, but we should only allocate what the game asks for. Save that pattern for pipelines. --- include/renderer_vk/renderer_vk.hpp | 7 ++++ src/core/renderer_vk/renderer_vk.cpp | 59 +++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index e7dfdf0c..700861b2 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -1,3 +1,6 @@ +#include +#include + #include "renderer.hpp" #include "vk_api.hpp" @@ -49,6 +52,10 @@ class RendererVK final : public Renderer { std::vector topScreenImages = {}; std::vector bottomScreenImages = {}; + std::map renderPassCache; + + vk::RenderPass getRenderPass(PICA::ColorFmt colorFormat, std::optional depthFormat); + // Recreate the swapchain, possibly re-using the old one in the case of a resize vk::Result recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D swapchainExtent); diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 2c887cc8..05b40e43 100644 --- 
a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -8,6 +8,7 @@ #include "helpers.hpp" #include "renderer_vk/vk_debug.hpp" #include "renderer_vk/vk_memory.hpp" +#include "renderer_vk/vk_pica.hpp" // Finds the first queue family that satisfies `queueMask` and excludes `queueExcludeMask` bits // Returns -1 if not found @@ -24,6 +25,57 @@ static s32 findQueueFamily( return -1; } +vk::RenderPass RendererVK::getRenderPass(PICA::ColorFmt colorFormat, std::optional depthFormat) { + u32 renderPassHash = static_cast(colorFormat); + + if (depthFormat.has_value()) { + renderPassHash |= (static_cast(depthFormat.value()) << 8); + } + + // Cache hit + if (renderPassCache.contains(renderPassHash)) { + return renderPassCache.at(renderPassHash).get(); + } + + // Cache miss + vk::RenderPassCreateInfo renderPassInfo = {}; + + std::vector renderPassAttachments = {}; + + vk::AttachmentDescription colorAttachment = {}; + colorAttachment.format = Vulkan::colorFormatToVulkan(colorFormat); + colorAttachment.samples = vk::SampleCountFlagBits::e1; + colorAttachment.loadOp = vk::AttachmentLoadOp::eLoad; + colorAttachment.storeOp = vk::AttachmentStoreOp::eStore; + colorAttachment.stencilLoadOp = vk::AttachmentLoadOp::eLoad; + colorAttachment.stencilStoreOp = vk::AttachmentStoreOp::eStore; + colorAttachment.initialLayout = vk::ImageLayout::eColorAttachmentOptimal; + colorAttachment.finalLayout = vk::ImageLayout::eColorAttachmentOptimal; + renderPassAttachments.emplace_back(colorAttachment); + + if (depthFormat.has_value()) { + vk::AttachmentDescription depthAttachment = {}; + depthAttachment.format = Vulkan::depthFormatToVulkan(depthFormat.value()); + depthAttachment.samples = vk::SampleCountFlagBits::e1; + depthAttachment.loadOp = vk::AttachmentLoadOp::eLoad; + depthAttachment.storeOp = vk::AttachmentStoreOp::eStore; + depthAttachment.stencilLoadOp = vk::AttachmentLoadOp::eLoad; + depthAttachment.stencilStoreOp = vk::AttachmentStoreOp::eStore; + 
depthAttachment.initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + depthAttachment.finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + renderPassAttachments.emplace_back(depthAttachment); + } + + renderPassInfo.setAttachments(renderPassAttachments); + + if (auto createResult = device->createRenderPassUnique(renderPassInfo); createResult.result == vk::Result::eSuccess) { + return (renderPassCache[renderPassHash] = std::move(createResult.value)).get(); + } else { + Helpers::panic("Error creating render pass: %s\n", vk::to_string(createResult.result).c_str()); + } + return {}; +} + vk::Result RendererVK::recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D swapchainExtent) { static constexpr u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 static constexpr u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall @@ -695,6 +747,11 @@ void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u void RendererVK::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {} -void RendererVK::drawVertices(PICA::PrimType primType, std::span vertices) {} +void RendererVK::drawVertices(PICA::PrimType primType, std::span vertices) { + const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask]; + const bool depthEnable = depthControl & 1; + + const vk::RenderPass curRenderPass = getRenderPass(colourBufferFormat, depthEnable ? 
std::make_optional(depthBufferFormat) : std::nullopt); +} void RendererVK::screenshot(const std::string& name) {} From bf3917f0743d207de8a82f730b37016ef21bdd83 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 27 Jul 2023 14:56:49 -0700 Subject: [PATCH 10/45] Add subpass dependencies, optional depth attachment --- src/core/renderer_vk/renderer_vk.cpp | 40 +++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 05b40e43..ea458314 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -39,6 +39,7 @@ vk::RenderPass RendererVK::getRenderPass(PICA::ColorFmt colorFormat, std::option // Cache miss vk::RenderPassCreateInfo renderPassInfo = {}; + vk::SubpassDescription subPass = {}; std::vector renderPassAttachments = {}; @@ -68,6 +69,32 @@ vk::RenderPass RendererVK::getRenderPass(PICA::ColorFmt colorFormat, std::option renderPassInfo.setAttachments(renderPassAttachments); + static const vk::AttachmentReference colorAttachmentReference = {0, vk::ImageLayout::eColorAttachmentOptimal}; + static const vk::AttachmentReference depthAttachmentReference = {1, vk::ImageLayout::eDepthStencilReadOnlyOptimal}; + + subPass.setColorAttachments(colorAttachmentReference); + if (depthFormat.has_value()) { + subPass.setPDepthStencilAttachment(&depthAttachmentReference); + } + + subPass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + + renderPassInfo.setSubpasses(subPass); + + // We only have one sub-pass and we want all render-passes to be sequential, + // so input/output depends on VK_SUBPASS_EXTERNAL + static const vk::SubpassDependency subpassDependencies[2] = { + vk::SubpassDependency( + VK_SUBPASS_EXTERNAL, 0, vk::PipelineStageFlagBits::eAllGraphics, vk::PipelineStageFlagBits::eAllGraphics, + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eColorAttachmentWrite, vk::DependencyFlagBits::eByRegion + ), + 
vk::SubpassDependency( + 0, VK_SUBPASS_EXTERNAL, vk::PipelineStageFlagBits::eAllGraphics, vk::PipelineStageFlagBits::eAllGraphics, + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eColorAttachmentWrite, vk::DependencyFlagBits::eByRegion + )}; + + renderPassInfo.setDependencies(subpassDependencies); + if (auto createResult = device->createRenderPassUnique(renderPassInfo); createResult.result == vk::Result::eSuccess) { return (renderPassCache[renderPassHash] = std::move(createResult.value)).get(); } else { @@ -216,7 +243,7 @@ RendererVK::RendererVK(GPU& gpu, const std::array& internalRegs, co RendererVK::~RendererVK() {} -void RendererVK::reset() {} +void RendererVK::reset() { renderPassCache.clear(); } void RendererVK::display() { // Block, on the CPU, to ensure that this buffered-frame is ready for more work @@ -748,10 +775,15 @@ void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u void RendererVK::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {} void RendererVK::drawVertices(PICA::PrimType primType, std::span vertices) { - const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask]; - const bool depthEnable = depthControl & 1; + using namespace Helpers; - const vk::RenderPass curRenderPass = getRenderPass(colourBufferFormat, depthEnable ? std::make_optional(depthBufferFormat) : std::nullopt); + const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask]; + const bool depthTestEnable = depthControl & 1; + const bool depthWriteEnable = getBit<12>(depthControl); + const int depthFunc = getBits<4, 3>(depthControl); + const vk::ColorComponentFlags colorMask = vk::ColorComponentFlags(getBits<8, 4>(depthControl)); + + const vk::RenderPass curRenderPass = getRenderPass(colourBufferFormat, depthTestEnable ? 
std::make_optional(depthBufferFormat) : std::nullopt); } void RendererVK::screenshot(const std::string& name) {} From a36ee0025fb8c3dc4347b09f507bc2d4f0cff88c Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 27 Jul 2023 21:57:43 -0700 Subject: [PATCH 11/45] Disable present queue when present unavailable. --- src/core/renderer_vk/renderer_vk.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index ea458314..61f3186a 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -650,7 +650,9 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { // Initialize device-specific function pointers VULKAN_HPP_DEFAULT_DISPATCHER.init(device.get()); - presentQueue = device->getQueue(presentQueueFamily, 0); + if (presentQueueFamily != VK_QUEUE_FAMILY_IGNORED) { + presentQueue = device->getQueue(presentQueueFamily, 0); + } graphicsQueue = device->getQueue(graphicsQueueFamily, 0); computeQueue = device->getQueue(computeQueueFamily, 0); transferQueue = device->getQueue(transferQueueFamily, 0); From 6dcd09af3e33d612543f0ff38af072f1db34d1a5 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 27 Jul 2023 22:16:02 -0700 Subject: [PATCH 12/45] Implement basic color/depth render-texture cache --- include/renderer_vk/renderer_vk.hpp | 12 ++ src/core/renderer_vk/renderer_vk.cpp | 159 ++++++++++++++++++++++++++- 2 files changed, 170 insertions(+), 1 deletion(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 700861b2..fa349148 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -48,6 +48,18 @@ class RendererVK final : public Renderer { std::vector swapImageFreeSemaphore = {}; std::vector renderFinishedSemaphore = {}; std::vector frameFinishedFences = {}; + struct Texture { + vk::UniqueImage image; + vk::UniqueDeviceMemory imageMemory; + vk::UniqueImageView imageView; 
+ }; + std::map textureCache; + + static u32 colorBufferHash(u32 loc, u32 size, PICA::ColorFmt format); + static u32 depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format); + + Texture& getColorRenderTexture(); + Texture& getDepthRenderTexture(); std::vector topScreenImages = {}; std::vector bottomScreenImages = {}; diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 61f3186a..f255eb0b 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -25,6 +25,124 @@ static s32 findQueueFamily( return -1; } +u32 RendererVK::colorBufferHash(u32 loc, u32 size, PICA::ColorFmt format) { + return std::rotl(loc, 17) ^ std::rotr(size, 23) ^ (static_cast(format) << 60); +} +u32 RendererVK::depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format) { + return std::rotl(loc, 17) ^ std::rotr(size, 29) ^ (static_cast(format) << 60); +} + +RendererVK::Texture& RendererVK::getColorRenderTexture() { + const u32 renderTextureHash = + colorBufferHash(colourBufferLoc, fbSize[0] * fbSize[1] * PICA::sizePerPixel(colourBufferFormat), colourBufferFormat); + + // Cache hit + if (textureCache.contains(renderTextureHash)) { + return textureCache.at(renderTextureHash); + } + + // Cache miss + Texture& newTexture = textureCache[renderTextureHash]; + + vk::ImageCreateInfo textureInfo = {}; + textureInfo.setImageType(vk::ImageType::e2D); + textureInfo.setFormat(Vulkan::colorFormatToVulkan(colourBufferFormat)); + textureInfo.setExtent(vk::Extent3D(fbSize[0], fbSize[1], 1)); + textureInfo.setMipLevels(1); + textureInfo.setArrayLayers(1); + textureInfo.setSamples(vk::SampleCountFlagBits::e1); + textureInfo.setTiling(vk::ImageTiling::eOptimal); + textureInfo.setUsage( + vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eInputAttachment | vk::ImageUsageFlagBits::eTransferSrc | + vk::ImageUsageFlagBits::eTransferDst + ); + textureInfo.setSharingMode(vk::SharingMode::eExclusive); + 
textureInfo.setInitialLayout(vk::ImageLayout::eUndefined); + + if (auto createResult = device->createImageUnique(textureInfo); createResult.result == vk::Result::eSuccess) { + newTexture.image = std::move(createResult.value); + } else { + Helpers::panic("Error creating color render-texture image: %s\n", vk::to_string(createResult.result).c_str()); + } + + vk::ImageViewCreateInfo viewInfo = {}; + viewInfo.image = newTexture.image.get(); + viewInfo.viewType = vk::ImageViewType::e2D; + viewInfo.format = Vulkan::colorFormatToVulkan(colourBufferFormat); + viewInfo.components = vk::ComponentMapping(); + viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); + + if (auto [result, imageMemory] = Vulkan::commitImageHeap(device.get(), physicalDevice, {&newTexture.image.get(), 1}); + result == vk::Result::eSuccess) { + newTexture.imageMemory = std::move(imageMemory); + } else { + Helpers::panic("Error allocating color render-texture memory: %s\n", vk::to_string(result).c_str()); + } + + if (auto createResult = device->createImageViewUnique(viewInfo); createResult.result == vk::Result::eSuccess) { + newTexture.imageView = std::move(createResult.value); + } else { + Helpers::panic("Error creating color render-texture: %s\n", vk::to_string(createResult.result).c_str()); + } + + return newTexture; +} + +RendererVK::Texture& RendererVK::getDepthRenderTexture() { + const u32 renderTextureHash = depthBufferHash(depthBufferLoc, fbSize[0] * fbSize[1] * PICA::sizePerPixel(depthBufferFormat), depthBufferFormat); + + // Cache hit + if (textureCache.contains(renderTextureHash)) { + return textureCache.at(renderTextureHash); + } + + // Cache miss + Texture& newTexture = textureCache[renderTextureHash]; + + vk::ImageCreateInfo textureInfo = {}; + textureInfo.setImageType(vk::ImageType::e2D); + textureInfo.setFormat(Vulkan::depthFormatToVulkan(depthBufferFormat)); + textureInfo.setExtent(vk::Extent3D(fbSize[0], fbSize[1], 1)); + 
textureInfo.setMipLevels(1); + textureInfo.setArrayLayers(1); + textureInfo.setSamples(vk::SampleCountFlagBits::e1); + textureInfo.setTiling(vk::ImageTiling::eOptimal); + textureInfo.setUsage( + vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eInputAttachment | vk::ImageUsageFlagBits::eTransferSrc | + vk::ImageUsageFlagBits::eTransferDst + ); + textureInfo.setSharingMode(vk::SharingMode::eExclusive); + textureInfo.setInitialLayout(vk::ImageLayout::eUndefined); + + if (auto createResult = device->createImageUnique(textureInfo); createResult.result == vk::Result::eSuccess) { + newTexture.image = std::move(createResult.value); + } else { + Helpers::panic("Error creating depth render-texture image: %s\n", vk::to_string(createResult.result).c_str()); + } + + vk::ImageViewCreateInfo viewInfo = {}; + viewInfo.image = newTexture.image.get(); + viewInfo.viewType = vk::ImageViewType::e2D; + viewInfo.format = Vulkan::depthFormatToVulkan(depthBufferFormat); + viewInfo.components = vk::ComponentMapping(); + viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1); + + if (auto createResult = device->createImageViewUnique(viewInfo); createResult.result == vk::Result::eSuccess) { + newTexture.imageView = std::move(createResult.value); + } else { + Helpers::panic("Error creating depth render-texture: %s\n", vk::to_string(createResult.result).c_str()); + } + + if (auto [result, imageMemory] = Vulkan::commitImageHeap(device.get(), physicalDevice, {&newTexture.image.get(), 1}); + result == vk::Result::eSuccess) { + newTexture.imageMemory = std::move(imageMemory); + } else { + Helpers::panic("Error allocating depth render-texture memory: %s\n", vk::to_string(result).c_str()); + } + + return newTexture; +} + vk::RenderPass RendererVK::getRenderPass(PICA::ColorFmt colorFormat, std::optional depthFormat) { u32 renderPassHash = static_cast(colorFormat); @@ -739,7 +857,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) 
{ if (auto createResult = device->createFenceUnique(fenceInfo); createResult.result == vk::Result::eSuccess) { frameFinishedFences[i] = std::move(createResult.value); } else { - Helpers::panic("Error creating 'present-ready' semaphore: %s\n", vk::to_string(createResult.result).c_str()); + Helpers::panic("Error creating 'frame-finished' fence: %s\n", vk::to_string(createResult.result).c_str()); } if (auto createResult = device->createImageUnique(topScreenInfo); createResult.result == vk::Result::eSuccess) { @@ -786,6 +904,45 @@ void RendererVK::drawVertices(PICA::PrimType primType, std::span(depthControl)); const vk::RenderPass curRenderPass = getRenderPass(colourBufferFormat, depthTestEnable ? std::make_optional(depthBufferFormat) : std::nullopt); + + // Create framebuffer, find a way to cache this! + vk::Framebuffer curFramebuffer = {}; + { + std::vector renderTargets; + + const auto& colorTexture = getColorRenderTexture(); + renderTargets.emplace_back(colorTexture.imageView.get()); + + if (depthTestEnable) { + const auto& depthTexture = getDepthRenderTexture(); + renderTargets.emplace_back(depthTexture.imageView.get()); + } + + vk::FramebufferCreateInfo framebufferInfo = {}; + framebufferInfo.setRenderPass(curRenderPass); + framebufferInfo.setAttachments(renderTargets); + framebufferInfo.setWidth(fbSize[0]); + framebufferInfo.setHeight(fbSize[1]); + framebufferInfo.setLayers(1); + if (auto createResult = device->createFramebufferUnique(framebufferInfo); createResult.result == vk::Result::eSuccess) { + curFramebuffer = (frameFramebuffers[frameBufferingIndex].emplace_back(std::move(createResult.value))).get(); + } else { + Helpers::panic("Error creating render-texture framebuffer: %s\n", vk::to_string(createResult.result).c_str()); + } + } + + vk::RenderPassBeginInfo renderBeginInfo = {}; + renderBeginInfo.renderPass = curRenderPass; + static const vk::ClearValue ClearColors[] = { + vk::ClearColorValue(std::array{0.0f, 0.0f, 0.0f, 0.0f}), + 
vk::ClearDepthStencilValue(1.0f, 0), + vk::ClearColorValue(std::array{0.0f, 0.0f, 0.0f, 0.0f}), + }; + renderBeginInfo.pClearValues = ClearColors; + renderBeginInfo.clearValueCount = std::size(ClearColors); + renderBeginInfo.renderArea.extent.width = fbSize[0]; + renderBeginInfo.renderArea.extent.height = fbSize[1]; + renderBeginInfo.framebuffer = curFramebuffer; } void RendererVK::screenshot(const std::string& name) {} From 27268f86d35848bc69d3a699f646388832457a3b Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Fri, 11 Aug 2023 18:40:47 -0700 Subject: [PATCH 13/45] Fix instance-extension iteration Add the extensions when they are available rather than statically including them. --- src/core/renderer_vk/renderer_vk.cpp | 34 +++++++++++++++++++--------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index f255eb0b..ef64bd5c 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -587,12 +587,30 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { instanceInfo.pApplicationInfo = &applicationInfo; - std::vector instanceExtensions = { + std::unordered_set instanceExtensionsAvailable = {}; + if (const auto enumerateResult = vk::enumerateInstanceExtensionProperties(); enumerateResult.result == vk::Result::eSuccess) { + for (const auto& curExtension : enumerateResult.value) { + instanceExtensionsAvailable.emplace(curExtension.extensionName); + } + } + + std::vector instanceExtensions = {}; + + if (instanceExtensionsAvailable.contains(VK_KHR_SURFACE_EXTENSION_NAME)) { + instanceExtensions.emplace_back(VK_KHR_SURFACE_EXTENSION_NAME); + } + + bool debugUtils = false; + if (instanceExtensionsAvailable.contains(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { + instanceExtensions.emplace_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + debugUtils = true; + } + #if defined(__APPLE__) - VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, + if 
(instanceExtensionsAvailable.contains(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME)) { + instanceExtensionNames.emplace_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); + } #endif - VK_EXT_DEBUG_UTILS_EXTENSION_NAME, - }; // Get any additional extensions that SDL wants as well if (targetWindow) { @@ -620,13 +638,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { VULKAN_HPP_DEFAULT_DISPATCHER.init(instance.get()); // Enable debug messenger if the instance was able to be created with debug_utils - if (std::find( - instanceExtensions.begin(), instanceExtensions.end(), - // std::string_view has a way to compare itself to `const char*` - // so by casting it, we get the actual string comparisons - // and not pointer-comparisons - std::string_view(VK_EXT_DEBUG_UTILS_EXTENSION_NAME) - ) != instanceExtensions.end()) { + if (debugUtils) { vk::DebugUtilsMessengerCreateInfoEXT debugCreateInfo{}; debugCreateInfo.messageSeverity = vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose | vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo | vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning; From 50029e13333bddfd49b14cd0cac0946c5381da15 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Fri, 11 Aug 2023 23:07:12 -0700 Subject: [PATCH 14/45] Avoid usage of D24-S8 format This format is not supported on `radv`. Vulkan only mandates D16, plus one format from each of the Depth and Depth-Stencil pairs. 
--- src/core/renderer_vk/vk_pica.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/core/renderer_vk/vk_pica.cpp b/src/core/renderer_vk/vk_pica.cpp index 8e72b51c..33aca623 100644 --- a/src/core/renderer_vk/vk_pica.cpp +++ b/src/core/renderer_vk/vk_pica.cpp @@ -14,10 +14,20 @@ namespace Vulkan { } vk::Format depthFormatToVulkan(PICA::DepthFmt depthFormat) { switch (depthFormat) { + // VK_FORMAT_D16_UNORM is mandated by the vulkan specification case PICA::DepthFmt::Depth16: return vk::Format::eD16Unorm; case PICA::DepthFmt::Unknown1: return vk::Format::eUndefined; - case PICA::DepthFmt::Depth24: return vk::Format::eX8D24UnormPack32; - case PICA::DepthFmt::Depth24Stencil8: return vk::Format::eD24UnormS8Uint; + // The GPU may _not_ support these formats natively + // Only one of: + // VK_FORMAT_X8_D24_UNORM_PACK32 and VK_FORMAT_D32_SFLOAT + // and one of: + // VK_FORMAT_D24_UNORM_S8_UINT and VK_FORMAT_D32_SFLOAT_S8_UINT + // will be supported + // TODO: Detect this! + // case PICA::DepthFmt::Depth24: return vk::Format::eX8D24UnormPack32; + // case PICA::DepthFmt::Depth24Stencil8: return vk::Format::eD24UnormS8Uint; + case PICA::DepthFmt::Depth24: return vk::Format::eD32Sfloat; + case PICA::DepthFmt::Depth24Stencil8: return vk::Format::eD32SfloatS8Uint; } return vk::Format::eUndefined; } From c778c34433f03c0f2d283ec766f4a32f9e0c5eed Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sat, 12 Aug 2023 18:23:29 -0700 Subject: [PATCH 15/45] Separate present/graphics workloads Separate the "Display" texture from the cache of framebuffer textures, move present/graphics into separate command buffers. 
--- include/renderer_vk/renderer_vk.hpp | 13 +- src/core/renderer_vk/renderer_vk.cpp | 253 +++++++++++++++++++-------- 2 files changed, 192 insertions(+), 74 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index fa349148..b9ada4a9 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -44,15 +44,21 @@ class RendererVK final : public Renderer { // Frame-buffering data // Each vector is `frameBufferingCount` in size - std::vector frameCommandBuffers = {}; + std::vector framePresentCommandBuffers = {}; std::vector swapImageFreeSemaphore = {}; std::vector renderFinishedSemaphore = {}; std::vector frameFinishedFences = {}; + std::vector> frameFramebuffers = {}; + std::vector frameGraphicsCommandBuffers = {}; + + // Todo: + // Use `{colourBuffer,depthBuffer}Loc` to maintain an std::map-cache of framebuffers struct Texture { vk::UniqueImage image; vk::UniqueDeviceMemory imageMemory; vk::UniqueImageView imageView; }; + // Hash(loc, size, format) -> Texture std::map textureCache; static u32 colorBufferHash(u32 loc, u32 size, PICA::ColorFmt format); @@ -61,8 +67,9 @@ class RendererVK final : public Renderer { Texture& getColorRenderTexture(); Texture& getDepthRenderTexture(); - std::vector topScreenImages = {}; - std::vector bottomScreenImages = {}; + // Use `lower_bound` to find nearest texture for an address + // Blit them during `display()` + std::vector screenTexture = {}; std::map renderPassCache; diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index ef64bd5c..246ac6b4 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -108,7 +108,7 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture() { textureInfo.setSamples(vk::SampleCountFlagBits::e1); textureInfo.setTiling(vk::ImageTiling::eOptimal); textureInfo.setUsage( - vk::ImageUsageFlagBits::eColorAttachment | 
vk::ImageUsageFlagBits::eInputAttachment | vk::ImageUsageFlagBits::eTransferSrc | + vk::ImageUsageFlagBits::eDepthStencilAttachment | vk::ImageUsageFlagBits::eInputAttachment | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst ); textureInfo.setSharingMode(vk::SharingMode::eExclusive); @@ -125,7 +125,7 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture() { viewInfo.viewType = vk::ImageViewType::e2D; viewInfo.format = Vulkan::depthFormatToVulkan(depthBufferFormat); viewInfo.components = vk::ComponentMapping(); - viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1); + viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1); if (auto createResult = device->createImageViewUnique(viewInfo); createResult.result == vk::Result::eSuccess) { newTexture.imageView = std::move(createResult.value); @@ -364,12 +364,6 @@ RendererVK::~RendererVK() {} void RendererVK::reset() { renderPassCache.clear(); } void RendererVK::display() { - // Block, on the CPU, to ensure that this buffered-frame is ready for more work - if (auto waitResult = device->waitForFences({frameFinishedFences[frameBufferingIndex].get()}, true, std::numeric_limits::max()); - waitResult != vk::Result::eSuccess) { - Helpers::panic("Error waiting on swapchain fence: %s\n", vk::to_string(waitResult).c_str()); - } - // Get the next available swapchain image, and signal the semaphore when it's ready static constexpr u32 swapchainImageInvalid = std::numeric_limits::max(); u32 swapchainImageIndex = swapchainImageInvalid; @@ -405,7 +399,11 @@ void RendererVK::display() { } } - const vk::UniqueCommandBuffer& frameCommandBuffer = frameCommandBuffers[frameBufferingIndex]; + if (const vk::Result endResult = frameGraphicsCommandBuffers[frameBufferingIndex]->end(); endResult != vk::Result::eSuccess) { + Helpers::panic("Error ending command buffer recording: %s\n", 
vk::to_string(endResult).c_str()); + } + + const vk::UniqueCommandBuffer& frameCommandBuffer = framePresentCommandBuffers[frameBufferingIndex]; vk::CommandBufferBeginInfo beginInfo = {}; beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; @@ -426,24 +424,15 @@ void RendererVK::display() { { vk::ImageMemoryBarrier( vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, - vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - topScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ), - vk::ImageMemoryBarrier( - vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, - vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - bottomScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[frameBufferingIndex].get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), } ); static const std::array topClearColor = {{1.0f, 0.0f, 0.0f, 1.0f}}; static const std::array bottomClearColor = {{0.0f, 1.0f, 0.0f, 1.0f}}; frameCommandBuffer->clearColorImage( - topScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferDstOptimal, topClearColor, - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ); - frameCommandBuffer->clearColorImage( - bottomScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferDstOptimal, bottomClearColor, + screenTexture[frameBufferingIndex].get(), vk::ImageLayout::eTransferDstOptimal, topClearColor, vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ); } @@ -464,13 +453,8 @@ void RendererVK::display() { ), vk::ImageMemoryBarrier( vk::AccessFlagBits::eTransferWrite, 
vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eTransferDstOptimal, - vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - topScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ), - vk::ImageMemoryBarrier( - vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eTransferDstOptimal, - vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - bottomScreenImages[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[frameBufferingIndex].get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), } ); @@ -483,22 +467,13 @@ void RendererVK::display() { // Blit top/bottom screen into swapchain image - static const vk::ImageBlit topScreenBlit( - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240, 1}}, - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240, 1}} - ); - static const vk::ImageBlit bottomScreenBlit( - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{320, 240, 1}}, - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), - {vk::Offset3D{(400 / 2) - (320 / 2), 240, 0}, vk::Offset3D{(400 / 2) + (320 / 2), 240 + 240, 1}} + static const vk::ImageBlit screenBlit( + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240 * 2, 1}}, + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240 * 2, 1}} ); frameCommandBuffer->blitImage( - topScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferSrcOptimal, swapchainImages[swapchainImageIndex], - 
vk::ImageLayout::eTransferDstOptimal, {topScreenBlit}, vk::Filter::eNearest - ); - frameCommandBuffer->blitImage( - bottomScreenImages[frameBufferingIndex].get(), vk::ImageLayout::eTransferSrcOptimal, swapchainImages[swapchainImageIndex], - vk::ImageLayout::eTransferDstOptimal, {bottomScreenBlit}, vk::Filter::eNearest + screenTexture[frameBufferingIndex].get(), vk::ImageLayout::eTransferSrcOptimal, swapchainImages[swapchainImageIndex], + vk::ImageLayout::eTransferDstOptimal, {screenBlit}, vk::Filter::eNearest ); // Prepare swapchain image for present @@ -565,6 +540,27 @@ void RendererVK::display() { } frameBufferingIndex = ((frameBufferingIndex + 1) % frameBufferingCount); + + // Wait for next frame to be ready + + // Block, on the CPU, to ensure that this buffered-frame is ready for more work + if (auto waitResult = device->waitForFences({frameFinishedFences[frameBufferingIndex].get()}, true, std::numeric_limits::max()); + waitResult != vk::Result::eSuccess) { + Helpers::panic("Error waiting on swapchain fence: %s\n", vk::to_string(waitResult).c_str()); + } + + { + frameFramebuffers[frameBufferingIndex].clear(); + + frameGraphicsCommandBuffers[frameBufferingIndex]->reset(); + + vk::CommandBufferBeginInfo beginInfo = {}; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; + + if (const vk::Result beginResult = frameGraphicsCommandBuffers[frameBufferingIndex]->begin(beginInfo); beginResult != vk::Result::eSuccess) { + Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); + } + } } void RendererVK::initGraphicsContext(SDL_Window* window) { @@ -608,7 +604,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { #if defined(__APPLE__) if (instanceExtensionsAvailable.contains(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME)) { - instanceExtensionNames.emplace_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); + instanceExtensions.emplace_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); } #endif 
@@ -817,11 +813,25 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { commandBuffersInfo.commandBufferCount = frameBufferingCount; if (auto allocateResult = device->allocateCommandBuffersUnique(commandBuffersInfo); allocateResult.result == vk::Result::eSuccess) { - frameCommandBuffers = std::move(allocateResult.value); + framePresentCommandBuffers = std::move(allocateResult.value); } else { Helpers::panic("Error allocating command buffer: %s\n", vk::to_string(allocateResult.result).c_str()); } + if (auto allocateResult = device->allocateCommandBuffersUnique(commandBuffersInfo); allocateResult.result == vk::Result::eSuccess) { + frameGraphicsCommandBuffers = std::move(allocateResult.value); + } else { + Helpers::panic("Error allocating command buffer: %s\n", vk::to_string(allocateResult.result).c_str()); + } + + vk::CommandBufferBeginInfo beginInfo = {}; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; + for (const auto& graphicsCommandBuffer : frameGraphicsCommandBuffers) { + if (const vk::Result beginResult = graphicsCommandBuffer->begin(beginInfo); beginResult != vk::Result::eSuccess) { + Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); + } + } + // Frame-buffering synchronization primitives vk::FenceCreateInfo fenceInfo = {}; fenceInfo.flags = vk::FenceCreateFlagBits::eSignaled; @@ -831,27 +841,25 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { swapImageFreeSemaphore.resize(frameBufferingCount); renderFinishedSemaphore.resize(frameBufferingCount); frameFinishedFences.resize(frameBufferingCount); + frameFramebuffers.resize(frameBufferingCount); + frameGraphicsCommandBuffers.resize(frameBufferingCount); - vk::ImageCreateInfo topScreenInfo = {}; - topScreenInfo.setImageType(vk::ImageType::e2D); - topScreenInfo.setFormat(vk::Format::eR8G8B8A8Unorm); - topScreenInfo.setExtent(vk::Extent3D(400, 240, 1)); - topScreenInfo.setMipLevels(1); - topScreenInfo.setArrayLayers(1); 
- topScreenInfo.setSamples(vk::SampleCountFlagBits::e1); - topScreenInfo.setTiling(vk::ImageTiling::eOptimal); - topScreenInfo.setUsage( + vk::ImageCreateInfo screenTextureInfo = {}; + screenTextureInfo.setImageType(vk::ImageType::e2D); + screenTextureInfo.setFormat(vk::Format::eR8G8B8A8Unorm); + screenTextureInfo.setExtent(vk::Extent3D(400, 240, 1)); + screenTextureInfo.setMipLevels(1); + screenTextureInfo.setArrayLayers(1); + screenTextureInfo.setSamples(vk::SampleCountFlagBits::e1); + screenTextureInfo.setTiling(vk::ImageTiling::eOptimal); + screenTextureInfo.setUsage( vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eInputAttachment | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst ); - topScreenInfo.setSharingMode(vk::SharingMode::eExclusive); - topScreenInfo.setInitialLayout(vk::ImageLayout::eUndefined); + screenTextureInfo.setSharingMode(vk::SharingMode::eExclusive); + screenTextureInfo.setInitialLayout(vk::ImageLayout::eUndefined); - vk::ImageCreateInfo bottomScreenInfo = topScreenInfo; - bottomScreenInfo.setExtent(vk::Extent3D(320, 240, 1)); - - topScreenImages.resize(frameBufferingCount); - bottomScreenImages.resize(frameBufferingCount); + screenTexture.resize(frameBufferingCount); for (usize i = 0; i < frameBufferingCount; i++) { if (auto createResult = device->createSemaphoreUnique(semaphoreInfo); createResult.result == vk::Result::eSuccess) { @@ -872,25 +880,18 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { Helpers::panic("Error creating 'frame-finished' fence: %s\n", vk::to_string(createResult.result).c_str()); } - if (auto createResult = device->createImageUnique(topScreenInfo); createResult.result == vk::Result::eSuccess) { - topScreenImages[i] = std::move(createResult.value); + if (auto createResult = device->createImageUnique(screenTextureInfo); createResult.result == vk::Result::eSuccess) { + screenTexture[i] = std::move(createResult.value); } else { Helpers::panic("Error creating 
top-screen image: %s\n", vk::to_string(createResult.result).c_str()); } - - if (auto createResult = device->createImageUnique(bottomScreenInfo); createResult.result == vk::Result::eSuccess) { - bottomScreenImages[i] = std::move(createResult.value); - } else { - Helpers::panic("Error creating bottom-screen image: %s\n", vk::to_string(createResult.result).c_str()); - } } // Commit memory to all of our images { const auto getImage = [](const vk::UniqueImage& image) -> vk::Image { return image.get(); }; std::vector images; - std::transform(topScreenImages.begin(), topScreenImages.end(), std::back_inserter(images), getImage); - std::transform(bottomScreenImages.begin(), bottomScreenImages.end(), std::back_inserter(images), getImage); + std::transform(screenTexture.begin(), screenTexture.end(), std::back_inserter(images), getImage); if (auto [result, imageMemory] = Vulkan::commitImageHeap(device.get(), physicalDevice, images); result == vk::Result::eSuccess) { framebufferMemory = std::move(imageMemory); @@ -902,7 +903,112 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {} -void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {} +void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { + return; + if (fbSize[0] == 0 || fbSize[1] == 0) { + return; + } + const u32 inputWidth = inputSize & 0xffff; + const u32 inputGap = inputSize >> 16; + + const u32 outputWidth = outputSize & 0xffff; + const u32 outputGap = outputSize >> 16; + + Texture& colorTexture = getColorRenderTexture(); + + vk::ImageBlit blitRegion = {}; + // Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture + // We consider output gap == 320 to mean bottom, and anything else to mean top + if (outputGap == 320) { + // Bottom 
screen + blitRegion = vk::ImageBlit( + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{0, 0, 0}, vk::Offset3D{320, 240, 1}}, + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{40, 240, 0}, vk::Offset3D{40 + 320, 240 + 240, 1}} + ); + } else { + // Top screen + blitRegion = vk::ImageBlit( + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{0, 0, 0}, vk::Offset3D{400, 240, 1}}, + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{0, 0, 0}, vk::Offset3D{400, 240, 1}} + ); + } + + vk::CommandBufferAllocateInfo commandBuffersInfo = {}; + commandBuffersInfo.commandPool = commandPool.get(); + commandBuffersInfo.level = vk::CommandBufferLevel::ePrimary; + commandBuffersInfo.commandBufferCount = 1; + + vk::UniqueCommandBuffer blitCommandBuffer = {}; + if (auto allocateResult = device->allocateCommandBuffersUnique(commandBuffersInfo); allocateResult.result == vk::Result::eSuccess) { + blitCommandBuffer = std::move(allocateResult.value[0]); + } else { + Helpers::panic("Error allocating command buffer: %s\n", vk::to_string(allocateResult.result).c_str()); + } + + vk::CommandBufferBeginInfo beginInfo = {}; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; + + if (const vk::Result beginResult = blitCommandBuffer->begin(beginInfo); beginResult != vk::Result::eSuccess) { + Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); + } + + blitCommandBuffer->pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + { + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eColorAttachmentOptimal, + vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, colorTexture.image.get(), + 
vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eColorAttachmentOptimal, + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[frameBufferingIndex].get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } + ); + + blitCommandBuffer->blitImage( + colorTexture.image.get(), vk::ImageLayout::eTransferSrcOptimal, screenTexture[frameBufferingIndex].get(), + vk::ImageLayout::eTransferDstOptimal, {blitRegion}, vk::Filter::eNearest + ); + + if (const vk::Result endResult = frameGraphicsCommandBuffers[frameBufferingIndex]->end(); endResult != vk::Result::eSuccess) { + Helpers::panic("Error ending command buffer recording: %s\n", vk::to_string(endResult).c_str()); + } + + blitCommandBuffer->pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + { + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferRead, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferSrcOptimal, + vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, colorTexture.image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[frameBufferingIndex].get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } + ); + + vk::FenceCreateInfo fenceInfo = {}; + vk::UniqueFence finishedFence = {}; + if (auto createResult = device->createFenceUnique(fenceInfo); createResult.result == vk::Result::eSuccess) { + finishedFence = 
std::move(createResult.value); + } else { + Helpers::panic("Error creating fence: %s\n", vk::to_string(createResult.result).c_str()); + } + + vk::SubmitInfo submitInfo = {}; + + submitInfo.setCommandBuffers(blitCommandBuffer.get()); + + if (const vk::Result submitResult = graphicsQueue.submit({submitInfo}, finishedFence.get()); submitResult != vk::Result::eSuccess) { + Helpers::panic("Error submitting to graphics queue: %s\n", vk::to_string(submitResult).c_str()); + } +} void RendererVK::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {} @@ -955,6 +1061,11 @@ void RendererVK::drawVertices(PICA::PrimType primType, std::spanbeginRenderPass(renderBeginInfo, vk::SubpassContents::eInline); + commandBuffer->endRenderPass(); } void RendererVK::screenshot(const std::string& name) {} From cb64c52d2fd332df58fc2fc74af559dbb41ebbc4 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Tue, 15 Aug 2023 11:19:12 -0700 Subject: [PATCH 16/45] Use inline `rotl`/`ror` Fixes apple-clang builds, which do not implement a lot of the `bit` header. 
--- src/core/renderer_vk/renderer_vk.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 246ac6b4..5de3d534 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -25,11 +25,14 @@ static s32 findQueueFamily( return -1; } +static u32 rotl32(u32 x, u32 n) { return (x << n) | (x >> (32 - n)); } +static u32 ror32(u32 x, u32 n) { return (x >> n) | (x << (32 - n)); } + u32 RendererVK::colorBufferHash(u32 loc, u32 size, PICA::ColorFmt format) { - return std::rotl(loc, 17) ^ std::rotr(size, 23) ^ (static_cast(format) << 60); + return rotl32(loc, 17) ^ ror32(size, 23) ^ (static_cast(format) << 60); } u32 RendererVK::depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format) { - return std::rotl(loc, 17) ^ std::rotr(size, 29) ^ (static_cast(format) << 60); + return rotl32(loc, 17) ^ ror32(size, 29) ^ (static_cast(format) << 60); } RendererVK::Texture& RendererVK::getColorRenderTexture() { From f62f1bf9b269419c53d91591650475ee2c49c474 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Tue, 15 Aug 2023 13:22:51 -0700 Subject: [PATCH 17/45] Fix ambiguous extension name conversion --- src/core/renderer_vk/renderer_vk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 5de3d534..b7653393 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -589,7 +589,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { std::unordered_set instanceExtensionsAvailable = {}; if (const auto enumerateResult = vk::enumerateInstanceExtensionProperties(); enumerateResult.result == vk::Result::eSuccess) { for (const auto& curExtension : enumerateResult.value) { - instanceExtensionsAvailable.emplace(curExtension.extensionName); + instanceExtensionsAvailable.emplace(curExtension.extensionName.data()); } } From 
97b6b7f1223cd03534070c455b9b10aff73a48b5 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Wed, 16 Aug 2023 21:11:25 -0700 Subject: [PATCH 18/45] Add Vulkan Host-Shader compilation Compiles Vulkan Host shaders into spirv binary files and embeds them into the application's virtual file-system. --- CMakeLists.txt | 22 +++++++++++++++++++++- src/host_shaders/vulkan_display.frag | 7 +++++++ src/host_shaders/vulkan_display.vert | 23 +++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 src/host_shaders/vulkan_display.frag create mode 100644 src/host_shaders/vulkan_display.vert diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d15bbe9..81f3a1eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -251,10 +251,30 @@ if(ENABLE_VULKAN) set(HEADER_FILES ${HEADER_FILES} ${RENDERER_VK_INCLUDE_FILES}) source_group("Source Files\\Core\\Vulkan Renderer" FILES ${RENDERER_VK_SOURCE_FILES}) + + set(RENDERER_VK_HOST_SHADERS_SOURCE + "src/host_shaders/vulkan_display.frag" + "src/host_shaders/vulkan_display.vert" + ) + +foreach( HOST_SHADER_SOURCE ${RENDERER_VK_HOST_SHADERS_SOURCE} ) + get_filename_component( FILE_NAME ${HOST_SHADER_SOURCE} NAME ) + set( HOST_SHADER_SPIRV "${PROJECT_BINARY_DIR}/host_shaders/${FILE_NAME}.spv" ) + add_custom_command( + OUTPUT ${HOST_SHADER_SPIRV} + COMMAND ${CMAKE_COMMAND} -E make_directory "${PROJECT_BINARY_DIR}/host_shaders/" + COMMAND Vulkan::glslangValidator -t --target-env vulkan1.1 -g -V "${PROJECT_SOURCE_DIR}/${HOST_SHADER_SOURCE}" -o ${HOST_SHADER_SPIRV} + #COMMAND ${SPIRV_OPT} -O ${HOST_SHADER_SPIRV} -o ${HOST_SHADER_SPIRV} + DEPENDS ${HOST_SHADER_SOURCE} + ) + list( APPEND RENDERER_VK_HOST_SHADERS_SPIRV ${HOST_SHADER_SPIRV} ) +endforeach() + cmrc_add_resource_library( resources_renderer_vk NAMESPACE RendererVK - WHENCE "src/host_shaders/" + WHENCE "${PROJECT_BINARY_DIR}/host_shaders/" + ${RENDERER_VK_HOST_SHADERS_SPIRV} ) endif() diff --git a/src/host_shaders/vulkan_display.frag 
b/src/host_shaders/vulkan_display.frag new file mode 100644 index 00000000..1b6bd937 --- /dev/null +++ b/src/host_shaders/vulkan_display.frag @@ -0,0 +1,7 @@ +#version 460 core +layout(location = 0) in vec2 UV; +layout(location = 0) out vec4 FragColor; + +layout(binding = 0) uniform sampler2D u_texture; + +void main() { FragColor = texture(u_texture, UV); } \ No newline at end of file diff --git a/src/host_shaders/vulkan_display.vert b/src/host_shaders/vulkan_display.vert new file mode 100644 index 00000000..766b8d0c --- /dev/null +++ b/src/host_shaders/vulkan_display.vert @@ -0,0 +1,23 @@ +#version 460 core +layout(location = 0) out vec2 UV; + +void main() { + const vec4 positions[4] = vec4[]( + vec4(-1.0, 1.0, 1.0, 1.0), // Top-left + vec4(1.0, 1.0, 1.0, 1.0), // Top-right + vec4(-1.0, -1.0, 1.0, 1.0), // Bottom-left + vec4(1.0, -1.0, 1.0, 1.0) // Bottom-right + ); + + // The 3DS displays both screens' framebuffer rotated 90 deg counter clockwise + // So we adjust our texcoords accordingly + const vec2 texcoords[4] = vec2[]( + vec2(1.0, 1.0), // Top-right + vec2(1.0, 0.0), // Bottom-right + vec2(0.0, 1.0), // Top-left + vec2(0.0, 0.0) // Bottom-left + ); + + gl_Position = positions[gl_VertexIndex]; + UV = texcoords[gl_VertexIndex]; +} \ No newline at end of file From 9e2781e87459c2e24fc9187c1cedd4d2e6d4340e Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Wed, 16 Aug 2023 22:36:25 -0700 Subject: [PATCH 19/45] Refactor render-texture cache Implement `displayTransfer` in parity with the OpenGL renderer. Allow arguments to `get{Color,Depth}RenderTexture`. 
--- include/renderer_vk/renderer_vk.hpp | 23 +++-- src/core/renderer_vk/renderer_vk.cpp | 143 +++++++++++++++++---------- 2 files changed, 109 insertions(+), 57 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index b9ada4a9..b7ff779b 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -1,6 +1,7 @@ #include #include +#include "math_util.hpp" #include "renderer.hpp" #include "vk_api.hpp" @@ -40,8 +41,6 @@ class RendererVK final : public Renderer { // Todo: make this a configuration option static constexpr usize frameBufferingCount = 3; - vk::UniqueDeviceMemory framebufferMemory = {}; - // Frame-buffering data // Each vector is `frameBufferingCount` in size std::vector framePresentCommandBuffers = {}; @@ -54,9 +53,21 @@ class RendererVK final : public Renderer { // Todo: // Use `{colourBuffer,depthBuffer}Loc` to maintain an std::map-cache of framebuffers struct Texture { + u32 loc = 0; + u32 sizePerPixel = 0; + std::array size = {}; + vk::UniqueImage image; vk::UniqueDeviceMemory imageMemory; vk::UniqueImageView imageView; + + Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { + // PICA textures have top-left origin, same as Vulkan + const u32 startOffset = (inputAddress - loc) / sizePerPixel; + const u32 x0 = (startOffset % (size[0] * 8)) / 8; + const u32 y0 = (startOffset / (size[0] * 8)) * 8; + return Math::Rect{x0, y0, x0 + width, y0 + height}; + } }; // Hash(loc, size, format) -> Texture std::map textureCache; @@ -64,12 +75,12 @@ class RendererVK final : public Renderer { static u32 colorBufferHash(u32 loc, u32 size, PICA::ColorFmt format); static u32 depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format); - Texture& getColorRenderTexture(); - Texture& getDepthRenderTexture(); + Texture& getColorRenderTexture(u32 addr, PICA::ColorFmt format, u32 width, u32 height); + Texture& getDepthRenderTexture(u32 addr, PICA::DepthFmt format, u32 width, u32 height); 
- // Use `lower_bound` to find nearest texture for an address - // Blit them during `display()` + // Framebuffer for the top/bottom image std::vector screenTexture = {}; + vk::UniqueDeviceMemory framebufferMemory = {}; std::map renderPassCache; diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index b7653393..4b40cac1 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -35,9 +35,8 @@ u32 RendererVK::depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format) { return rotl32(loc, 17) ^ ror32(size, 29) ^ (static_cast(format) << 60); } -RendererVK::Texture& RendererVK::getColorRenderTexture() { - const u32 renderTextureHash = - colorBufferHash(colourBufferLoc, fbSize[0] * fbSize[1] * PICA::sizePerPixel(colourBufferFormat), colourBufferFormat); +RendererVK::Texture& RendererVK::getColorRenderTexture(u32 addr, PICA::ColorFmt format, u32 width, u32 height) { + const u32 renderTextureHash = colorBufferHash(addr, width * height * PICA::sizePerPixel(format), format); // Cache hit if (textureCache.contains(renderTextureHash)) { @@ -46,11 +45,14 @@ RendererVK::Texture& RendererVK::getColorRenderTexture() { // Cache miss Texture& newTexture = textureCache[renderTextureHash]; + newTexture.loc = addr; + newTexture.sizePerPixel = PICA::sizePerPixel(format); + newTexture.size = fbSize; vk::ImageCreateInfo textureInfo = {}; textureInfo.setImageType(vk::ImageType::e2D); - textureInfo.setFormat(Vulkan::colorFormatToVulkan(colourBufferFormat)); - textureInfo.setExtent(vk::Extent3D(fbSize[0], fbSize[1], 1)); + textureInfo.setFormat(Vulkan::colorFormatToVulkan(format)); + textureInfo.setExtent(vk::Extent3D(width, height, 1)); textureInfo.setMipLevels(1); textureInfo.setArrayLayers(1); textureInfo.setSamples(vk::SampleCountFlagBits::e1); @@ -71,7 +73,7 @@ RendererVK::Texture& RendererVK::getColorRenderTexture() { vk::ImageViewCreateInfo viewInfo = {}; viewInfo.image = newTexture.image.get(); 
viewInfo.viewType = vk::ImageViewType::e2D; - viewInfo.format = Vulkan::colorFormatToVulkan(colourBufferFormat); + viewInfo.format = textureInfo.format; viewInfo.components = vk::ComponentMapping(); viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); @@ -91,8 +93,8 @@ RendererVK::Texture& RendererVK::getColorRenderTexture() { return newTexture; } -RendererVK::Texture& RendererVK::getDepthRenderTexture() { - const u32 renderTextureHash = depthBufferHash(depthBufferLoc, fbSize[0] * fbSize[1] * PICA::sizePerPixel(depthBufferFormat), depthBufferFormat); +RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt format, u32 width, u32 height) { + const u32 renderTextureHash = depthBufferHash(addr, width * height * PICA::sizePerPixel(format), format); // Cache hit if (textureCache.contains(renderTextureHash)) { @@ -101,11 +103,14 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture() { // Cache miss Texture& newTexture = textureCache[renderTextureHash]; + newTexture.loc = addr; + newTexture.sizePerPixel = PICA::sizePerPixel(format); + newTexture.size = fbSize; vk::ImageCreateInfo textureInfo = {}; textureInfo.setImageType(vk::ImageType::e2D); - textureInfo.setFormat(Vulkan::depthFormatToVulkan(depthBufferFormat)); - textureInfo.setExtent(vk::Extent3D(fbSize[0], fbSize[1], 1)); + textureInfo.setFormat(Vulkan::depthFormatToVulkan(format)); + textureInfo.setExtent(vk::Extent3D(width, height, 1)); textureInfo.setMipLevels(1); textureInfo.setArrayLayers(1); textureInfo.setSamples(vk::SampleCountFlagBits::e1); @@ -126,16 +131,10 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture() { vk::ImageViewCreateInfo viewInfo = {}; viewInfo.image = newTexture.image.get(); viewInfo.viewType = vk::ImageViewType::e2D; - viewInfo.format = Vulkan::depthFormatToVulkan(depthBufferFormat); + viewInfo.format = textureInfo.format; viewInfo.components = vk::ComponentMapping(); viewInfo.subresourceRange = 
vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1); - if (auto createResult = device->createImageViewUnique(viewInfo); createResult.result == vk::Result::eSuccess) { - newTexture.imageView = std::move(createResult.value); - } else { - Helpers::panic("Error creating depth render-texture: %s\n", vk::to_string(createResult.result).c_str()); - } - if (auto [result, imageMemory] = Vulkan::commitImageHeap(device.get(), physicalDevice, {&newTexture.image.get(), 1}); result == vk::Result::eSuccess) { newTexture.imageMemory = std::move(imageMemory); @@ -143,6 +142,12 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture() { Helpers::panic("Error allocating depth render-texture memory: %s\n", vk::to_string(result).c_str()); } + if (auto createResult = device->createImageViewUnique(viewInfo); createResult.result == vk::Result::eSuccess) { + newTexture.imageView = std::move(createResult.value); + } else { + Helpers::panic("Error creating depth render-texture: %s\n", vk::to_string(createResult.result).c_str()); + } + return newTexture; } @@ -270,7 +275,7 @@ vk::Result RendererVK::recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D sw // Fifo support is required by all vulkan implementations, waits for vsync vk::PresentModeKHR swapchainPresentMode = vk::PresentModeKHR::eFifo; if (auto getResult = physicalDevice.getSurfacePresentModesKHR(surface); getResult.result == vk::Result::eSuccess) { - std::vector& presentModes = getResult.value; + const std::vector& presentModes = getResult.value; // Use mailbox if available, lowest-latency vsync-enabled mode if (std::find(presentModes.begin(), presentModes.end(), vk::PresentModeKHR::eMailbox) != presentModes.end()) { @@ -415,6 +420,13 @@ void RendererVK::display() { Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); } + const bool topActiveFb = externalRegs[PICA::ExternalRegs::Framebuffer0Select] & 1; + const u32 
topScreenAddr = externalRegs[topActiveFb ? PICA::ExternalRegs::Framebuffer0AFirstAddr : PICA::ExternalRegs::Framebuffer0ASecondAddr]; + + const bool bottomActiveFb = externalRegs[PICA::ExternalRegs::Framebuffer1Select] & 1; + const u32 bottomScreenAddr = + externalRegs[bottomActiveFb ? PICA::ExternalRegs::Framebuffer1AFirstAddr : PICA::ExternalRegs::Framebuffer1ASecondAddr]; + //// Render Frame(Simulated - just clear the images to different colors for now) { static const std::array frameScopeColor = {{1.0f, 0.0f, 1.0f, 1.0f}}; @@ -850,7 +862,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { vk::ImageCreateInfo screenTextureInfo = {}; screenTextureInfo.setImageType(vk::ImageType::e2D); screenTextureInfo.setFormat(vk::Format::eR8G8B8A8Unorm); - screenTextureInfo.setExtent(vk::Extent3D(400, 240, 1)); + screenTextureInfo.setExtent(vk::Extent3D(400, 240 * 2, 1)); screenTextureInfo.setMipLevels(1); screenTextureInfo.setArrayLayers(1); screenTextureInfo.setSamples(vk::SampleCountFlagBits::e1); @@ -906,36 +918,56 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {} +// NOTE: The GPU format has RGB5551 and RGB655 swapped compared to internal regs format +static PICA::ColorFmt ToColorFmt(u32 format) { + switch (format) { + case 2: return PICA::ColorFmt::RGB565; + case 3: return PICA::ColorFmt::RGBA5551; + default: return static_cast(format); + } +} + void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { - return; - if (fbSize[0] == 0 || fbSize[1] == 0) { - return; - } const u32 inputWidth = inputSize & 0xffff; - const u32 inputGap = inputSize >> 16; + const u32 inputHeight = inputSize >> 16; + const PICA::ColorFmt inputFormat = ToColorFmt(Helpers::getBits<8, 3>(flags)); + const PICA::ColorFmt outputFormat = ToColorFmt(Helpers::getBits<12, 3>(flags)); + const bool verticalFlip = flags & 1; + const PICA::Scaling 
scaling = static_cast(Helpers::getBits<24, 2>(flags)); - const u32 outputWidth = outputSize & 0xffff; - const u32 outputGap = outputSize >> 16; + u32 outputWidth = outputSize & 0xffff; + u32 outputHeight = outputSize >> 16; - Texture& colorTexture = getColorRenderTexture(); + Texture& srcFramebuffer = getColorRenderTexture(inputAddr, inputFormat, inputWidth, inputHeight); + Math::Rect srcRect = srcFramebuffer.getSubRect(inputAddr, outputWidth, outputHeight); - vk::ImageBlit blitRegion = {}; - // Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture - // We consider output gap == 320 to mean bottom, and anything else to mean top - if (outputGap == 320) { - // Bottom screen - blitRegion = vk::ImageBlit( - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{0, 0, 0}, vk::Offset3D{320, 240, 1}}, - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{40, 240, 0}, vk::Offset3D{40 + 320, 240 + 240, 1}} - ); - } else { - // Top screen - blitRegion = vk::ImageBlit( - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{0, 0, 0}, vk::Offset3D{400, 240, 1}}, - vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{0, 0, 0}, vk::Offset3D{400, 240, 1}} - ); + if (verticalFlip) { + std::swap(srcRect.bottom, srcRect.top); } + // Apply scaling for the destination rectangle. 
+ if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { + outputWidth >>= 1; + } + + if (scaling == PICA::Scaling::XY) { + outputHeight >>= 1; + } + + Texture& destFramebuffer = getColorRenderTexture(outputAddr, outputFormat, outputWidth, outputHeight); + Math::Rect destRect = destFramebuffer.getSubRect(outputAddr, outputWidth, outputHeight); + + if (inputWidth != outputWidth) { + // Helpers::warn("Strided display transfer is not handled correctly!\n"); + } + + const vk::ImageBlit blitRegion( + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), + {vk::Offset3D{(int)srcRect.left, (int)srcRect.top, 0}, vk::Offset3D{(int)srcRect.right, (int)srcRect.bottom, 1}}, + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), + {vk::Offset3D{(int)destRect.left, (int)destRect.top, 0}, vk::Offset3D{(int)destRect.right, (int)destRect.bottom, 1}} + ); + vk::CommandBufferAllocateInfo commandBuffersInfo = {}; commandBuffersInfo.commandPool = commandPool.get(); commandBuffersInfo.level = vk::CommandBufferLevel::ePrimary; @@ -960,20 +992,20 @@ void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u { vk::ImageMemoryBarrier( vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eColorAttachmentOptimal, - vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, colorTexture.image.get(), + vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, srcFramebuffer.image.get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), vk::ImageMemoryBarrier( vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eColorAttachmentOptimal, - vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[frameBufferingIndex].get(), + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, 
destFramebuffer.image.get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), } ); blitCommandBuffer->blitImage( - colorTexture.image.get(), vk::ImageLayout::eTransferSrcOptimal, screenTexture[frameBufferingIndex].get(), - vk::ImageLayout::eTransferDstOptimal, {blitRegion}, vk::Filter::eNearest + srcFramebuffer.image.get(), vk::ImageLayout::eTransferSrcOptimal, destFramebuffer.image.get(), vk::ImageLayout::eTransferDstOptimal, + {blitRegion}, vk::Filter::eLinear ); if (const vk::Result endResult = frameGraphicsCommandBuffers[frameBufferingIndex]->end(); endResult != vk::Result::eSuccess) { @@ -981,21 +1013,25 @@ void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u } blitCommandBuffer->pipelineBarrier( - vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, { vk::ImageMemoryBarrier( vk::AccessFlagBits::eTransferRead, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferSrcOptimal, - vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, colorTexture.image.get(), + vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, srcFramebuffer.image.get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), vk::ImageMemoryBarrier( vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, - vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[frameBufferingIndex].get(), + vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, destFramebuffer.image.get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), } ); + if (const vk::Result endResult = blitCommandBuffer->end(); 
endResult != vk::Result::eSuccess) { + Helpers::panic("Error ending command buffer recording: %s\n", vk::to_string(endResult).c_str()); + } + vk::FenceCreateInfo fenceInfo = {}; vk::UniqueFence finishedFence = {}; if (auto createResult = device->createFenceUnique(fenceInfo); createResult.result == vk::Result::eSuccess) { @@ -1011,6 +1047,11 @@ void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u if (const vk::Result submitResult = graphicsQueue.submit({submitInfo}, finishedFence.get()); submitResult != vk::Result::eSuccess) { Helpers::panic("Error submitting to graphics queue: %s\n", vk::to_string(submitResult).c_str()); } + + // Block, on the CPU, to ensure that this buffered-frame is ready for more work + if (auto waitResult = device->waitForFences({finishedFence.get()}, true, std::numeric_limits::max()); waitResult != vk::Result::eSuccess) { + Helpers::panic("Error waiting on swapchain fence: %s\n", vk::to_string(waitResult).c_str()); + } } void RendererVK::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {} @@ -1031,11 +1072,11 @@ void RendererVK::drawVertices(PICA::PrimType primType, std::span renderTargets; - const auto& colorTexture = getColorRenderTexture(); + const auto& colorTexture = getColorRenderTexture(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]); renderTargets.emplace_back(colorTexture.imageView.get()); if (depthTestEnable) { - const auto& depthTexture = getDepthRenderTexture(); + const auto& depthTexture = getDepthRenderTexture(depthBufferLoc, depthBufferFormat, fbSize[0], fbSize[1]); renderTargets.emplace_back(depthTexture.imageView.get()); } From cb8c53e0b87a64d356b9fbab02dc0afe2bf68dbf Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Wed, 16 Aug 2023 22:52:44 -0700 Subject: [PATCH 20/45] Map RGB8 to RGBA8 RGB8 is not supported by drivers like RADV. Instead, we map it to RGBA8. RGBA8 is mandated to be supported by the vulkan spec. 
--- src/core/renderer_vk/vk_pica.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/core/renderer_vk/vk_pica.cpp b/src/core/renderer_vk/vk_pica.cpp index 33aca623..e7fc9033 100644 --- a/src/core/renderer_vk/vk_pica.cpp +++ b/src/core/renderer_vk/vk_pica.cpp @@ -5,7 +5,11 @@ namespace Vulkan { vk::Format colorFormatToVulkan(PICA::ColorFmt colorFormat) { switch (colorFormat) { case PICA::ColorFmt::RGBA8: return vk::Format::eR8G8B8A8Unorm; - case PICA::ColorFmt::RGB8: return vk::Format::eR8G8B8Unorm; + // VK_FORMAT_R8G8B8A8_UNORM is mandated by the vulkan specification + // VK_FORMAT_R8G8B8_UNORM may not be supported + // TODO: Detect this! + // case PICA::ColorFmt::RGB8: return vk::Format::eR8G8B8Unorm; + case PICA::ColorFmt::RGB8: return vk::Format::eR8G8B8A8Unorm; case PICA::ColorFmt::RGBA5551: return vk::Format::eR5G5B5A1UnormPack16; case PICA::ColorFmt::RGB565: return vk::Format::eR5G6B5UnormPack16; case PICA::ColorFmt::RGBA4: return vk::Format::eR4G4B4A4UnormPack16; From 52ddaae2215e235e9e5b5aa07121946b993905ab Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 17 Aug 2023 20:14:10 -0700 Subject: [PATCH 21/45] Add `getCurrentCommandBuffer` for all frame workloads Rather than allocating new command buffers for each small task, schedule all work onto the frame's main command buffer to be dispatched at display-time. Fixes all layout transition and synchronization issues. 
--- include/renderer_vk/renderer_vk.hpp | 2 + src/core/renderer_vk/renderer_vk.cpp | 72 +++++++--------------------- 2 files changed, 19 insertions(+), 55 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index b7ff779b..36b7d429 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -50,6 +50,8 @@ class RendererVK final : public Renderer { std::vector> frameFramebuffers = {}; std::vector frameGraphicsCommandBuffers = {}; + const vk::CommandBuffer& getCurrentCommandBuffer() const { return frameGraphicsCommandBuffers[frameBufferingIndex].get(); } + // Todo: // Use `{colourBuffer,depthBuffer}Loc` to maintain an std::map-cache of framebuffers struct Texture { diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 4b40cac1..430e9a7e 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -407,7 +407,7 @@ void RendererVK::display() { } } - if (const vk::Result endResult = frameGraphicsCommandBuffers[frameBufferingIndex]->end(); endResult != vk::Result::eSuccess) { + if (const vk::Result endResult = getCurrentCommandBuffer().end(); endResult != vk::Result::eSuccess) { Helpers::panic("Error ending command buffer recording: %s\n", vk::to_string(endResult).c_str()); } @@ -554,6 +554,7 @@ void RendererVK::display() { } } + // We are now working on the next frame frameBufferingIndex = ((frameBufferingIndex + 1) % frameBufferingCount); // Wait for next frame to be ready @@ -567,12 +568,12 @@ void RendererVK::display() { { frameFramebuffers[frameBufferingIndex].clear(); - frameGraphicsCommandBuffers[frameBufferingIndex]->reset(); + getCurrentCommandBuffer().reset(); vk::CommandBufferBeginInfo beginInfo = {}; beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; - if (const vk::Result beginResult = frameGraphicsCommandBuffers[frameBufferingIndex]->begin(beginInfo); beginResult != 
vk::Result::eSuccess) { + if (const vk::Result beginResult = getCurrentCommandBuffer().begin(beginInfo); beginResult != vk::Result::eSuccess) { Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); } } @@ -968,26 +969,16 @@ void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u {vk::Offset3D{(int)destRect.left, (int)destRect.top, 0}, vk::Offset3D{(int)destRect.right, (int)destRect.bottom, 1}} ); - vk::CommandBufferAllocateInfo commandBuffersInfo = {}; - commandBuffersInfo.commandPool = commandPool.get(); - commandBuffersInfo.level = vk::CommandBufferLevel::ePrimary; - commandBuffersInfo.commandBufferCount = 1; + const vk::CommandBuffer& blitCommandBuffer = getCurrentCommandBuffer(); - vk::UniqueCommandBuffer blitCommandBuffer = {}; - if (auto allocateResult = device->allocateCommandBuffersUnique(commandBuffersInfo); allocateResult.result == vk::Result::eSuccess) { - blitCommandBuffer = std::move(allocateResult.value[0]); - } else { - Helpers::panic("Error allocating command buffer: %s\n", vk::to_string(allocateResult.result).c_str()); - } + static const std::array displayTransferColor = {{1.0f, 1.0f, 0.0f, 1.0f}}; + Vulkan::DebugLabelScope scope( + blitCommandBuffer, displayTransferColor, + "DisplayTransfer inputAddr 0x%08X outputAddr 0x%08X inputWidth %d outputWidth %d inputHeight %d outputHeight %d", inputAddr, outputAddr, + inputWidth, outputWidth, inputHeight, outputHeight + ); - vk::CommandBufferBeginInfo beginInfo = {}; - beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; - - if (const vk::Result beginResult = blitCommandBuffer->begin(beginInfo); beginResult != vk::Result::eSuccess) { - Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); - } - - blitCommandBuffer->pipelineBarrier( + blitCommandBuffer.pipelineBarrier( vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, 
{}, { vk::ImageMemoryBarrier( @@ -1003,16 +994,12 @@ void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u } ); - blitCommandBuffer->blitImage( + blitCommandBuffer.blitImage( srcFramebuffer.image.get(), vk::ImageLayout::eTransferSrcOptimal, destFramebuffer.image.get(), vk::ImageLayout::eTransferDstOptimal, {blitRegion}, vk::Filter::eLinear ); - if (const vk::Result endResult = frameGraphicsCommandBuffers[frameBufferingIndex]->end(); endResult != vk::Result::eSuccess) { - Helpers::panic("Error ending command buffer recording: %s\n", vk::to_string(endResult).c_str()); - } - - blitCommandBuffer->pipelineBarrier( + blitCommandBuffer.pipelineBarrier( vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, { vk::ImageMemoryBarrier( @@ -1027,31 +1014,6 @@ void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u ), } ); - - if (const vk::Result endResult = blitCommandBuffer->end(); endResult != vk::Result::eSuccess) { - Helpers::panic("Error ending command buffer recording: %s\n", vk::to_string(endResult).c_str()); - } - - vk::FenceCreateInfo fenceInfo = {}; - vk::UniqueFence finishedFence = {}; - if (auto createResult = device->createFenceUnique(fenceInfo); createResult.result == vk::Result::eSuccess) { - finishedFence = std::move(createResult.value); - } else { - Helpers::panic("Error creating fence: %s\n", vk::to_string(createResult.result).c_str()); - } - - vk::SubmitInfo submitInfo = {}; - - submitInfo.setCommandBuffers(blitCommandBuffer.get()); - - if (const vk::Result submitResult = graphicsQueue.submit({submitInfo}, finishedFence.get()); submitResult != vk::Result::eSuccess) { - Helpers::panic("Error submitting to graphics queue: %s\n", vk::to_string(submitResult).c_str()); - } - - // Block, on the CPU, to ensure that this buffered-frame is ready for more work - if (auto waitResult = device->waitForFences({finishedFence.get()}, true, 
std::numeric_limits::max()); waitResult != vk::Result::eSuccess) { - Helpers::panic("Error waiting on swapchain fence: %s\n", vk::to_string(waitResult).c_str()); - } } void RendererVK::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {} @@ -1106,10 +1068,10 @@ void RendererVK::drawVertices(PICA::PrimType primType, std::spanbeginRenderPass(renderBeginInfo, vk::SubpassContents::eInline); - commandBuffer->endRenderPass(); + commandBuffer.beginRenderPass(renderBeginInfo, vk::SubpassContents::eInline); + commandBuffer.endRenderPass(); } void RendererVK::screenshot(const std::string& name) {} From 6052abe551606cdcca994193722c53d2251bcb06 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 17 Aug 2023 21:23:19 -0700 Subject: [PATCH 22/45] Fix swapchain synchronization issues While we wait on the CPU-side fence for these command buffers, we must also wait on the GPU-side semaphore to comply with the validation layers' synchronization checks. We still get an error on the very first frame since it will wait for a semaphore that has nothing signaling it.
--- src/core/renderer_vk/renderer_vk.cpp | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 430e9a7e..d889c683 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -508,10 +508,23 @@ void RendererVK::display() { vk::SubmitInfo submitInfo = {}; // Wait for any previous uses of the image image to finish presenting - if (swapchainImageIndex != swapchainImageInvalid) { - submitInfo.setWaitSemaphores(swapImageFreeSemaphore[frameBufferingIndex].get()); - static const vk::PipelineStageFlags waitStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput; - submitInfo.setWaitDstStageMask(waitStageMask); + std::vector waitSemaphores; + std::vector waitSemaphoreStages; + { + if (swapchainImageIndex != swapchainImageInvalid) { + waitSemaphores.emplace_back(swapImageFreeSemaphore[frameBufferingIndex].get()); + static const vk::PipelineStageFlags waitStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput; + waitSemaphoreStages.emplace_back(waitStageMask); + } + + // Ensure a proper semaphore wait on render-finished + // We already wait on the fence, but this must be done to be compliant + // to validation layers + waitSemaphores.emplace_back(renderFinishedSemaphore[frameBufferingIndex].get()); + waitSemaphoreStages.emplace_back(vk::PipelineStageFlagBits::eColorAttachmentOutput); + + submitInfo.setWaitSemaphores(waitSemaphores); + submitInfo.setWaitDstStageMask(waitSemaphoreStages); } // Signal when finished submitInfo.setSignalSemaphores(renderFinishedSemaphore[frameBufferingIndex].get()); @@ -880,12 +893,16 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { for (usize i = 0; i < frameBufferingCount; i++) { if (auto createResult = device->createSemaphoreUnique(semaphoreInfo); createResult.result == vk::Result::eSuccess) { swapImageFreeSemaphore[i] = std::move(createResult.value); + + 
Vulkan::setObjectName(device.get(), swapImageFreeSemaphore[i].get(), "swapImageFreeSemaphore#%zu", i); } else { Helpers::panic("Error creating 'present-ready' semaphore: %s\n", vk::to_string(createResult.result).c_str()); } if (auto createResult = device->createSemaphoreUnique(semaphoreInfo); createResult.result == vk::Result::eSuccess) { renderFinishedSemaphore[i] = std::move(createResult.value); + + Vulkan::setObjectName(device.get(), renderFinishedSemaphore[i].get(), "renderFinishedSemaphore#%zu", i); } else { Helpers::panic("Error creating 'post-render' semaphore: %s\n", vk::to_string(createResult.result).c_str()); } From d4b75deaf8a2aa48bb255a5c3d1b980f2fa7118d Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 17 Aug 2023 22:29:40 -0700 Subject: [PATCH 23/45] Remove separate presentation/graphics command buffers Now there is just one primary command buffer into which all work is enqueued. At the end of the frame, the next frame's CPU-side fence is waited on before resetting and beginning its command buffer again for recording. This command buffer must always be in the RECORDING state.
--- include/renderer_vk/renderer_vk.hpp | 5 ++- src/core/renderer_vk/renderer_vk.cpp | 51 +++++++++------------------- 2 files changed, 18 insertions(+), 38 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 36b7d429..e0d932a9 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -43,14 +43,13 @@ class RendererVK final : public Renderer { // Frame-buffering data // Each vector is `frameBufferingCount` in size - std::vector framePresentCommandBuffers = {}; std::vector swapImageFreeSemaphore = {}; std::vector renderFinishedSemaphore = {}; std::vector frameFinishedFences = {}; std::vector> frameFramebuffers = {}; - std::vector frameGraphicsCommandBuffers = {}; + std::vector frameCommandBuffers = {}; - const vk::CommandBuffer& getCurrentCommandBuffer() const { return frameGraphicsCommandBuffers[frameBufferingIndex].get(); } + const vk::CommandBuffer& getCurrentCommandBuffer() const { return frameCommandBuffers[frameBufferingIndex].get(); } // Todo: // Use `{colourBuffer,depthBuffer}Loc` to maintain an std::map-cache of framebuffers diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index d889c683..f4dadf8f 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -407,19 +407,6 @@ void RendererVK::display() { } } - if (const vk::Result endResult = getCurrentCommandBuffer().end(); endResult != vk::Result::eSuccess) { - Helpers::panic("Error ending command buffer recording: %s\n", vk::to_string(endResult).c_str()); - } - - const vk::UniqueCommandBuffer& frameCommandBuffer = framePresentCommandBuffers[frameBufferingIndex]; - - vk::CommandBufferBeginInfo beginInfo = {}; - beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; - - if (const vk::Result beginResult = frameCommandBuffer->begin(beginInfo); beginResult != vk::Result::eSuccess) { - Helpers::panic("Error beginning command buffer 
recording: %s\n", vk::to_string(beginResult).c_str()); - } - const bool topActiveFb = externalRegs[PICA::ExternalRegs::Framebuffer0Select] & 1; const u32 topScreenAddr = externalRegs[topActiveFb ? PICA::ExternalRegs::Framebuffer0AFirstAddr : PICA::ExternalRegs::Framebuffer0ASecondAddr]; @@ -431,10 +418,10 @@ void RendererVK::display() { { static const std::array frameScopeColor = {{1.0f, 0.0f, 1.0f, 1.0f}}; - Vulkan::DebugLabelScope debugScope(frameCommandBuffer.get(), frameScopeColor, "Frame"); + Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), frameScopeColor, "Frame"); // Prepare images for color-clear - frameCommandBuffer->pipelineBarrier( + getCurrentCommandBuffer().pipelineBarrier( vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, { vk::ImageMemoryBarrier( @@ -446,7 +433,7 @@ void RendererVK::display() { ); static const std::array topClearColor = {{1.0f, 0.0f, 0.0f, 1.0f}}; static const std::array bottomClearColor = {{0.0f, 1.0f, 0.0f, 1.0f}}; - frameCommandBuffer->clearColorImage( + getCurrentCommandBuffer().clearColorImage( screenTexture[frameBufferingIndex].get(), vk::ImageLayout::eTransferDstOptimal, topClearColor, vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ); @@ -455,10 +442,10 @@ void RendererVK::display() { //// Present if (swapchainImageIndex != swapchainImageInvalid) { static const std::array presentScopeColor = {{1.0f, 1.0f, 0.0f, 1.0f}}; - Vulkan::DebugLabelScope debugScope(frameCommandBuffer.get(), presentScopeColor, "Present"); + Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), presentScopeColor, "Present"); // Prepare swapchain image for color-clear/blit-dst, prepare top/bottom screen for blit-src - frameCommandBuffer->pipelineBarrier( + getCurrentCommandBuffer().pipelineBarrier( vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, { vk::ImageMemoryBarrier( @@ -475,7 +462,7 @@ 
void RendererVK::display() { ); static const std::array clearColor = {{0.0f, 0.0f, 0.0f, 1.0f}}; - frameCommandBuffer->clearColorImage( + getCurrentCommandBuffer().clearColorImage( swapchainImages[swapchainImageIndex], vk::ImageLayout::eTransferDstOptimal, clearColor, vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ); @@ -486,13 +473,13 @@ void RendererVK::display() { vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240 * 2, 1}}, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240 * 2, 1}} ); - frameCommandBuffer->blitImage( + getCurrentCommandBuffer().blitImage( screenTexture[frameBufferingIndex].get(), vk::ImageLayout::eTransferSrcOptimal, swapchainImages[swapchainImageIndex], vk::ImageLayout::eTransferDstOptimal, {screenBlit}, vk::Filter::eNearest ); // Prepare swapchain image for present - frameCommandBuffer->pipelineBarrier( + getCurrentCommandBuffer().pipelineBarrier( vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, {vk::ImageMemoryBarrier( vk::AccessFlagBits::eNone, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, @@ -502,7 +489,7 @@ void RendererVK::display() { ); } - if (const vk::Result endResult = frameCommandBuffer->end(); endResult != vk::Result::eSuccess) { + if (const vk::Result endResult = getCurrentCommandBuffer().end(); endResult != vk::Result::eSuccess) { Helpers::panic("Error ending command buffer recording: %s\n", vk::to_string(endResult).c_str()); } @@ -529,7 +516,7 @@ void RendererVK::display() { // Signal when finished submitInfo.setSignalSemaphores(renderFinishedSemaphore[frameBufferingIndex].get()); - submitInfo.setCommandBuffers(frameCommandBuffer.get()); + submitInfo.setCommandBuffers(getCurrentCommandBuffer()); device->resetFences({frameFinishedFences[frameBufferingIndex].get()}); @@ -842,23 +829,17 @@ 
void RendererVK::initGraphicsContext(SDL_Window* window) { commandBuffersInfo.commandBufferCount = frameBufferingCount; if (auto allocateResult = device->allocateCommandBuffersUnique(commandBuffersInfo); allocateResult.result == vk::Result::eSuccess) { - framePresentCommandBuffers = std::move(allocateResult.value); - } else { - Helpers::panic("Error allocating command buffer: %s\n", vk::to_string(allocateResult.result).c_str()); - } - - if (auto allocateResult = device->allocateCommandBuffersUnique(commandBuffersInfo); allocateResult.result == vk::Result::eSuccess) { - frameGraphicsCommandBuffers = std::move(allocateResult.value); + frameCommandBuffers = std::move(allocateResult.value); } else { Helpers::panic("Error allocating command buffer: %s\n", vk::to_string(allocateResult.result).c_str()); } + // Initialize the first command buffer to be in the RECORDING state vk::CommandBufferBeginInfo beginInfo = {}; beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; - for (const auto& graphicsCommandBuffer : frameGraphicsCommandBuffers) { - if (const vk::Result beginResult = graphicsCommandBuffer->begin(beginInfo); beginResult != vk::Result::eSuccess) { - Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); - } + + if (const vk::Result beginResult = frameCommandBuffers[frameBufferingIndex]->begin(beginInfo); beginResult != vk::Result::eSuccess) { + Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); } // Frame-buffering synchronization primitives @@ -871,7 +852,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { renderFinishedSemaphore.resize(frameBufferingCount); frameFinishedFences.resize(frameBufferingCount); frameFramebuffers.resize(frameBufferingCount); - frameGraphicsCommandBuffers.resize(frameBufferingCount); + frameCommandBuffers.resize(frameBufferingCount); vk::ImageCreateInfo screenTextureInfo = {}; 
screenTextureInfo.setImageType(vk::ImageType::e2D); From 4b193c8d6ba8e76e1fd4b9a545b8d786019edb8a Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 20 Aug 2023 00:26:27 -0700 Subject: [PATCH 24/45] Add general purpose vulkan render cache Takes in a general `vk::Format` rather than PICA-types --- include/renderer_vk/renderer_vk.hpp | 3 ++- src/core/renderer_vk/renderer_vk.cpp | 18 +++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index e0d932a9..98e00c53 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -83,8 +83,9 @@ class RendererVK final : public Renderer { std::vector screenTexture = {}; vk::UniqueDeviceMemory framebufferMemory = {}; - std::map renderPassCache; + std::map renderPassCache; + vk::RenderPass getRenderPass(vk::Format colorFormat, std::optional depthFormat); vk::RenderPass getRenderPass(PICA::ColorFmt colorFormat, std::optional depthFormat); // Recreate the swapchain, possibly re-using the old one in the case of a resize diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index f4dadf8f..c53d820d 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -151,11 +151,11 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt return newTexture; } -vk::RenderPass RendererVK::getRenderPass(PICA::ColorFmt colorFormat, std::optional depthFormat) { - u32 renderPassHash = static_cast(colorFormat); +vk::RenderPass RendererVK::getRenderPass(vk::Format colorFormat, std::optional depthFormat) { + u64 renderPassHash = static_cast(colorFormat); if (depthFormat.has_value()) { - renderPassHash |= (static_cast(depthFormat.value()) << 8); + renderPassHash |= (static_cast(depthFormat.value()) << 32); } // Cache hit @@ -170,7 +170,7 @@ vk::RenderPass RendererVK::getRenderPass(PICA::ColorFmt colorFormat, std::option 
std::vector renderPassAttachments = {}; vk::AttachmentDescription colorAttachment = {}; - colorAttachment.format = Vulkan::colorFormatToVulkan(colorFormat); + colorAttachment.format = colorFormat; colorAttachment.samples = vk::SampleCountFlagBits::e1; colorAttachment.loadOp = vk::AttachmentLoadOp::eLoad; colorAttachment.storeOp = vk::AttachmentStoreOp::eStore; @@ -182,7 +182,7 @@ vk::RenderPass RendererVK::getRenderPass(PICA::ColorFmt colorFormat, std::option if (depthFormat.has_value()) { vk::AttachmentDescription depthAttachment = {}; - depthAttachment.format = Vulkan::depthFormatToVulkan(depthFormat.value()); + depthAttachment.format = depthFormat.value(); depthAttachment.samples = vk::SampleCountFlagBits::e1; depthAttachment.loadOp = vk::AttachmentLoadOp::eLoad; depthAttachment.storeOp = vk::AttachmentStoreOp::eStore; @@ -229,6 +229,14 @@ vk::RenderPass RendererVK::getRenderPass(PICA::ColorFmt colorFormat, std::option return {}; } +vk::RenderPass RendererVK::getRenderPass(PICA::ColorFmt colorFormat, std::optional depthFormat) { + if (depthFormat.has_value()) { + return getRenderPass(Vulkan::colorFormatToVulkan(colorFormat), Vulkan::depthFormatToVulkan(depthFormat.value())); + } else { + return getRenderPass(Vulkan::colorFormatToVulkan(colorFormat), {}); + } +} + vk::Result RendererVK::recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D swapchainExtent) { static constexpr u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 static constexpr u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall From 13e52ac047c825976c5b33e6b01124c92f30e3ae Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 20 Aug 2023 00:27:27 -0700 Subject: [PATCH 25/45] Add shader-module loading Loads spirv files from the embedded virtual file system and loads them into vulkan shader-modules. 
--- src/core/renderer_vk/renderer_vk.cpp | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index c53d820d..a03b999a 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -1,5 +1,6 @@ #include "renderer_vk/renderer_vk.hpp" +#include #include #include #include @@ -10,6 +11,26 @@ #include "renderer_vk/vk_memory.hpp" #include "renderer_vk/vk_pica.hpp" +CMRC_DECLARE(RendererVK); + +static vk::UniqueShaderModule createShaderModule(vk::Device device, std::span shaderCode) { + vk::ShaderModuleCreateInfo shaderModuleInfo = {}; + shaderModuleInfo.pCode = reinterpret_cast(shaderCode.data()); + shaderModuleInfo.codeSize = shaderCode.size(); + + vk::UniqueShaderModule shaderModule = {}; + if (auto createResult = device.createShaderModuleUnique(shaderModuleInfo); createResult.result == vk::Result::eSuccess) { + shaderModule = std::move(createResult.value); + } else { + Helpers::panic("Error creating shader module: %s\n", vk::to_string(createResult.result).c_str()); + } + return shaderModule; +} + +static inline vk::UniqueShaderModule createShaderModule(vk::Device device, cmrc::file shaderFile) { + return createShaderModule(device, std::span(reinterpret_cast(shaderFile.begin()), shaderFile.size())); +} + // Finds the first queue family that satisfies `queueMask` and excludes `queueExcludeMask` bits // Returns -1 if not found // Todo: Smarter selection for present/graphics/compute/transfer @@ -921,6 +942,15 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { Helpers::panic("Error allocating framebuffer memory: %s\n", vk::to_string(result).c_str()); } } + + auto vk_resources = cmrc::RendererVK::get_filesystem(); + auto displayVertexShader = vk_resources.open("vulkan_display.vert.spv"); + auto displayFragmentShader = vk_resources.open("vulkan_display.frag.spv"); + + vk::UniqueShaderModule displayVertexShaderModule = 
createShaderModule(device.get(), displayVertexShader); + vk::UniqueShaderModule displayFragmentShaderModule = createShaderModule(device.get(), displayFragmentShader); + + vk::RenderPass screenTextureRenderPass = getRenderPass(screenTextureInfo.format, {}); } void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {} From 89f3eb3a8783809c6f2277e6db6cbc4c7f4c3203 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 20 Aug 2023 00:36:51 -0700 Subject: [PATCH 26/45] Add simple graphics pipeline creation This will eventually need to move into a cache, but this is introducing some foundational patterns so that we know how to properly design a pipeline cache. It currently does not provide a `DescriptorSetLayout` argument, which causes validation errors. This will be a general-case cache right from the get-go. --- src/core/renderer_vk/renderer_vk.cpp | 154 +++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index a03b999a..0f9aa37a 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -31,6 +31,156 @@ static inline vk::UniqueShaderModule createShaderModule(vk::Device device, cmrc: return createShaderModule(device, std::span(reinterpret_cast(shaderFile.begin()), shaderFile.size())); } +std::tuple createGraphicsPipeline( + vk::Device device, std::span pushConstants, std::span setLayouts, + vk::ShaderModule vertModule, vk::ShaderModule fragModule, std::span vertexBindingDescriptions, + std::span vertexAttributeDescriptions, vk::RenderPass renderPass +) { + // Create Pipeline Layout + vk::PipelineLayoutCreateInfo graphicsPipelineLayoutInfo = {}; + + graphicsPipelineLayoutInfo.pSetLayouts = setLayouts.data(); + graphicsPipelineLayoutInfo.setLayoutCount = setLayouts.size(); + graphicsPipelineLayoutInfo.pPushConstantRanges = pushConstants.data(); + graphicsPipelineLayoutInfo.pushConstantRangeCount = pushConstants.size(); 
+ + vk::UniquePipelineLayout graphicsPipelineLayout = {}; + if (auto createResult = device.createPipelineLayoutUnique(graphicsPipelineLayoutInfo); createResult.result == vk::Result::eSuccess) { + graphicsPipelineLayout = std::move(createResult.value); + } else { + Helpers::panic("Error creating pipeline layout: %s\n", vk::to_string(createResult.result).c_str()); + return {}; + } + + // Describe the stage and entry point of each shader + const vk::PipelineShaderStageCreateInfo ShaderStagesInfo[2] = { + vk::PipelineShaderStageCreateInfo( + {}, // Flags + vk::ShaderStageFlagBits::eVertex, // Shader Stage + vertModule, // Shader Module + "main", // Shader entry point function name + {} // Shader specialization info + ), + vk::PipelineShaderStageCreateInfo( + {}, // Flags + vk::ShaderStageFlagBits::eFragment, // Shader Stage + fragModule, // Shader Module + "main", // Shader entry point function name + {} // Shader specialization info + ), + }; + + vk::PipelineVertexInputStateCreateInfo vertexInputState = {}; + + vertexInputState.vertexBindingDescriptionCount = vertexBindingDescriptions.size(); + vertexInputState.pVertexBindingDescriptions = vertexBindingDescriptions.data(); + + vertexInputState.vertexAttributeDescriptionCount = vertexAttributeDescriptions.size(); + vertexInputState.pVertexAttributeDescriptions = vertexAttributeDescriptions.data(); + + vk::PipelineInputAssemblyStateCreateInfo inputAssemblyState = {}; + inputAssemblyState.topology = vk::PrimitiveTopology::eTriangleList; + inputAssemblyState.primitiveRestartEnable = false; + + vk::PipelineViewportStateCreateInfo viewportState = {}; + + static const vk::Viewport defaultViewport = {0, 0, 16, 16, 0.0f, 1.0f}; + static const vk::Rect2D defaultScissor = {{0, 0}, {16, 16}}; + viewportState.viewportCount = 1; + viewportState.pViewports = &defaultViewport; + viewportState.scissorCount = 1; + viewportState.pScissors = &defaultScissor; + + vk::PipelineRasterizationStateCreateInfo rasterizationState = {}; + + 
rasterizationState.depthClampEnable = false; + rasterizationState.rasterizerDiscardEnable = false; + rasterizationState.polygonMode = vk::PolygonMode::eFill; + rasterizationState.cullMode = vk::CullModeFlagBits::eBack; + rasterizationState.frontFace = vk::FrontFace::eClockwise; + rasterizationState.depthBiasEnable = false; + rasterizationState.depthBiasConstantFactor = 0.0f; + rasterizationState.depthBiasClamp = 0.0f; + rasterizationState.depthBiasSlopeFactor = 0.0; + rasterizationState.lineWidth = 1.0f; + + vk::PipelineMultisampleStateCreateInfo multisampleState = {}; + + multisampleState.rasterizationSamples = vk::SampleCountFlagBits::e1; + multisampleState.sampleShadingEnable = false; + multisampleState.minSampleShading = 1.0f; + multisampleState.pSampleMask = nullptr; + multisampleState.alphaToCoverageEnable = true; + multisampleState.alphaToOneEnable = false; + + vk::PipelineDepthStencilStateCreateInfo depthStencilState = {}; + + depthStencilState.depthTestEnable = true; + depthStencilState.depthWriteEnable = true; + depthStencilState.depthCompareOp = vk::CompareOp::eLessOrEqual; + depthStencilState.depthBoundsTestEnable = false; + depthStencilState.stencilTestEnable = false; + depthStencilState.front = vk::StencilOp::eKeep; + depthStencilState.back = vk::StencilOp::eKeep; + depthStencilState.minDepthBounds = 0.0f; + depthStencilState.maxDepthBounds = 1.0f; + + vk::PipelineColorBlendStateCreateInfo colorBlendState = {}; + + colorBlendState.logicOpEnable = false; + colorBlendState.logicOp = vk::LogicOp::eClear; + colorBlendState.attachmentCount = 1; + + vk::PipelineColorBlendAttachmentState blendAttachmentState = {}; + + blendAttachmentState.blendEnable = false; + blendAttachmentState.srcColorBlendFactor = vk::BlendFactor::eZero; + blendAttachmentState.dstColorBlendFactor = vk::BlendFactor::eZero; + blendAttachmentState.colorBlendOp = vk::BlendOp::eAdd; + blendAttachmentState.srcAlphaBlendFactor = vk::BlendFactor::eZero; + 
blendAttachmentState.dstAlphaBlendFactor = vk::BlendFactor::eZero; + blendAttachmentState.alphaBlendOp = vk::BlendOp::eAdd; + blendAttachmentState.colorWriteMask = + vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA; + + colorBlendState.pAttachments = &blendAttachmentState; + + vk::PipelineDynamicStateCreateInfo dynamicState = {}; + static vk::DynamicState dynamicStates[] = {// The viewport and scissor of the framebuffer will be dynamic at + // run-time + vk::DynamicState::eViewport, vk::DynamicState::eScissor}; + dynamicState.dynamicStateCount = std::size(dynamicStates); + dynamicState.pDynamicStates = dynamicStates; + + vk::GraphicsPipelineCreateInfo renderPipelineInfo = {}; + + renderPipelineInfo.stageCount = 2; // Vert + Frag stages + renderPipelineInfo.pStages = ShaderStagesInfo; + renderPipelineInfo.pVertexInputState = &vertexInputState; + renderPipelineInfo.pInputAssemblyState = &inputAssemblyState; + renderPipelineInfo.pViewportState = &viewportState; + renderPipelineInfo.pRasterizationState = &rasterizationState; + renderPipelineInfo.pMultisampleState = &multisampleState; + renderPipelineInfo.pDepthStencilState = &depthStencilState; + renderPipelineInfo.pColorBlendState = &colorBlendState; + renderPipelineInfo.pDynamicState = &dynamicState; + renderPipelineInfo.subpass = 0; + renderPipelineInfo.renderPass = renderPass; + renderPipelineInfo.layout = graphicsPipelineLayout.get(); + + // Create Pipeline + vk::UniquePipeline pipeline = {}; + + if (auto createResult = device.createGraphicsPipelineUnique({}, renderPipelineInfo); createResult.result == vk::Result::eSuccess) { + pipeline = std::move(createResult.value); + } else { + Helpers::panic("Error creating graphics pipeline: %s\n", vk::to_string(createResult.result).c_str()); + return {}; + } + + return std::make_tuple(std::move(pipeline), std::move(graphicsPipelineLayout)); +} + // Finds the first queue family that satisfies 
`queueMask` and excludes `queueExcludeMask` bits // Returns -1 if not found // Todo: Smarter selection for present/graphics/compute/transfer @@ -951,6 +1101,10 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { vk::UniqueShaderModule displayFragmentShaderModule = createShaderModule(device.get(), displayFragmentShader); vk::RenderPass screenTextureRenderPass = getRenderPass(screenTextureInfo.format, {}); + + auto [pipeline, pipelineLayout] = createGraphicsPipeline( + device.get(), {}, {}, displayVertexShaderModule.get(), displayFragmentShaderModule.get(), {}, {}, screenTextureRenderPass + ); } void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {} From 6ebbd80286390737749e546caaf4f87a8b6377cf Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 20 Aug 2023 21:03:15 -0700 Subject: [PATCH 27/45] Add Display-pipeline as member variables --- include/renderer_vk/renderer_vk.hpp | 3 +++ src/core/renderer_vk/renderer_vk.cpp | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 98e00c53..5f216b7d 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -88,6 +88,9 @@ class RendererVK final : public Renderer { vk::RenderPass getRenderPass(vk::Format colorFormat, std::optional depthFormat); vk::RenderPass getRenderPass(PICA::ColorFmt colorFormat, std::optional depthFormat); + vk::UniquePipeline displayPipeline; + vk::UniquePipelineLayout displayPipelineLayout; + // Recreate the swapchain, possibly re-using the old one in the case of a resize vk::Result recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D swapchainExtent); diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 0f9aa37a..3e1dd64a 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -1102,7 +1102,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) 
{ vk::RenderPass screenTextureRenderPass = getRenderPass(screenTextureInfo.format, {}); - auto [pipeline, pipelineLayout] = createGraphicsPipeline( + std::tie(displayPipeline, displayPipelineLayout) = createGraphicsPipeline( device.get(), {}, {}, displayVertexShaderModule.get(), displayFragmentShaderModule.get(), {}, {}, screenTextureRenderPass ); } From 72c77e41b4d577799f3655946fcfaa69b049970f Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 20 Aug 2023 21:29:37 -0700 Subject: [PATCH 28/45] Draft Vulkan DescriptorHeap A utility class from a personal project for managing a heap of descriptors of a particular layout. Allows the display graphics pipeline to be successfully created, satisfying its descriptor layout issues. --- CMakeLists.txt | 10 +- include/renderer_vk/renderer_vk.hpp | 2 + include/renderer_vk/vk_descriptor_heap.hpp | 49 ++++++++ src/core/renderer_vk/renderer_vk.cpp | 14 ++- src/core/renderer_vk/vk_descriptor_heap.cpp | 119 ++++++++++++++++++++ 5 files changed, 189 insertions(+), 5 deletions(-) create mode 100644 include/renderer_vk/vk_descriptor_heap.hpp create mode 100644 src/core/renderer_vk/vk_descriptor_heap.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 81f3a1eb..a12cf337 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,13 +239,15 @@ if(ENABLE_VULKAN) ) set(RENDERER_VK_INCLUDE_FILES include/renderer_vk/renderer_vk.hpp - include/renderer_vk/vk_api.hpp include/renderer_vk/vk_debug.hpp include/renderer_vk/vk_memory.hpp - include/renderer_vk/vk_pica.hpp + include/renderer_vk/vk_api.hpp include/renderer_vk/vk_debug.hpp + include/renderer_vk/vk_descriptor_heap.hpp + include/renderer_vk/vk_memory.hpp include/renderer_vk/vk_pica.hpp ) set(RENDERER_VK_SOURCE_FILES src/core/renderer_vk/renderer_vk.cpp - src/core/renderer_vk/vk_api.cpp src/core/renderer_vk/vk_debug.cpp src/core/renderer_vk/vk_memory.cpp - src/core/renderer_vk/vk_pica.cpp + src/core/renderer_vk/vk_api.cpp src/core/renderer_vk/vk_debug.cpp + 
src/core/renderer_vk/vk_descriptor_heap.cpp + src/core/renderer_vk/vk_memory.cpp src/core/renderer_vk/vk_pica.cpp ) set(HEADER_FILES ${HEADER_FILES} ${RENDERER_VK_INCLUDE_FILES}) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 5f216b7d..e93a97b1 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -4,6 +4,7 @@ #include "math_util.hpp" #include "renderer.hpp" #include "vk_api.hpp" +#include "vk_descriptor_heap.hpp" class GPU; @@ -88,6 +89,7 @@ class RendererVK final : public Renderer { vk::RenderPass getRenderPass(vk::Format colorFormat, std::optional depthFormat); vk::RenderPass getRenderPass(PICA::ColorFmt colorFormat, std::optional depthFormat); + std::unique_ptr displayDescriptorHeap; vk::UniquePipeline displayPipeline; vk::UniquePipelineLayout displayPipelineLayout; diff --git a/include/renderer_vk/vk_descriptor_heap.hpp b/include/renderer_vk/vk_descriptor_heap.hpp new file mode 100644 index 00000000..8a9630e3 --- /dev/null +++ b/include/renderer_vk/vk_descriptor_heap.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include + +#include "helpers.hpp" +#include "vk_api.hpp" + +namespace Vulkan { + // Implements a basic heap of descriptor sets given a layout of particular + // bindings. Create a descriptor set by providing a list of bindings and it will + // automatically create both the pool and layout, and maintain a heap of descriptor + // sets. Descriptor sets will be reused and recycled. Assume that newly + // allocated descriptor sets are in an undefined state.
+ class DescriptorHeap { + private: + const vk::Device device; + + vk::UniqueDescriptorPool descriptorPool; + vk::UniqueDescriptorSetLayout descriptorSetLayout; + std::vector descriptorSets; + + std::vector bindings; + + std::vector allocationMap; + + explicit DescriptorHeap(vk::Device device); + + public: + ~DescriptorHeap() = default; + + DescriptorHeap(DescriptorHeap&&) = default; + + const vk::DescriptorPool& getDescriptorPool() const { return descriptorPool.get(); }; + + const vk::DescriptorSetLayout& getDescriptorSetLayout() const { return descriptorSetLayout.get(); }; + + const std::span getDescriptorSets() const { return descriptorSets; }; + + std::span getBindings() const { return bindings; }; + + std::optional allocateDescriptorSet(); + bool freeDescriptorSet(vk::DescriptorSet set); + + static std::optional create( + vk::Device device, std::span bindings, u16 descriptorHeapCount = 1024 + ); + }; +} // namespace Vulkan \ No newline at end of file diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 3e1dd64a..49543bc5 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -1093,6 +1093,17 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { } } + static vk::DescriptorSetLayoutBinding displayShaderLayout[] = { + {// Just a singular texture slot + 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, + }; + + if (auto createResult = Vulkan::DescriptorHeap::create(device.get(), displayShaderLayout); createResult.has_value()) { + displayDescriptorHeap = std::make_unique(std::move(createResult.value())); + } else { + Helpers::panic("Error creating descriptor heap\n"); + } + auto vk_resources = cmrc::RendererVK::get_filesystem(); auto displayVertexShader = vk_resources.open("vulkan_display.vert.spv"); auto displayFragmentShader = vk_resources.open("vulkan_display.frag.spv"); @@ -1103,7 +1114,8 @@ void 
RendererVK::initGraphicsContext(SDL_Window* window) { vk::RenderPass screenTextureRenderPass = getRenderPass(screenTextureInfo.format, {}); std::tie(displayPipeline, displayPipelineLayout) = createGraphicsPipeline( - device.get(), {}, {}, displayVertexShaderModule.get(), displayFragmentShaderModule.get(), {}, {}, screenTextureRenderPass + device.get(), {}, {{displayDescriptorHeap.get()->getDescriptorSetLayout()}}, displayVertexShaderModule.get(), + displayFragmentShaderModule.get(), {}, {}, screenTextureRenderPass ); } diff --git a/src/core/renderer_vk/vk_descriptor_heap.cpp b/src/core/renderer_vk/vk_descriptor_heap.cpp new file mode 100644 index 00000000..ecf71d92 --- /dev/null +++ b/src/core/renderer_vk/vk_descriptor_heap.cpp @@ -0,0 +1,119 @@ +#include "renderer_vk/vk_descriptor_heap.hpp" + +#include +#include +#include + +namespace Vulkan { + + DescriptorHeap::DescriptorHeap(vk::Device device) : device(device) {} + + std::optional DescriptorHeap::allocateDescriptorSet() { + // Find a free slot + const auto freeSlot = std::find(allocationMap.begin(), allocationMap.end(), false); + + // If there is no free slot, return + if (freeSlot == allocationMap.end()) { + return std::nullopt; + } + + // Mark the slot as allocated + *freeSlot = true; + + const u16 index = static_cast(std::distance(allocationMap.begin(), freeSlot)); + + vk::UniqueDescriptorSet& newDescriptorSet = descriptorSets[index]; + + if (!newDescriptorSet) { + // Descriptor set doesn't exist yet. 
Allocate a new one + vk::DescriptorSetAllocateInfo allocateInfo = {}; + + allocateInfo.descriptorPool = descriptorPool.get(); + allocateInfo.pSetLayouts = &descriptorSetLayout.get(); + allocateInfo.descriptorSetCount = 1; + + if (auto AllocateResult = device.allocateDescriptorSetsUnique(allocateInfo); AllocateResult.result == vk::Result::eSuccess) { + newDescriptorSet = std::move(AllocateResult.value[0]); + } else { + // Error allocating descriptor set + return std::nullopt; + } + } + + return newDescriptorSet.get(); + } + + bool DescriptorHeap::freeDescriptorSet(vk::DescriptorSet Set) { + // Find the descriptor set + const auto found = + std::find_if(descriptorSets.begin(), descriptorSets.end(), [&Set](const auto& CurSet) -> bool { return CurSet.get() == Set; }); + + // If the descriptor set is not found, return + if (found == descriptorSets.end()) { + return false; + } + + // Mark the slot as free + const u16 index = static_cast(std::distance(descriptorSets.begin(), found)); + + allocationMap[index] = false; + + return true; + } + + std::optional DescriptorHeap::create( + vk::Device device, std::span bindings, u16 descriptorHeapCount + ) { + DescriptorHeap newDescriptorHeap(device); + + // Create a histogram of each of the descriptor types and how many of each + // the pool should have + // Todo: maybe keep this around as a hash table to do more dynamic + // allocations of descriptor sets rather than allocating them all up-front + std::vector poolSizes; + { + std::unordered_map descriptorTypeCounts; + + for (const auto& binding : bindings) { + descriptorTypeCounts[binding.descriptorType] += binding.descriptorCount; + } + for (const auto& descriptorTypeCount : descriptorTypeCounts) { + poolSizes.push_back(vk::DescriptorPoolSize(descriptorTypeCount.first, descriptorTypeCount.second * descriptorHeapCount)); + } + } + + // Create descriptor pool + { + vk::DescriptorPoolCreateInfo poolInfo; + poolInfo.flags = vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet; + 
poolInfo.maxSets = descriptorHeapCount; + poolInfo.pPoolSizes = poolSizes.data(); + poolInfo.poolSizeCount = poolSizes.size(); + if (auto createResult = device.createDescriptorPoolUnique(poolInfo); createResult.result == vk::Result::eSuccess) { + newDescriptorHeap.descriptorPool = std::move(createResult.value); + } else { + return std::nullopt; + } + } + + // Create descriptor set layout + { + vk::DescriptorSetLayoutCreateInfo layoutInfo; + layoutInfo.pBindings = bindings.data(); + layoutInfo.bindingCount = bindings.size(); + + if (auto createResult = device.createDescriptorSetLayoutUnique(layoutInfo); createResult.result == vk::Result::eSuccess) { + newDescriptorHeap.descriptorSetLayout = std::move(createResult.value); + } else { + return std::nullopt; + } + } + + newDescriptorHeap.descriptorSets.resize(descriptorHeapCount); + newDescriptorHeap.allocationMap.resize(descriptorHeapCount); + + newDescriptorHeap.bindings.assign(bindings.begin(), bindings.end()); + + return {std::move(newDescriptorHeap)}; + } +} // namespace Vulkan \ No newline at end of file From 4b7bd9df3d32d17942a332780bb4965332dcc935 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 20 Aug 2023 22:06:46 -0700 Subject: [PATCH 29/45] Add Vulkan Descriptor-Update batching Allows multiple descriptor operations to be batched up and dispatched in one API call rather than scattered throughout the code base.
--- CMakeLists.txt | 2 + .../vk_descriptor_update_batch.hpp | 62 ++++++++++++ .../vk_descriptor_update_batch.cpp | 98 +++++++++++++++++++ 3 files changed, 162 insertions(+) create mode 100644 include/renderer_vk/vk_descriptor_update_batch.hpp create mode 100644 src/core/renderer_vk/vk_descriptor_update_batch.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a12cf337..1218b97b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -241,12 +241,14 @@ if(ENABLE_VULKAN) set(RENDERER_VK_INCLUDE_FILES include/renderer_vk/renderer_vk.hpp include/renderer_vk/vk_api.hpp include/renderer_vk/vk_debug.hpp include/renderer_vk/vk_descriptor_heap.hpp + include/renderer_vk/vk_descriptor_update_batch.hpp include/renderer_vk/vk_memory.hpp include/renderer_vk/vk_pica.hpp ) set(RENDERER_VK_SOURCE_FILES src/core/renderer_vk/renderer_vk.cpp src/core/renderer_vk/vk_api.cpp src/core/renderer_vk/vk_debug.cpp src/core/renderer_vk/vk_descriptor_heap.cpp + src/core/renderer_vk/vk_descriptor_update_batch.cpp src/core/renderer_vk/vk_memory.cpp src/core/renderer_vk/vk_pica.cpp ) diff --git a/include/renderer_vk/vk_descriptor_update_batch.hpp b/include/renderer_vk/vk_descriptor_update_batch.hpp new file mode 100644 index 00000000..1a10214d --- /dev/null +++ b/include/renderer_vk/vk_descriptor_update_batch.hpp @@ -0,0 +1,62 @@ +#pragma once + +#include +#include +#include + +#include "helpers.hpp" +#include "vk_api.hpp" + +namespace Vulkan { + // Implements a re-usable structure for batching up descriptor writes with a + // finite amount of space for both convenience and to reduce the overall amount + // of API calls to `vkUpdateDescriptorSets` + class DescriptorUpdateBatch { + private: + const vk::Device device; + + const usize descriptorWriteMax; + const usize descriptorCopyMax; + + using DescriptorInfoUnion = std::variant; + + // Todo: Maybe some kind of hash so that these structures can be re-used + // among descriptor writes. 
+ std::unique_ptr descriptorInfos; + std::unique_ptr descriptorWrites; + std::unique_ptr descriptorCopies; + + usize descriptorWriteEnd = 0; + usize descriptorCopyEnd = 0; + + DescriptorUpdateBatch(vk::Device device, usize descriptorWriteMax, usize descriptorCopyMax) + : device(device), descriptorWriteMax(descriptorWriteMax), descriptorCopyMax(descriptorCopyMax) {} + + public: + ~DescriptorUpdateBatch() = default; + + DescriptorUpdateBatch(DescriptorUpdateBatch&&) = default; + + void flush(); + + void addImage( + vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::ImageView imageView, vk::ImageLayout imageLayout = vk::ImageLayout::eGeneral + ); + void addSampler(vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::Sampler sampler); + + void addImageSampler( + vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::ImageView imageView, vk::Sampler sampler, + vk::ImageLayout imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal + ); + void addBuffer( + vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size = VK_WHOLE_SIZE + ); + + void copyBinding( + vk::DescriptorSet sourceDescriptor, vk::DescriptorSet targetDescriptor, u8 sourceBinding, u8 targetBinding, u8 sourceArrayElement = 0, + u8 targetArrayElement = 0, u8 descriptorCount = 1 + ); + + static std::optional create(vk::Device device, usize descriptorWriteMax = 256, usize descriptorCopyMax = 256); + }; +} // namespace Vulkan \ No newline at end of file diff --git a/src/core/renderer_vk/vk_descriptor_update_batch.cpp b/src/core/renderer_vk/vk_descriptor_update_batch.cpp new file mode 100644 index 00000000..a414ca2d --- /dev/null +++ b/src/core/renderer_vk/vk_descriptor_update_batch.cpp @@ -0,0 +1,98 @@ +#include "renderer_vk/vk_descriptor_update_batch.hpp" + +#include +#include + +namespace Vulkan { + + void DescriptorUpdateBatch::flush() { + device.updateDescriptorSets({std::span(descriptorWrites.get(), descriptorWriteEnd)}, 
{std::span(descriptorCopies.get(), descriptorCopyEnd)}); + + descriptorWriteEnd = 0; + descriptorCopyEnd = 0; + } + + void DescriptorUpdateBatch::addImage(vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::ImageView imageView, vk::ImageLayout imageLayout) { + if (descriptorWriteEnd >= descriptorWriteMax) { + flush(); + } + + const auto& imageInfo = descriptorInfos[descriptorWriteEnd].emplace(vk::Sampler(), imageView, imageLayout); + + descriptorWrites[descriptorWriteEnd] = + vk::WriteDescriptorSet(targetDescriptor, targetBinding, 0, 1, vk::DescriptorType::eSampledImage, &imageInfo, nullptr, nullptr); + + ++descriptorWriteEnd; + } + + void DescriptorUpdateBatch::addSampler(vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::Sampler sampler) { + if (descriptorWriteEnd >= descriptorWriteMax) { + flush(); + } + + const auto& imageInfo = descriptorInfos[descriptorWriteEnd].emplace(sampler, vk::ImageView(), vk::ImageLayout()); + + descriptorWrites[descriptorWriteEnd] = + vk::WriteDescriptorSet(targetDescriptor, targetBinding, 0, 1, vk::DescriptorType::eSampler, &imageInfo, nullptr, nullptr); + + ++descriptorWriteEnd; + } + + void DescriptorUpdateBatch::addImageSampler( + vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::ImageView imageView, vk::Sampler sampler, vk::ImageLayout imageLayout + ) { + if (descriptorWriteEnd >= descriptorWriteMax) { + flush(); + } + + const auto& imageInfo = descriptorInfos[descriptorWriteEnd].emplace(sampler, imageView, imageLayout); + + descriptorWrites[descriptorWriteEnd] = + vk::WriteDescriptorSet(targetDescriptor, targetBinding, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr); + + ++descriptorWriteEnd; + } + + void DescriptorUpdateBatch::addBuffer( + vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size + ) { + if (descriptorWriteEnd >= descriptorWriteMax) { + flush(); + } + + const auto& bufferInfo = 
descriptorInfos[descriptorWriteEnd].emplace(buffer, offset, size); + + descriptorWrites[descriptorWriteEnd] = + vk::WriteDescriptorSet(targetDescriptor, targetBinding, 0, 1, vk::DescriptorType::eStorageImage, nullptr, &bufferInfo, nullptr); + + ++descriptorWriteEnd; + } + + void DescriptorUpdateBatch::copyBinding( + vk::DescriptorSet sourceDescriptor, vk::DescriptorSet targetDescriptor, u8 sourceBinding, u8 targetBinding, u8 sourceArrayElement, + u8 targetArrayElement, u8 descriptorCount + ) { + if (descriptorCopyEnd >= descriptorCopyMax) { + flush(); + } + + descriptorCopies[descriptorCopyEnd] = vk::CopyDescriptorSet( + sourceDescriptor, sourceBinding, sourceArrayElement, targetDescriptor, targetBinding, targetArrayElement, descriptorCount + ); + + ++descriptorCopyEnd; + } + + std::optional DescriptorUpdateBatch::create(vk::Device device, usize descriptorWriteMax, usize descriptorCopyMax) + + { + DescriptorUpdateBatch newDescriptorUpdateBatch(device, descriptorWriteMax, descriptorCopyMax); + + newDescriptorUpdateBatch.descriptorInfos = std::make_unique(descriptorWriteMax); + newDescriptorUpdateBatch.descriptorWrites = std::make_unique(descriptorWriteMax); + newDescriptorUpdateBatch.descriptorCopies = std::make_unique(descriptorCopyMax); + + return {std::move(newDescriptorUpdateBatch)}; + } + +} // namespace Vulkan \ No newline at end of file From 14b1d7d8a882028fd35a902ab2f8b3bf3070269b Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 20 Aug 2023 23:01:12 -0700 Subject: [PATCH 30/45] Add display-shader presentation Uses the graphics pipeline to both blit and transpose the 3ds-formatted textures. Does not read from the actual texture just yet since we don't write to the descriptor just yet. Some other patterns need to line up before then.
--- include/renderer_vk/renderer_vk.hpp | 10 ++- src/core/renderer_vk/renderer_vk.cpp | 109 +++++++++++++++++++++++---- 2 files changed, 103 insertions(+), 16 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index e93a97b1..d2723e35 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -5,6 +5,7 @@ #include "renderer.hpp" #include "vk_api.hpp" #include "vk_descriptor_heap.hpp" +#include "vk_descriptor_update_batch.hpp" class GPU; @@ -72,7 +73,7 @@ class RendererVK final : public Renderer { } }; // Hash(loc, size, format) -> Texture - std::map textureCache; + std::map textureCache; static u32 colorBufferHash(u32 loc, u32 size, PICA::ColorFmt format); static u32 depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format); @@ -82,6 +83,8 @@ class RendererVK final : public Renderer { // Framebuffer for the top/bottom image std::vector screenTexture = {}; + std::vector screenTextureViews = {}; + std::vector screenTextureFramebuffers = {}; vk::UniqueDeviceMemory framebufferMemory = {}; std::map renderPassCache; @@ -89,9 +92,14 @@ class RendererVK final : public Renderer { vk::RenderPass getRenderPass(vk::Format colorFormat, std::optional depthFormat); vk::RenderPass getRenderPass(PICA::ColorFmt colorFormat, std::optional depthFormat); + std::unique_ptr descriptorUpdateBatch; + + // Display pipeline data std::unique_ptr displayDescriptorHeap; vk::UniquePipeline displayPipeline; vk::UniquePipelineLayout displayPipelineLayout; + std::vector topDisplayPipelineDescriptorSet; + std::vector bottomDisplayPipelineDescriptorSet; // Recreate the swapchain, possibly re-using the old one in the case of a resize vk::Result recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D swapchainExtent); diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 49543bc5..0a0450bb 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp 
@@ -595,27 +595,53 @@ void RendererVK::display() { //// Render Frame(Simulated - just clear the images to different colors for now) { - static const std::array frameScopeColor = {{1.0f, 0.0f, 1.0f, 1.0f}}; + static const std::array renderScreenScopeColor = {{1.0f, 0.0f, 1.0f, 1.0f}}; - Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), frameScopeColor, "Frame"); - - // Prepare images for color-clear + Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), renderScreenScopeColor, "Render Screen"); + // Prepare screen texture for rendering into getCurrentCommandBuffer().pipelineBarrier( - vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, { vk::ImageMemoryBarrier( - vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, - vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[frameBufferingIndex].get(), - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eUndefined, + vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + screenTexture[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), } ); - static const std::array topClearColor = {{1.0f, 0.0f, 0.0f, 1.0f}}; - static const std::array bottomClearColor = {{0.0f, 1.0f, 0.0f, 1.0f}}; - getCurrentCommandBuffer().clearColorImage( - screenTexture[frameBufferingIndex].get(), vk::ImageLayout::eTransferDstOptimal, topClearColor, - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ); + + vk::RenderPassBeginInfo renderPassBeginInfo = {}; + renderPassBeginInfo.renderPass = getRenderPass(vk::Format::eR8G8B8A8Unorm, 
{}); + + renderPassBeginInfo.framebuffer = screenTextureFramebuffers[frameBufferingIndex].get(); + renderPassBeginInfo.renderArea.offset = vk::Offset2D(); + renderPassBeginInfo.renderArea.extent = vk::Extent2D(400, 240 * 2); + + getCurrentCommandBuffer().beginRenderPass(renderPassBeginInfo, vk::SubpassContents::eInline); + + // Render top screen + if (topActiveFb) { + getCurrentCommandBuffer().bindDescriptorSets( + vk::PipelineBindPoint::eGraphics, displayPipelineLayout.get(), 0, {topDisplayPipelineDescriptorSet[frameBufferingIndex]}, {} + ); + getCurrentCommandBuffer().bindPipeline(vk::PipelineBindPoint::eGraphics, displayPipeline.get()); + getCurrentCommandBuffer().setViewport(0, vk::Viewport(0, 0, 400, 240)); + getCurrentCommandBuffer().setScissor(0, vk::Rect2D({0, 0}, {400, 240})); + getCurrentCommandBuffer().draw(3, 1, 0, 0); + } + + // Render bottom screen + if (bottomActiveFb) { + getCurrentCommandBuffer().bindDescriptorSets( + vk::PipelineBindPoint::eGraphics, displayPipelineLayout.get(), 0, {bottomDisplayPipelineDescriptorSet[frameBufferingIndex]}, {} + ); + getCurrentCommandBuffer().bindPipeline(vk::PipelineBindPoint::eGraphics, displayPipeline.get()); + getCurrentCommandBuffer().setViewport(0, vk::Viewport(40, 0, 320, 240)); + getCurrentCommandBuffer().setScissor(0, vk::Rect2D({40, 0}, {320, 240})); + getCurrentCommandBuffer().draw(3, 1, 0, 0); + } + + getCurrentCommandBuffer().endRenderPass(); } //// Present @@ -1049,8 +1075,10 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { screenTextureInfo.setInitialLayout(vk::ImageLayout::eUndefined); screenTexture.resize(frameBufferingCount); + screenTextureViews.resize(frameBufferingCount); + screenTextureFramebuffers.resize(frameBufferingCount); - for (usize i = 0; i < frameBufferingCount; i++) { + for (usize i = 0; i < frameBufferingCount; ++i) { if (auto createResult = device->createSemaphoreUnique(semaphoreInfo); createResult.result == vk::Result::eSuccess) { swapImageFreeSemaphore[i] = 
std::move(createResult.value); @@ -1093,17 +1121,68 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { } } + // Memory is bounded, create views and framebuffer for screentexture + vk::ImageViewCreateInfo screenTextureViewCreateInfo = {}; + screenTextureViewCreateInfo.viewType = vk::ImageViewType::e2D; + screenTextureViewCreateInfo.format = vk::Format::eR8G8B8A8Unorm; + screenTextureViewCreateInfo.components.r = vk::ComponentSwizzle::eR; + screenTextureViewCreateInfo.components.g = vk::ComponentSwizzle::eG; + screenTextureViewCreateInfo.components.b = vk::ComponentSwizzle::eB; + screenTextureViewCreateInfo.components.a = vk::ComponentSwizzle::eA; + screenTextureViewCreateInfo.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + + for (usize i = 0; i < frameBufferingCount; ++i) { + screenTextureViewCreateInfo.image = screenTexture[i].get(); + + if (auto createResult = device->createImageViewUnique(screenTextureViewCreateInfo); createResult.result == vk::Result::eSuccess) { + screenTextureViews[i] = std::move(createResult.value); + } else { + Helpers::panic("Error creating screen texture view: %s\n", vk::to_string(createResult.result).c_str()); + } + + vk::FramebufferCreateInfo framebufferInfo = {}; + framebufferInfo.setRenderPass(getRenderPass(vk::Format::eR8G8B8A8Unorm, {})); + framebufferInfo.setAttachments(screenTextureViews[i].get()); + framebufferInfo.setWidth(400); + framebufferInfo.setHeight(240 * 2); + framebufferInfo.setLayers(1); + if (auto createResult = device->createFramebufferUnique(framebufferInfo); createResult.result == vk::Result::eSuccess) { + screenTextureFramebuffers[i] = std::move(createResult.value); + } else { + Helpers::panic("Error creating screen-texture framebuffer: %s\n", vk::to_string(createResult.result).c_str()); + } + } + static vk::DescriptorSetLayoutBinding displayShaderLayout[] = { {// Just a singular texture slot 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, }; + if 
(auto createResult = Vulkan::DescriptorUpdateBatch::create(device.get()); createResult.has_value()) { + descriptorUpdateBatch = std::make_unique(std::move(createResult.value())); + } else { + Helpers::panic("Error creating descriptor update batch\n"); + } + if (auto createResult = Vulkan::DescriptorHeap::create(device.get(), displayShaderLayout); createResult.has_value()) { displayDescriptorHeap = std::make_unique(std::move(createResult.value())); } else { Helpers::panic("Error creating descriptor heap\n"); } + for (usize i = 0; i < frameBufferingCount; ++i) { + if (auto allocateResult = displayDescriptorHeap->allocateDescriptorSet(); allocateResult.has_value()) { + topDisplayPipelineDescriptorSet.emplace_back(allocateResult.value()); + } else { + Helpers::panic("Error creating descriptor set\n"); + } + if (auto allocateResult = displayDescriptorHeap->allocateDescriptorSet(); allocateResult.has_value()) { + bottomDisplayPipelineDescriptorSet.emplace_back(allocateResult.value()); + } else { + Helpers::panic("Error creating descriptor set\n"); + } + } + auto vk_resources = cmrc::RendererVK::get_filesystem(); auto displayVertexShader = vk_resources.open("vulkan_display.vert.spv"); auto displayFragmentShader = vk_resources.open("vulkan_display.frag.spv"); From 7a86595a1b7b558e4742bc7911932b5271c14118 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 20 Aug 2023 23:35:52 -0700 Subject: [PATCH 31/45] Add vulkan sampler cache --- CMakeLists.txt | 2 ++ include/renderer_vk/vk_sampler_cache.hpp | 28 ++++++++++++++++++++ src/core/renderer_vk/vk_sampler_cache.cpp | 31 +++++++++++++++++++++++ 3 files changed, 61 insertions(+) create mode 100644 include/renderer_vk/vk_sampler_cache.hpp create mode 100644 src/core/renderer_vk/vk_sampler_cache.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 1218b97b..b953175e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -242,6 +242,7 @@ if(ENABLE_VULKAN) include/renderer_vk/vk_api.hpp include/renderer_vk/vk_debug.hpp 
include/renderer_vk/vk_descriptor_heap.hpp include/renderer_vk/vk_descriptor_update_batch.hpp + include/renderer_vk/vk_sampler_cache.hpp include/renderer_vk/vk_memory.hpp include/renderer_vk/vk_pica.hpp ) @@ -249,6 +250,7 @@ if(ENABLE_VULKAN) src/core/renderer_vk/vk_api.cpp src/core/renderer_vk/vk_debug.cpp src/core/renderer_vk/vk_descriptor_heap.cpp src/core/renderer_vk/vk_descriptor_update_batch.cpp + src/core/renderer_vk/vk_sampler_cache.cpp src/core/renderer_vk/vk_memory.cpp src/core/renderer_vk/vk_pica.cpp ) diff --git a/include/renderer_vk/vk_sampler_cache.hpp b/include/renderer_vk/vk_sampler_cache.hpp new file mode 100644 index 00000000..8cb27689 --- /dev/null +++ b/include/renderer_vk/vk_sampler_cache.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include +#include + +#include "helpers.hpp" +#include "vk_api.hpp" + +namespace Vulkan { + // Implements a simple pool of reusable sampler objects + class SamplerCache { + private: + const vk::Device device; + + std::unordered_map samplerMap; + + explicit SamplerCache(vk::Device device); + + public: + ~SamplerCache() = default; + + SamplerCache(SamplerCache&&) = default; + + const vk::Sampler& getSampler(const vk::SamplerCreateInfo& samplerInfo); + + static std::optional create(vk::Device device); + }; +} // namespace Vulkan \ No newline at end of file diff --git a/src/core/renderer_vk/vk_sampler_cache.cpp b/src/core/renderer_vk/vk_sampler_cache.cpp new file mode 100644 index 00000000..884264b1 --- /dev/null +++ b/src/core/renderer_vk/vk_sampler_cache.cpp @@ -0,0 +1,31 @@ +#include "renderer_vk/vk_sampler_cache.hpp" + +#include + +#include "helpers.hpp" + +namespace Vulkan { + + SamplerCache::SamplerCache(vk::Device device) : device(device) {} + + const vk::Sampler& SamplerCache::getSampler(const vk::SamplerCreateInfo& samplerInfo) { + const std::size_t samplerHash = std::hash()(samplerInfo); + + // Cache hit + if (samplerMap.contains(samplerHash)) { + return samplerMap.at(samplerHash).get(); + } + + if (auto createResult 
= device.createSamplerUnique(samplerInfo); createResult.result == vk::Result::eSuccess) { + return (samplerMap[samplerHash] = std::move(createResult.value)).get(); + } else { + Helpers::panic("Error creating sampler: %s\n", vk::to_string(createResult.result).c_str()); + } + } + + std::optional SamplerCache::create(vk::Device device) { + SamplerCache newSamplerCache(device); + + return {std::move(newSamplerCache)}; + } +} // namespace Vulkan \ No newline at end of file From 0258640da931cb515da0545b886376154c764bd8 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 20 Aug 2023 23:40:47 -0700 Subject: [PATCH 32/45] Add cached sampler creation Using the sampler cache we can maintain a pool of reusable samplers. The `sampler2D` utility function can help make some trivial samplers. --- include/renderer_vk/renderer_vk.hpp | 3 ++ src/core/renderer_vk/renderer_vk.cpp | 75 +++++++++++++++++++++++++--- 2 files changed, 72 insertions(+), 6 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index d2723e35..bae27f3c 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -6,6 +6,7 @@ #include "vk_api.hpp" #include "vk_descriptor_heap.hpp" #include "vk_descriptor_update_batch.hpp" +#include "vk_sampler_cache.hpp" class GPU; @@ -78,6 +79,7 @@ class RendererVK final : public Renderer { static u32 colorBufferHash(u32 loc, u32 size, PICA::ColorFmt format); static u32 depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format); + Texture* findColorRenderTexture(u32 addr); Texture& getColorRenderTexture(u32 addr, PICA::ColorFmt format, u32 width, u32 height); Texture& getDepthRenderTexture(u32 addr, PICA::DepthFmt format, u32 width, u32 height); @@ -93,6 +95,7 @@ class RendererVK final : public Renderer { vk::RenderPass getRenderPass(PICA::ColorFmt colorFormat, std::optional depthFormat); std::unique_ptr descriptorUpdateBatch; + std::unique_ptr samplerCache; // Display pipeline data std::unique_ptr 
displayDescriptorHeap; diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 0a0450bb..23d317b1 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -13,6 +13,31 @@ CMRC_DECLARE(RendererVK); +static vk::SamplerCreateInfo sampler2D(bool filtered = true, bool clamp = false) { + vk::SamplerCreateInfo samplerInfo = {}; + samplerInfo.magFilter = filtered ? vk::Filter::eLinear : vk::Filter::eNearest; + samplerInfo.minFilter = filtered ? vk::Filter::eLinear : vk::Filter::eNearest; + + samplerInfo.mipmapMode = vk::SamplerMipmapMode::eLinear; + + samplerInfo.addressModeU = clamp ? vk::SamplerAddressMode::eClampToEdge : vk::SamplerAddressMode::eRepeat; + samplerInfo.addressModeV = clamp ? vk::SamplerAddressMode::eClampToEdge : vk::SamplerAddressMode::eRepeat; + samplerInfo.addressModeW = clamp ? vk::SamplerAddressMode::eClampToEdge : vk::SamplerAddressMode::eRepeat; + + samplerInfo.mipLodBias = 0.0f; + samplerInfo.anisotropyEnable = VK_FALSE; + samplerInfo.maxAnisotropy = 16.0f; + + samplerInfo.compareEnable = VK_FALSE; + samplerInfo.compareOp = vk::CompareOp::eAlways; + + samplerInfo.minLod = 0.0f; + samplerInfo.maxLod = VK_LOD_CLAMP_NONE; + samplerInfo.borderColor = vk::BorderColor::eFloatTransparentBlack; + samplerInfo.unnormalizedCoordinates = VK_FALSE; + return samplerInfo; +} + static vk::UniqueShaderModule createShaderModule(vk::Device device, std::span shaderCode) { vk::ShaderModuleCreateInfo shaderModuleInfo = {}; shaderModuleInfo.pCode = reinterpret_cast(shaderCode.data()); @@ -200,14 +225,37 @@ static u32 rotl32(u32 x, u32 n) { return (x << n) | (x >> (32 - n)); } static u32 ror32(u32 x, u32 n) { return (x >> n) | (x << (32 - n)); } u32 RendererVK::colorBufferHash(u32 loc, u32 size, PICA::ColorFmt format) { - return rotl32(loc, 17) ^ ror32(size, 23) ^ (static_cast(format) << 60); + return loc | (static_cast(ror32(size, 23) ^ (static_cast(format))) << 32); } u32 
RendererVK::depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format) { - return rotl32(loc, 17) ^ ror32(size, 29) ^ (static_cast(format) << 60); + return loc | (static_cast(ror32(size, 29) ^ (static_cast(format))) << 32); +} + +RendererVK::Texture* RendererVK::findColorRenderTexture(u32 addr) { + const auto lower = textureCache.lower_bound(addr); + + if (lower == textureCache.end()) { + // Not found + return nullptr; + } + + if (lower == textureCache.begin()) { + return &lower->second; + } + + Texture* texture = &lower->second; + + const usize sizeInBytes = texture->size[0] * texture->size[1] * texture->sizePerPixel; + + if ((addr - lower->second.loc) <= sizeInBytes) { + return texture; + } + + return nullptr; } RendererVK::Texture& RendererVK::getColorRenderTexture(u32 addr, PICA::ColorFmt format, u32 width, u32 height) { - const u32 renderTextureHash = colorBufferHash(addr, width * height * PICA::sizePerPixel(format), format); + const u64 renderTextureHash = colorBufferHash(addr, width * height * PICA::sizePerPixel(format), format); // Cache hit if (textureCache.contains(renderTextureHash)) { @@ -265,7 +313,7 @@ RendererVK::Texture& RendererVK::getColorRenderTexture(u32 addr, PICA::ColorFmt } RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt format, u32 width, u32 height) { - const u32 renderTextureHash = depthBufferHash(addr, width * height * PICA::sizePerPixel(format), format); + const u64 renderTextureHash = depthBufferHash(addr, width * height * PICA::sizePerPixel(format), format); // Cache hit if (textureCache.contains(renderTextureHash)) { @@ -620,7 +668,10 @@ void RendererVK::display() { getCurrentCommandBuffer().beginRenderPass(renderPassBeginInfo, vk::SubpassContents::eInline); // Render top screen - if (topActiveFb) { + if (Texture* topScreen = findColorRenderTexture(topScreenAddr); topScreen) { + descriptorUpdateBatch->addImageSampler( + topDisplayPipelineDescriptorSet[frameBufferingIndex], 0, topScreen->imageView.get(), 
samplerCache->getSampler(sampler2D()) + ); getCurrentCommandBuffer().bindDescriptorSets( vk::PipelineBindPoint::eGraphics, displayPipelineLayout.get(), 0, {topDisplayPipelineDescriptorSet[frameBufferingIndex]}, {} ); @@ -631,7 +682,10 @@ void RendererVK::display() { } // Render bottom screen - if (bottomActiveFb) { + if (Texture* bottomScreen = findColorRenderTexture(bottomScreenAddr); bottomScreenAddr) { + descriptorUpdateBatch->addImageSampler( + bottomDisplayPipelineDescriptorSet[frameBufferingIndex], 0, bottomScreen->imageView.get(), samplerCache->getSampler(sampler2D()) + ); getCurrentCommandBuffer().bindDescriptorSets( vk::PipelineBindPoint::eGraphics, displayPipelineLayout.get(), 0, {bottomDisplayPipelineDescriptorSet[frameBufferingIndex]}, {} ); @@ -698,6 +752,9 @@ void RendererVK::display() { Helpers::panic("Error ending command buffer recording: %s\n", vk::to_string(endResult).c_str()); } + // Flush all descriptor writes + descriptorUpdateBatch->flush(); + vk::SubmitInfo submitInfo = {}; // Wait for any previous uses of the image image to finish presenting std::vector waitSemaphores; @@ -1164,6 +1221,12 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { Helpers::panic("Error creating descriptor update batch\n"); } + if (auto createResult = Vulkan::SamplerCache::create(device.get()); createResult.has_value()) { + samplerCache = std::make_unique(std::move(createResult.value())); + } else { + Helpers::panic("Error creating sampler cache\n"); + } + if (auto createResult = Vulkan::DescriptorHeap::create(device.get(), displayShaderLayout); createResult.has_value()) { displayDescriptorHeap = std::make_unique(std::move(createResult.value())); } else { From b3812548fe2d6afd0fec37f54910bd3b9abcf6b5 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 20 Aug 2023 23:42:34 -0700 Subject: [PATCH 33/45] Fix bottom screen detection --- src/core/renderer_vk/renderer_vk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 23d317b1..5635b169 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -682,7 +682,7 @@ void RendererVK::display() { } // Render bottom screen - if (Texture* bottomScreen = findColorRenderTexture(bottomScreenAddr); bottomScreenAddr) { + if (Texture* bottomScreen = findColorRenderTexture(bottomScreenAddr); bottomScreen) { descriptorUpdateBatch->addImageSampler( bottomDisplayPipelineDescriptorSet[frameBufferingIndex], 0, bottomScreen->imageView.get(), samplerCache->getSampler(sampler2D()) ); From cfd02f936ed0845e2d2413c01e51d232e346d30b Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 21 Aug 2023 22:34:16 -0700 Subject: [PATCH 34/45] Fix swapchain/screenTexture layout synchronization issues Been deving with a headless machine for too long. Fixes some presentation issues. --- src/core/renderer_vk/renderer_vk.cpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 5635b169..0de6606e 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -707,27 +707,29 @@ void RendererVK::display() { getCurrentCommandBuffer().pipelineBarrier( vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, { + // swapchainImage: Undefined -> TransferDst vk::ImageMemoryBarrier( vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), + // screenTexture: ColorAttachmentOptimal -> TransferSrc vk::ImageMemoryBarrier( - vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eTransferRead, 
vk::ImageLayout::eTransferDstOptimal, + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), } ); + // Clear swapchain image with black static const std::array clearColor = {{0.0f, 0.0f, 0.0f, 1.0f}}; getCurrentCommandBuffer().clearColorImage( swapchainImages[swapchainImageIndex], vk::ImageLayout::eTransferDstOptimal, clearColor, vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ); - // Blit top/bottom screen into swapchain image - + // Blit screentexture into swapchain image static const vk::ImageBlit screenBlit( vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240 * 2, 1}}, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240 * 2, 1}} @@ -738,13 +740,23 @@ void RendererVK::display() { ); // Prepare swapchain image for present + // Transfer screenTexture back into ColorAttachmentOptimal getCurrentCommandBuffer().pipelineBarrier( vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, - {vk::ImageMemoryBarrier( - vk::AccessFlagBits::eNone, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, - vk::ImageLayout::ePresentSrcKHR, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - )} + { + // swapchainImage: TransferDst -> Preset (wait for all writes) + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::ePresentSrcKHR, VK_QUEUE_FAMILY_IGNORED, 
VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + // screenTexture: TransferSrc -> ColorAttachmentOptimal (wait for all reads) + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferRead, vk::AccessFlagBits::eColorAttachmentRead, vk::ImageLayout::eTransferSrcOptimal, + vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + screenTexture[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } ); } From d35c803ad6e6fa9475668ffdf97bec1059d28623 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 21 Aug 2023 22:40:40 -0700 Subject: [PATCH 35/45] Fix RenderTexture usage/image-aspect All color textures need to be sampled due to how we are transposing the image when writing to the render-texture. Depth-Stencil render-textures need to designate if it wants _either_ depth or stencil when creating an image-view. --- src/core/renderer_vk/renderer_vk.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 0de6606e..2c2187a8 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -278,7 +278,7 @@ RendererVK::Texture& RendererVK::getColorRenderTexture(u32 addr, PICA::ColorFmt textureInfo.setTiling(vk::ImageTiling::eOptimal); textureInfo.setUsage( vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eInputAttachment | vk::ImageUsageFlagBits::eTransferSrc | - vk::ImageUsageFlagBits::eTransferDst + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled ); textureInfo.setSharingMode(vk::SharingMode::eExclusive); textureInfo.setInitialLayout(vk::ImageLayout::eUndefined); @@ -352,7 +352,8 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt viewInfo.viewType = vk::ImageViewType::e2D; viewInfo.format = 
textureInfo.format; viewInfo.components = vk::ComponentMapping(); - viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1); + //viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1); + viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1); if (auto [result, imageMemory] = Vulkan::commitImageHeap(device.get(), physicalDevice, {&newTexture.image.get(), 1}); result == vk::Result::eSuccess) { From 84e0d58d9eb1ffb83fcc906d64d49356238eca68 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 21 Aug 2023 22:52:51 -0700 Subject: [PATCH 36/45] Add initial render-target layout transition, depth-stencil aspect fix Fixes a lot of the render-target validation error messages. We "might" change the default layout for render-targets to shader-read-only and only use attachment-optimal layouts when they are armed for a render-pass. Render-Textures more often need to be ready for shaders than they need to be ready for render-passes. 
--- src/core/renderer_vk/renderer_vk.cpp | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 2c2187a8..7cc7b492 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -309,6 +309,16 @@ RendererVK::Texture& RendererVK::getColorRenderTexture(u32 addr, PICA::ColorFmt Helpers::panic("Error creating color render-texture: %s\n", vk::to_string(createResult.result).c_str()); } + // Initial layout transition + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlags{}, {}, {}, + {vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryWrite, vk::AccessFlagBits::eColorAttachmentRead, vk::ImageLayout::eUndefined, + vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, newTexture.image.get(), + viewInfo.subresourceRange + )} + ); + return newTexture; } @@ -352,9 +362,13 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt viewInfo.viewType = vk::ImageViewType::e2D; viewInfo.format = textureInfo.format; viewInfo.components = vk::ComponentMapping(); - //viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1); + // viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1); viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1); + if (PICA::hasStencil(format)) { + viewInfo.subresourceRange.aspectMask |= vk::ImageAspectFlagBits::eStencil; + } + if (auto [result, imageMemory] = Vulkan::commitImageHeap(device.get(), physicalDevice, {&newTexture.image.get(), 1}); result == vk::Result::eSuccess) { newTexture.imageMemory = std::move(imageMemory); @@ -368,6 +382,16 @@ 
RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt Helpers::panic("Error creating depth render-texture: %s\n", vk::to_string(createResult.result).c_str()); } + // Initial layout transition + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlags{}, {}, {}, + {vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryWrite, vk::AccessFlagBits::eDepthStencilAttachmentRead, vk::ImageLayout::eUndefined, + vk::ImageLayout::eDepthStencilAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, newTexture.image.get(), + viewInfo.subresourceRange + )} + ); + return newTexture; } From be5ebeefedf14caf624a9b708750ce339ce96f1a Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 24 Aug 2023 09:47:11 -0700 Subject: [PATCH 37/45] Default all texture layouts to `eShaderReadOnlyOptimal` By default, a texture sitting somewhere and ignored should be in a shader-read-only state, and transitioned into other layouts in exception to this passive state. This allows all textures to be ready to be interpreted by shaders by default. 
--- src/core/renderer_vk/renderer_vk.cpp | 73 +++++++++++++++------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 7cc7b492..1fc39f7d 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -289,6 +289,10 @@ RendererVK::Texture& RendererVK::getColorRenderTexture(u32 addr, PICA::ColorFmt Helpers::panic("Error creating color render-texture image: %s\n", vk::to_string(createResult.result).c_str()); } + Vulkan::setObjectName( + device.get(), newTexture.image.get(), "TextureCache:%08x %ux%u %s", addr, width, height, vk::to_string(textureInfo.format).c_str() + ); + vk::ImageViewCreateInfo viewInfo = {}; viewInfo.image = newTexture.image.get(); viewInfo.viewType = vk::ImageViewType::e2D; @@ -313,9 +317,8 @@ RendererVK::Texture& RendererVK::getColorRenderTexture(u32 addr, PICA::ColorFmt getCurrentCommandBuffer().pipelineBarrier( vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlags{}, {}, {}, {vk::ImageMemoryBarrier( - vk::AccessFlagBits::eMemoryWrite, vk::AccessFlagBits::eColorAttachmentRead, vk::ImageLayout::eUndefined, - vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, newTexture.image.get(), - viewInfo.subresourceRange + vk::AccessFlagBits::eMemoryWrite, vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, newTexture.image.get(), viewInfo.subresourceRange )} ); @@ -357,6 +360,10 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt Helpers::panic("Error creating depth render-texture image: %s\n", vk::to_string(createResult.result).c_str()); } + Vulkan::setObjectName( + device.get(), newTexture.image.get(), "TextureCache:%08x %ux%u %s", addr, width, height, vk::to_string(textureInfo.format).c_str() + ); + 
vk::ImageViewCreateInfo viewInfo = {}; viewInfo.image = newTexture.image.get(); viewInfo.viewType = vk::ImageViewType::e2D; @@ -386,9 +393,8 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt getCurrentCommandBuffer().pipelineBarrier( vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlags{}, {}, {}, {vk::ImageMemoryBarrier( - vk::AccessFlagBits::eMemoryWrite, vk::AccessFlagBits::eDepthStencilAttachmentRead, vk::ImageLayout::eUndefined, - vk::ImageLayout::eDepthStencilAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, newTexture.image.get(), - viewInfo.subresourceRange + vk::AccessFlagBits::eMemoryWrite, vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, newTexture.image.get(), viewInfo.subresourceRange )} ); @@ -420,8 +426,8 @@ vk::RenderPass RendererVK::getRenderPass(vk::Format colorFormat, std::optional renderScreenScopeColor = {{1.0f, 0.0f, 1.0f, 1.0f}}; Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), renderScreenScopeColor, "Render Screen"); - // Prepare screen texture for rendering into - getCurrentCommandBuffer().pipelineBarrier( - vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, - { - vk::ImageMemoryBarrier( - vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eUndefined, - vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - screenTexture[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ), - } - ); vk::RenderPassBeginInfo renderPassBeginInfo = {}; renderPassBeginInfo.renderPass = getRenderPass(vk::Format::eR8G8B8A8Unorm, {}); @@ -738,9 +733,9 @@ void RendererVK::display() { vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, 
VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), - // screenTexture: ColorAttachmentOptimal -> TransferSrc + // screenTexture: ShaderReadOnlyOptimal -> TransferSrc vk::ImageMemoryBarrier( - vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eColorAttachmentOptimal, + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), @@ -767,7 +762,7 @@ void RendererVK::display() { // Prepare swapchain image for present // Transfer screenTexture back into ColorAttachmentOptimal getCurrentCommandBuffer().pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllGraphics, vk::DependencyFlags(), {}, {}, { // swapchainImage: TransferDst -> Preset (wait for all writes) vk::ImageMemoryBarrier( @@ -775,10 +770,10 @@ void RendererVK::display() { vk::ImageLayout::ePresentSrcKHR, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), - // screenTexture: TransferSrc -> ColorAttachmentOptimal (wait for all reads) + // screenTexture: TransferSrc -> eShaderReadOnlyOptimal (wait for all reads) vk::ImageMemoryBarrier( - vk::AccessFlagBits::eTransferRead, vk::AccessFlagBits::eColorAttachmentRead, vk::ImageLayout::eTransferSrcOptimal, - vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + vk::AccessFlagBits::eTransferRead, vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eTransferSrcOptimal, + 
vk::ImageLayout::eShaderReadOnlyOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), } @@ -1197,6 +1192,8 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { if (auto createResult = device->createImageUnique(screenTextureInfo); createResult.result == vk::Result::eSuccess) { screenTexture[i] = std::move(createResult.value); + + Vulkan::setObjectName(device.get(), screenTexture[i].get(), "screenTexture#%zu", i); } else { Helpers::panic("Error creating top-screen image: %s\n", vk::to_string(createResult.result).c_str()); } @@ -1215,7 +1212,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { } } - // Memory is bounded, create views and framebuffer for screentexture + // Memory is bounded, create views, framebuffer, and layout transitions for screentexture vk::ImageViewCreateInfo screenTextureViewCreateInfo = {}; screenTextureViewCreateInfo.viewType = vk::ImageViewType::e2D; screenTextureViewCreateInfo.format = vk::Format::eR8G8B8A8Unorm; @@ -1234,6 +1231,16 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { Helpers::panic("Error creating screen texture view: %s\n", vk::to_string(createResult.result).c_str()); } + // Initial layout transition + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlags{}, {}, {}, + {vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryWrite, vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eUndefined, + vk::ImageLayout::eShaderReadOnlyOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[i].get(), + screenTextureViewCreateInfo.subresourceRange + )} + ); + vk::FramebufferCreateInfo framebufferInfo = {}; framebufferInfo.setRenderPass(getRenderPass(vk::Format::eR8G8B8A8Unorm, {})); framebufferInfo.setAttachments(screenTextureViews[i].get()); @@ -1363,12 +1370,12 @@ void 
RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, { vk::ImageMemoryBarrier( - vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eColorAttachmentOptimal, + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, srcFramebuffer.image.get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), vk::ImageMemoryBarrier( - vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eColorAttachmentOptimal, + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, destFramebuffer.image.get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), @@ -1381,16 +1388,16 @@ void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u ); blitCommandBuffer.pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllGraphics, vk::DependencyFlags(), {}, {}, { vk::ImageMemoryBarrier( vk::AccessFlagBits::eTransferRead, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferSrcOptimal, - vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, srcFramebuffer.image.get(), + vk::ImageLayout::eShaderReadOnlyOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, srcFramebuffer.image.get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), vk::ImageMemoryBarrier( 
vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, - vk::ImageLayout::eColorAttachmentOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, destFramebuffer.image.get(), + vk::ImageLayout::eShaderReadOnlyOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, destFramebuffer.image.get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ), } From d781802eb05d961519540b42d9314ff7b4800c44 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 24 Aug 2023 10:39:53 -0700 Subject: [PATCH 38/45] Fix render-texture cache lookups `[32bit loc | 32bit attributes]` Use `std::map::lower_bound(loc << 32)` to find the first address that matches the key in O(log n) time, finer-grained searches can happen after the fact in O(n) time. Fixes render-texture cache lookups --- include/renderer_vk/renderer_vk.hpp | 3 --- src/core/renderer_vk/renderer_vk.cpp | 29 ++++++++++++++-------------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index bae27f3c..51b84971 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -76,9 +76,6 @@ class RendererVK final : public Renderer { // Hash(loc, size, format) -> Texture std::map textureCache; - static u32 colorBufferHash(u32 loc, u32 size, PICA::ColorFmt format); - static u32 depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format); - Texture* findColorRenderTexture(u32 addr); Texture& getColorRenderTexture(u32 addr, PICA::ColorFmt format, u32 width, u32 height); Texture& getDepthRenderTexture(u32 addr, PICA::DepthFmt format, u32 width, u32 height); diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 1fc39f7d..ed7fc41e 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -224,30 +224,29 @@ static s32 findQueueFamily( static u32 rotl32(u32 x, u32 n)
{ return (x << n) | (x >> (32 - n)); } static u32 ror32(u32 x, u32 n) { return (x >> n) | (x << (32 - n)); } -u32 RendererVK::colorBufferHash(u32 loc, u32 size, PICA::ColorFmt format) { - return loc | (static_cast(ror32(size, 23) ^ (static_cast(format))) << 32); +// Lower 32 bits is the format + size, upper 32-bits is the address +static u64 colorBufferHash(u32 loc, u32 size, PICA::ColorFmt format) { + return (static_cast(loc) << 32) | (ror32(size, 23) ^ static_cast(format)); } -u32 RendererVK::depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format) { - return loc | (static_cast(ror32(size, 29) ^ (static_cast(format))) << 32); +static u64 depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format) { + return (static_cast(loc) << 32) | (ror32(size, 29) ^ static_cast(format)); } RendererVK::Texture* RendererVK::findColorRenderTexture(u32 addr) { - const auto lower = textureCache.lower_bound(addr); + // Find first render-texture hash that is >= to addr + auto match = textureCache.lower_bound(static_cast(addr) << 32); - if (lower == textureCache.end()) { + if (match == textureCache.end()) { // Not found return nullptr; } - if (lower == textureCache.begin()) { - return &lower->second; - } - - Texture* texture = &lower->second; + Texture* texture = &match->second; const usize sizeInBytes = texture->size[0] * texture->size[1] * texture->sizePerPixel; - if ((addr - lower->second.loc) <= sizeInBytes) { + // Ensure this address is within the span of the texture + if ((addr - match->second.loc) <= sizeInBytes) { return texture; } @@ -672,7 +671,7 @@ void RendererVK::display() { const u32 bottomScreenAddr = externalRegs[bottomActiveFb ? 
PICA::ExternalRegs::Framebuffer1AFirstAddr : PICA::ExternalRegs::Framebuffer1ASecondAddr]; - //// Render Frame(Simulated - just clear the images to different colors for now) + //// Render Display { static const std::array renderScreenScopeColor = {{1.0f, 0.0f, 1.0f, 1.0f}}; @@ -688,7 +687,7 @@ void RendererVK::display() { getCurrentCommandBuffer().beginRenderPass(renderPassBeginInfo, vk::SubpassContents::eInline); // Render top screen - if (Texture* topScreen = findColorRenderTexture(topScreenAddr); topScreen) { + if (const Texture* topScreen = findColorRenderTexture(topScreenAddr); topScreen) { descriptorUpdateBatch->addImageSampler( topDisplayPipelineDescriptorSet[frameBufferingIndex], 0, topScreen->imageView.get(), samplerCache->getSampler(sampler2D()) ); @@ -702,7 +701,7 @@ void RendererVK::display() { } // Render bottom screen - if (Texture* bottomScreen = findColorRenderTexture(bottomScreenAddr); bottomScreen) { + if (const Texture* bottomScreen = findColorRenderTexture(bottomScreenAddr); bottomScreen) { descriptorUpdateBatch->addImageSampler( bottomDisplayPipelineDescriptorSet[frameBufferingIndex], 0, bottomScreen->imageView.get(), samplerCache->getSampler(sampler2D()) ); From e4195d4d4dc9170e18fec110ec3f25ece5493713 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 24 Aug 2023 11:27:49 -0700 Subject: [PATCH 39/45] Implement color-buffer clears --- include/renderer_vk/renderer_vk.hpp | 2 +- src/core/renderer_vk/renderer_vk.cpp | 71 ++++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 51b84971..07fcdd86 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -76,7 +76,7 @@ class RendererVK final : public Renderer { // Hash(loc, size, format) -> Texture std::map textureCache; - Texture* findColorRenderTexture(u32 addr); + Texture* findRenderTexture(u32 addr); Texture& getColorRenderTexture(u32 addr, 
PICA::ColorFmt format, u32 width, u32 height); Texture& getDepthRenderTexture(u32 addr, PICA::DepthFmt format, u32 width, u32 height); diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index ed7fc41e..3387deda 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -232,7 +232,7 @@ static u64 depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format) { return (static_cast(loc) << 32) | (ror32(size, 29) ^ static_cast(format)); } -RendererVK::Texture* RendererVK::findColorRenderTexture(u32 addr) { +RendererVK::Texture* RendererVK::findRenderTexture(u32 addr) { // Find first render-texture hash that is >= to addr auto match = textureCache.lower_bound(static_cast(addr) << 32); @@ -687,7 +687,10 @@ void RendererVK::display() { getCurrentCommandBuffer().beginRenderPass(renderPassBeginInfo, vk::SubpassContents::eInline); // Render top screen - if (const Texture* topScreen = findColorRenderTexture(topScreenAddr); topScreen) { + if (const Texture* topScreen = findRenderTexture(topScreenAddr); topScreen) { + static const std::array scopeColor = {{1.0f, 0.0f, 0.0f, 1.0f}}; + Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), scopeColor, "Top Screen: %08x", topScreenAddr); + descriptorUpdateBatch->addImageSampler( topDisplayPipelineDescriptorSet[frameBufferingIndex], 0, topScreen->imageView.get(), samplerCache->getSampler(sampler2D()) ); @@ -701,7 +704,10 @@ void RendererVK::display() { } // Render bottom screen - if (const Texture* bottomScreen = findColorRenderTexture(bottomScreenAddr); bottomScreen) { + if (const Texture* bottomScreen = findRenderTexture(bottomScreenAddr); bottomScreen) { + static const std::array scopeColor = {{0.0f, 1.0f, 0.0f, 1.0f}}; + Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), scopeColor, "Bottom Screen: %08x", bottomScreenAddr); + descriptorUpdateBatch->addImageSampler( bottomDisplayPipelineDescriptorSet[frameBufferingIndex], 0, 
bottomScreen->imageView.get(), samplerCache->getSampler(sampler2D()) ); @@ -719,7 +725,7 @@ void RendererVK::display() { //// Present if (swapchainImageIndex != swapchainImageInvalid) { - static const std::array presentScopeColor = {{1.0f, 1.0f, 0.0f, 1.0f}}; + static const std::array presentScopeColor = {{1.0f, 1.0f, 1.0f, 1.0f}}; Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), presentScopeColor, "Present"); // Prepare swapchain image for color-clear/blit-dst, prepare top/bottom screen for blit-src @@ -1304,7 +1310,59 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { ); } -void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {} +void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { + const Texture* renderTexture = findRenderTexture(startAddress); + + if (!renderTexture) { + // not found + return; + } + + // Color-Clear + { + vk::ClearColorValue clearColor = {}; + + clearColor.float32[0] = Helpers::getBits<24, 8>(value) / 255.0f; // r + clearColor.float32[1] = Helpers::getBits<16, 8>(value) / 255.0f; // g + clearColor.float32[2] = Helpers::getBits<8, 8>(value) / 255.0f; // b + clearColor.float32[3] = Helpers::getBits<0, 8>(value) / 255.0f; // a + + Vulkan::DebugLabelScope scope( + getCurrentCommandBuffer(), clearColor.float32, "ClearBuffer start:%08X end:%08X value:%08X control:%08X\n", startAddress, endAddress, + value, control + ); + + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + { + // renderTexture: ShaderReadOnlyOptimal -> TransferDst + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eShaderRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eShaderReadOnlyOptimal, + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, renderTexture->image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) 
+ ), + } + ); + + // Clear RenderTarget + getCurrentCommandBuffer().clearColorImage( + renderTexture->image.get(), vk::ImageLayout::eTransferDstOptimal, clearColor, + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ); + + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllGraphics, vk::DependencyFlags(), {}, {}, + { + // renderTexture: TransferDst -> eShaderReadOnlyOptimal + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eShaderReadOnlyOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, renderTexture->image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } + ); + } +} // NOTE: The GPU format has RGB5551 and RGB655 swapped compared to internal regs format static PICA::ColorFmt ToColorFmt(u32 format) { @@ -1457,7 +1515,10 @@ void RendererVK::drawVertices(PICA::PrimType primType, std::span labelColor = {{1.0f, 0.0f, 0.0f, 1.0f}}; + Vulkan::insertDebugLabel(commandBuffer, labelColor, "DrawVertices: %u vertices", vertices.size()); commandBuffer.endRenderPass(); } From 57ee0a3db91b228a8b4d4c985ae243647680b752 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 24 Aug 2023 11:29:35 -0700 Subject: [PATCH 40/45] Fix display drawing Disable depth-testing, the display vertex-shader, and the viewport/scissor positioning of the bottom screen. We have visual! 
--- src/core/renderer_vk/renderer_vk.cpp | 8 ++++---- src/host_shaders/vulkan_display.vert | 20 ++------------------ 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 3387deda..ae4abc44 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -140,8 +140,8 @@ std::tuple createGraphicsPipeline( vk::PipelineDepthStencilStateCreateInfo depthStencilState = {}; - depthStencilState.depthTestEnable = true; - depthStencilState.depthWriteEnable = true; + depthStencilState.depthTestEnable = false; + depthStencilState.depthWriteEnable = false; depthStencilState.depthCompareOp = vk::CompareOp::eLessOrEqual; depthStencilState.depthBoundsTestEnable = false; depthStencilState.stencilTestEnable = false; @@ -715,8 +715,8 @@ void RendererVK::display() { vk::PipelineBindPoint::eGraphics, displayPipelineLayout.get(), 0, {bottomDisplayPipelineDescriptorSet[frameBufferingIndex]}, {} ); getCurrentCommandBuffer().bindPipeline(vk::PipelineBindPoint::eGraphics, displayPipeline.get()); - getCurrentCommandBuffer().setViewport(0, vk::Viewport(40, 0, 320, 240)); - getCurrentCommandBuffer().setScissor(0, vk::Rect2D({40, 0}, {320, 240})); + getCurrentCommandBuffer().setViewport(0, vk::Viewport(40, 240, 320, 240)); + getCurrentCommandBuffer().setScissor(0, vk::Rect2D({40, 240}, {320, 240})); getCurrentCommandBuffer().draw(3, 1, 0, 0); } diff --git a/src/host_shaders/vulkan_display.vert b/src/host_shaders/vulkan_display.vert index 766b8d0c..284997ca 100644 --- a/src/host_shaders/vulkan_display.vert +++ b/src/host_shaders/vulkan_display.vert @@ -2,22 +2,6 @@ layout(location = 0) out vec2 UV; void main() { - const vec4 positions[4] = vec4[]( - vec4(-1.0, 1.0, 1.0, 1.0), // Top-left - vec4(1.0, 1.0, 1.0, 1.0), // Top-right - vec4(-1.0, -1.0, 1.0, 1.0), // Bottom-left - vec4(1.0, -1.0, 1.0, 1.0) // Bottom-right - ); - - // The 3DS displays both screens' framebuffer 
rotated 90 deg counter clockwise - // So we adjust our texcoords accordingly - const vec2 texcoords[4] = vec2[]( - vec2(1.0, 1.0), // Top-right - vec2(1.0, 0.0), // Bottom-right - vec2(0.0, 1.0), // Top-left - vec2(0.0, 0.0) // Bottom-left - ); - - gl_Position = positions[gl_VertexIndex]; - UV = texcoords[gl_VertexIndex]; + UV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(UV * 2.0f + -1.0f, 0.0f, 1.0f); } \ No newline at end of file From 1540c941d03e9e51128c8cf8c46400d51be77628 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 24 Aug 2023 13:09:46 -0700 Subject: [PATCH 41/45] Write and flush top/bottom-screen descriptors A descriptor is consumed immediately when bound to a command buffer and cannot be updated after unless we mark the descriptors with `UPDATE_AFTER_BIND` flags and such. For now, just flush the writes immediately before binding the descriptors. Fixes all of the validation messages regarding invalid command buffers due to updated/deleted descriptor-sets. 
--- src/core/renderer_vk/renderer_vk.cpp | 72 ++++++++++++++++------------ 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index ae4abc44..eeddea32 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -686,38 +686,51 @@ void RendererVK::display() { getCurrentCommandBuffer().beginRenderPass(renderPassBeginInfo, vk::SubpassContents::eInline); - // Render top screen - if (const Texture* topScreen = findRenderTexture(topScreenAddr); topScreen) { - static const std::array scopeColor = {{1.0f, 0.0f, 0.0f, 1.0f}}; - Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), scopeColor, "Top Screen: %08x", topScreenAddr); + const Texture* topScreen = findRenderTexture(topScreenAddr); + const Texture* bottomScreen = findRenderTexture(bottomScreenAddr); - descriptorUpdateBatch->addImageSampler( - topDisplayPipelineDescriptorSet[frameBufferingIndex], 0, topScreen->imageView.get(), samplerCache->getSampler(sampler2D()) - ); - getCurrentCommandBuffer().bindDescriptorSets( - vk::PipelineBindPoint::eGraphics, displayPipelineLayout.get(), 0, {topDisplayPipelineDescriptorSet[frameBufferingIndex]}, {} - ); + if (topScreen || bottomScreen) { getCurrentCommandBuffer().bindPipeline(vk::PipelineBindPoint::eGraphics, displayPipeline.get()); - getCurrentCommandBuffer().setViewport(0, vk::Viewport(0, 0, 400, 240)); - getCurrentCommandBuffer().setScissor(0, vk::Rect2D({0, 0}, {400, 240})); - getCurrentCommandBuffer().draw(3, 1, 0, 0); - } - // Render bottom screen - if (const Texture* bottomScreen = findRenderTexture(bottomScreenAddr); bottomScreen) { - static const std::array scopeColor = {{0.0f, 1.0f, 0.0f, 1.0f}}; - Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), scopeColor, "Bottom Screen: %08x", bottomScreenAddr); + // Update descriptors before binding to the command buffer + if (topScreen) { + descriptorUpdateBatch->addImageSampler( + 
topDisplayPipelineDescriptorSet[frameBufferingIndex], 0, topScreen->imageView.get(), samplerCache->getSampler(sampler2D()) + ); + } - descriptorUpdateBatch->addImageSampler( - bottomDisplayPipelineDescriptorSet[frameBufferingIndex], 0, bottomScreen->imageView.get(), samplerCache->getSampler(sampler2D()) - ); - getCurrentCommandBuffer().bindDescriptorSets( - vk::PipelineBindPoint::eGraphics, displayPipelineLayout.get(), 0, {bottomDisplayPipelineDescriptorSet[frameBufferingIndex]}, {} - ); - getCurrentCommandBuffer().bindPipeline(vk::PipelineBindPoint::eGraphics, displayPipeline.get()); - getCurrentCommandBuffer().setViewport(0, vk::Viewport(40, 240, 320, 240)); - getCurrentCommandBuffer().setScissor(0, vk::Rect2D({40, 240}, {320, 240})); - getCurrentCommandBuffer().draw(3, 1, 0, 0); + if (bottomScreen) { + descriptorUpdateBatch->addImageSampler( + bottomDisplayPipelineDescriptorSet[frameBufferingIndex], 0, bottomScreen->imageView.get(), samplerCache->getSampler(sampler2D()) + ); + } + descriptorUpdateBatch->flush(); + + // Render top screen + if (topScreen) { + static const std::array scopeColor = {{1.0f, 0.0f, 0.0f, 1.0f}}; + Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), scopeColor, "Top Screen: %08x", topScreenAddr); + + getCurrentCommandBuffer().bindDescriptorSets( + vk::PipelineBindPoint::eGraphics, displayPipelineLayout.get(), 0, {topDisplayPipelineDescriptorSet[frameBufferingIndex]}, {} + ); + getCurrentCommandBuffer().setViewport(0, vk::Viewport(0, 0, 400, 240)); + getCurrentCommandBuffer().setScissor(0, vk::Rect2D({0, 0}, {400, 240})); + getCurrentCommandBuffer().draw(3, 1, 0, 0); + } + + // Render bottom screen + if (bottomScreen) { + static const std::array scopeColor = {{0.0f, 1.0f, 0.0f, 1.0f}}; + Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), scopeColor, "Bottom Screen: %08x", bottomScreenAddr); + getCurrentCommandBuffer().bindDescriptorSets( + vk::PipelineBindPoint::eGraphics, displayPipelineLayout.get(), 0, 
{bottomDisplayPipelineDescriptorSet[frameBufferingIndex]}, {} + ); + getCurrentCommandBuffer().bindPipeline(vk::PipelineBindPoint::eGraphics, displayPipeline.get()); + getCurrentCommandBuffer().setViewport(0, vk::Viewport(40, 240, 320, 240)); + getCurrentCommandBuffer().setScissor(0, vk::Rect2D({40, 240}, {320, 240})); + getCurrentCommandBuffer().draw(3, 1, 0, 0); + } } getCurrentCommandBuffer().endRenderPass(); @@ -789,9 +802,6 @@ void RendererVK::display() { Helpers::panic("Error ending command buffer recording: %s\n", vk::to_string(endResult).c_str()); } - // Flush all descriptor writes - descriptorUpdateBatch->flush(); - vk::SubmitInfo submitInfo = {}; // Wait for any previous uses of the image image to finish presenting std::vector waitSemaphores; From 085d7d55680c259fe66fd124e1eaa74eb0539e69 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sat, 26 Aug 2023 22:04:21 -0700 Subject: [PATCH 42/45] Add clear-buffer support for Depth/Stencil --- include/renderer_vk/renderer_vk.hpp | 1 + src/core/renderer_vk/renderer_vk.cpp | 67 +++++++++++++++++++++++++--- 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 07fcdd86..92007674 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -61,6 +61,7 @@ class RendererVK final : public Renderer { u32 sizePerPixel = 0; std::array size = {}; + vk::Format format; vk::UniqueImage image; vk::UniqueDeviceMemory imageMemory; vk::UniqueImageView imageView; diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index eeddea32..de36e2a3 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -267,9 +267,11 @@ RendererVK::Texture& RendererVK::getColorRenderTexture(u32 addr, PICA::ColorFmt newTexture.sizePerPixel = PICA::sizePerPixel(format); newTexture.size = fbSize; + newTexture.format = Vulkan::colorFormatToVulkan(format); + 
vk::ImageCreateInfo textureInfo = {}; textureInfo.setImageType(vk::ImageType::e2D); - textureInfo.setFormat(Vulkan::colorFormatToVulkan(format)); + textureInfo.setFormat(newTexture.format); textureInfo.setExtent(vk::Extent3D(width, height, 1)); textureInfo.setMipLevels(1); textureInfo.setArrayLayers(1); @@ -295,7 +297,7 @@ RendererVK::Texture& RendererVK::getColorRenderTexture(u32 addr, PICA::ColorFmt vk::ImageViewCreateInfo viewInfo = {}; viewInfo.image = newTexture.image.get(); viewInfo.viewType = vk::ImageViewType::e2D; - viewInfo.format = textureInfo.format; + viewInfo.format = newTexture.format; viewInfo.components = vk::ComponentMapping(); viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); @@ -338,9 +340,11 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt newTexture.sizePerPixel = PICA::sizePerPixel(format); newTexture.size = fbSize; + newTexture.format = Vulkan::depthFormatToVulkan(format); + vk::ImageCreateInfo textureInfo = {}; textureInfo.setImageType(vk::ImageType::e2D); - textureInfo.setFormat(Vulkan::depthFormatToVulkan(format)); + textureInfo.setFormat(newTexture.format); textureInfo.setExtent(vk::Extent3D(width, height, 1)); textureInfo.setMipLevels(1); textureInfo.setArrayLayers(1); @@ -366,7 +370,7 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt vk::ImageViewCreateInfo viewInfo = {}; viewInfo.image = newTexture.image.get(); viewInfo.viewType = vk::ImageViewType::e2D; - viewInfo.format = textureInfo.format; + viewInfo.format = newTexture.format; viewInfo.components = vk::ComponentMapping(); // viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1); viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1); @@ -1328,8 +1332,8 @@ void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 co return; 
} - // Color-Clear - { + if (*vk::componentName(renderTexture->format, 0) != 'D') { + // Color-Clear vk::ClearColorValue clearColor = {}; clearColor.float32[0] = Helpers::getBits<24, 8>(value) / 255.0f; // r @@ -1371,6 +1375,57 @@ void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 co ), } ); + } else { + // Depth-Clear + vk::ClearDepthStencilValue clearDepthStencil = {}; + + if (vk::componentBits(renderTexture->format, 0) == 16) { + clearDepthStencil.depth = (value & 0xffff) / 65535.0f; + } else { + clearDepthStencil.depth = (value & 0xffffff) / 16777215.0f; + } + + clearDepthStencil.stencil = (value >> 24); // Stencil + + const std::array scopeColor = {{clearDepthStencil.depth, clearDepthStencil.depth, clearDepthStencil.depth, 1.0f}}; + Vulkan::DebugLabelScope scope( + getCurrentCommandBuffer(), scopeColor, "ClearBuffer start:%08X end:%08X value:%08X control:%08X\n", startAddress, endAddress, value, + control + ); + + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + { + // renderTexture: ShaderReadOnlyOptimal -> TransferDst + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eShaderRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eShaderReadOnlyOptimal, + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, renderTexture->image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1) + ), + } + ); + + static vk::ImageSubresourceRange depthStencilRanges[2] = { + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1)}; + + // Clear RenderTarget + getCurrentCommandBuffer().clearDepthStencilImage( + renderTexture->image.get(), vk::ImageLayout::eTransferDstOptimal, &clearDepthStencil, vk::componentCount(renderTexture->format), + depthStencilRanges 
+ ); + + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllGraphics, vk::DependencyFlags(), {}, {}, + { + // renderTexture: TransferDst -> eShaderReadOnlyOptimal + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eShaderReadOnlyOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, renderTexture->image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1) + ), + } + ); } } From 225d2095b9cf1cedc945344c92c041e19cbfdd93 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sat, 26 Aug 2023 22:11:44 -0700 Subject: [PATCH 43/45] Remove redundant semaphore wait Don't need this anymore now that we sync on the host anyways --- src/core/renderer_vk/renderer_vk.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index de36e2a3..cd90fd70 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -817,12 +817,6 @@ void RendererVK::display() { waitSemaphoreStages.emplace_back(waitStageMask); } - // Ensure a proper semaphore wait on render-finished - // We already wait on the fence, but this must be done to be compliant - // to validation layers - waitSemaphores.emplace_back(renderFinishedSemaphore[frameBufferingIndex].get()); - waitSemaphoreStages.emplace_back(vk::PipelineStageFlagBits::eColorAttachmentOutput); - submitInfo.setWaitSemaphores(waitSemaphores); submitInfo.setWaitDstStageMask(waitSemaphoreStages); } From 2f96c1d24da7f54ad2de210ae7edc215b47e04ac Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sat, 26 Aug 2023 22:25:18 -0700 Subject: [PATCH 44/45] Fix Depth/Stencil render-target aspects Views must have a singular aspect set, while pipeline barriers need both aspects. 
Fixes more validation messages --- src/core/renderer_vk/renderer_vk.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index cd90fd70..6c6d9699 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -352,7 +352,7 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt textureInfo.setTiling(vk::ImageTiling::eOptimal); textureInfo.setUsage( vk::ImageUsageFlagBits::eDepthStencilAttachment | vk::ImageUsageFlagBits::eInputAttachment | vk::ImageUsageFlagBits::eTransferSrc | - vk::ImageUsageFlagBits::eTransferDst + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled ); textureInfo.setSharingMode(vk::SharingMode::eExclusive); textureInfo.setInitialLayout(vk::ImageLayout::eUndefined); @@ -375,10 +375,6 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt // viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1); viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1); - if (PICA::hasStencil(format)) { - viewInfo.subresourceRange.aspectMask |= vk::ImageAspectFlagBits::eStencil; - } - if (auto [result, imageMemory] = Vulkan::commitImageHeap(device.get(), physicalDevice, {&newTexture.image.get(), 1}); result == vk::Result::eSuccess) { newTexture.imageMemory = std::move(imageMemory); @@ -392,7 +388,10 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt Helpers::panic("Error creating depth render-texture: %s\n", vk::to_string(createResult.result).c_str()); } - // Initial layout transition + // Initial layout transition (depth and/or stencil) + if (vk::componentCount(newTexture.format) == 2) { + viewInfo.subresourceRange.aspectMask |= vk::ImageAspectFlagBits::eStencil; + } 
getCurrentCommandBuffer().pipelineBarrier( vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlags{}, {}, {}, {vk::ImageMemoryBarrier( From c77740acf00317e62283379a64dc59d21d7fca26 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sat, 26 Aug 2023 22:44:57 -0700 Subject: [PATCH 45/45] Fix render-texture size Use the width/height provided and not the current `fbSize`. Fixes a division exception. --- src/core/renderer_vk/renderer_vk.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 6c6d9699..52c46668 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -265,7 +265,7 @@ RendererVK::Texture& RendererVK::getColorRenderTexture(u32 addr, PICA::ColorFmt Texture& newTexture = textureCache[renderTextureHash]; newTexture.loc = addr; newTexture.sizePerPixel = PICA::sizePerPixel(format); - newTexture.size = fbSize; + newTexture.size = {width, height}; newTexture.format = Vulkan::colorFormatToVulkan(format); @@ -338,7 +338,7 @@ RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt Texture& newTexture = textureCache[renderTextureHash]; newTexture.loc = addr; newTexture.sizePerPixel = PICA::sizePerPixel(format); - newTexture.size = fbSize; + newTexture.size = {width, height}; newTexture.format = Vulkan::depthFormatToVulkan(format);