diff --git a/CMakeLists.txt b/CMakeLists.txt index 68975209..a32382a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -249,20 +249,48 @@ if(ENABLE_VULKAN) ) set(RENDERER_VK_INCLUDE_FILES include/renderer_vk/renderer_vk.hpp - include/renderer_vk/vulkan_api.hpp include/renderer_vk/vk_debug.hpp + include/renderer_vk/vk_api.hpp include/renderer_vk/vk_debug.hpp + include/renderer_vk/vk_descriptor_heap.hpp + include/renderer_vk/vk_descriptor_update_batch.hpp + include/renderer_vk/vk_sampler_cache.hpp + include/renderer_vk/vk_memory.hpp include/renderer_vk/vk_pica.hpp ) set(RENDERER_VK_SOURCE_FILES src/core/renderer_vk/renderer_vk.cpp - src/core/renderer_vk/vulkan_api.cpp src/core/renderer_vk/vk_debug.cpp + src/core/renderer_vk/vk_api.cpp src/core/renderer_vk/vk_debug.cpp + src/core/renderer_vk/vk_descriptor_heap.cpp + src/core/renderer_vk/vk_descriptor_update_batch.cpp + src/core/renderer_vk/vk_sampler_cache.cpp + src/core/renderer_vk/vk_memory.cpp src/core/renderer_vk/vk_pica.cpp ) set(HEADER_FILES ${HEADER_FILES} ${RENDERER_VK_INCLUDE_FILES}) source_group("Source Files\\Core\\Vulkan Renderer" FILES ${RENDERER_VK_SOURCE_FILES}) + + set(RENDERER_VK_HOST_SHADERS_SOURCE + "src/host_shaders/vulkan_display.frag" + "src/host_shaders/vulkan_display.vert" + ) + +foreach( HOST_SHADER_SOURCE ${RENDERER_VK_HOST_SHADERS_SOURCE} ) + get_filename_component( FILE_NAME ${HOST_SHADER_SOURCE} NAME ) + set( HOST_SHADER_SPIRV "${PROJECT_BINARY_DIR}/host_shaders/${FILE_NAME}.spv" ) + add_custom_command( + OUTPUT ${HOST_SHADER_SPIRV} + COMMAND ${CMAKE_COMMAND} -E make_directory "${PROJECT_BINARY_DIR}/host_shaders/" + COMMAND Vulkan::glslangValidator -t --target-env vulkan1.1 -g -V "${PROJECT_SOURCE_DIR}/${HOST_SHADER_SOURCE}" -o ${HOST_SHADER_SPIRV} + #COMMAND ${SPIRV_OPT} -O ${HOST_SHADER_SPIRV} -o ${HOST_SHADER_SPIRV} + DEPENDS ${HOST_SHADER_SOURCE} + ) + list( APPEND RENDERER_VK_HOST_SHADERS_SPIRV ${HOST_SHADER_SPIRV} ) +endforeach() + cmrc_add_resource_library( resources_renderer_vk 
NAMESPACE RendererVK - WHENCE "src/host_shaders/" + WHENCE "${PROJECT_BINARY_DIR}/host_shaders/" + ${RENDERER_VK_HOST_SHADERS_SPIRV} ) endif() diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 59d8cdae..92007674 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -1,5 +1,12 @@ +#include +#include + +#include "math_util.hpp" #include "renderer.hpp" -#include "vulkan_api.hpp" +#include "vk_api.hpp" +#include "vk_descriptor_heap.hpp" +#include "vk_descriptor_update_batch.hpp" +#include "vk_sampler_cache.hpp" class GPU; @@ -10,7 +17,7 @@ class RendererVK final : public Renderer { vk::UniqueInstance instance = {}; vk::UniqueDebugUtilsMessengerEXT debugMessenger = {}; - vk::UniqueSurfaceKHR surface = {}; + vk::SurfaceKHR swapchainSurface = {}; vk::PhysicalDevice physicalDevice = {}; @@ -32,17 +39,74 @@ class RendererVK final : public Renderer { std::vector swapchainImages = {}; std::vector swapchainImageViews = {}; - // Per-swapchain-image data - // Each vector is `swapchainImageCount` in size - std::vector presentCommandBuffers = {}; + // This value is the degree of parallelism to allow multiple frames to be in-flight + // aka: "double-buffer"/"triple-buffering" + // Todo: make this a configuration option + static constexpr usize frameBufferingCount = 3; + + // Frame-buffering data + // Each vector is `frameBufferingCount` in size std::vector swapImageFreeSemaphore = {}; std::vector renderFinishedSemaphore = {}; std::vector frameFinishedFences = {}; + std::vector> frameFramebuffers = {}; + std::vector frameCommandBuffers = {}; + + const vk::CommandBuffer& getCurrentCommandBuffer() const { return frameCommandBuffers[frameBufferingIndex].get(); } + + // Todo: + // Use `{colourBuffer,depthBuffer}Loc` to maintain an std::map-cache of framebuffers + struct Texture { + u32 loc = 0; + u32 sizePerPixel = 0; + std::array size = {}; + + vk::Format format; + vk::UniqueImage image; + 
vk::UniqueDeviceMemory imageMemory; + vk::UniqueImageView imageView; + + Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { + // PICA textures have top-left origin, same as Vulkan + const u32 startOffset = (inputAddress - loc) / sizePerPixel; + const u32 x0 = (startOffset % (size[0] * 8)) / 8; + const u32 y0 = (startOffset / (size[0] * 8)) * 8; + return Math::Rect{x0, y0, x0 + width, y0 + height}; + } + }; + // Hash(loc, size, format) -> Texture + std::map textureCache; + + Texture* findRenderTexture(u32 addr); + Texture& getColorRenderTexture(u32 addr, PICA::ColorFmt format, u32 width, u32 height); + Texture& getDepthRenderTexture(u32 addr, PICA::DepthFmt format, u32 width, u32 height); + + // Framebuffer for the top/bottom image + std::vector screenTexture = {}; + std::vector screenTextureViews = {}; + std::vector screenTextureFramebuffers = {}; + vk::UniqueDeviceMemory framebufferMemory = {}; + + std::map renderPassCache; + + vk::RenderPass getRenderPass(vk::Format colorFormat, std::optional depthFormat); + vk::RenderPass getRenderPass(PICA::ColorFmt colorFormat, std::optional depthFormat); + + std::unique_ptr descriptorUpdateBatch; + std::unique_ptr samplerCache; + + // Display pipeline data + std::unique_ptr displayDescriptorHeap; + vk::UniquePipeline displayPipeline; + vk::UniquePipelineLayout displayPipelineLayout; + std::vector topDisplayPipelineDescriptorSet; + std::vector bottomDisplayPipelineDescriptorSet; // Recreate the swapchain, possibly re-using the old one in the case of a resize vk::Result recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D swapchainExtent); - u64 currentFrame = 0; + u64 frameBufferingIndex = 0; + public: RendererVK(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); ~RendererVK() override; diff --git a/include/renderer_vk/vulkan_api.hpp b/include/renderer_vk/vk_api.hpp similarity index 100% rename from include/renderer_vk/vulkan_api.hpp rename to include/renderer_vk/vk_api.hpp diff --git 
a/include/renderer_vk/vk_debug.hpp b/include/renderer_vk/vk_debug.hpp index afc367dc..ed712269 100644 --- a/include/renderer_vk/vk_debug.hpp +++ b/include/renderer_vk/vk_debug.hpp @@ -4,7 +4,7 @@ #include #include -#include "vulkan_api.hpp" +#include "vk_api.hpp" namespace Vulkan { diff --git a/include/renderer_vk/vk_descriptor_heap.hpp b/include/renderer_vk/vk_descriptor_heap.hpp new file mode 100644 index 00000000..8a9630e3 --- /dev/null +++ b/include/renderer_vk/vk_descriptor_heap.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include + +#include "helpers.hpp" +#include "vk_api.hpp" + +namespace Vulkan { + // Implements a basic heap of descriptor sets given a layout of particular + // bindings. Create a descriptor set by providing a list of bindings and it will + // automatically create both the pool, layout, and maintail a heap of descriptor + // sets. Descriptor sets will be reused and recycled. Assume that newly + // allocated descriptor sets are in an undefined state. + class DescriptorHeap { + private: + const vk::Device device; + + vk::UniqueDescriptorPool descriptorPool; + vk::UniqueDescriptorSetLayout descriptorSetLayout; + std::vector descriptorSets; + + std::vector bindings; + + std::vector allocationMap; + + explicit DescriptorHeap(vk::Device device); + + public: + ~DescriptorHeap() = default; + + DescriptorHeap(DescriptorHeap&&) = default; + + const vk::DescriptorPool& getDescriptorPool() const { return descriptorPool.get(); }; + + const vk::DescriptorSetLayout& getDescriptorSetLayout() const { return descriptorSetLayout.get(); }; + + const std::span getDescriptorSets() const { return descriptorSets; }; + + std::span getBindings() const { return bindings; }; + + std::optional allocateDescriptorSet(); + bool freeDescriptorSet(vk::DescriptorSet set); + + static std::optional create( + vk::Device device, std::span bindings, u16 descriptorHeapCount = 1024 + ); + }; +} // namespace Vulkan \ No newline at end of file diff --git 
a/include/renderer_vk/vk_descriptor_update_batch.hpp b/include/renderer_vk/vk_descriptor_update_batch.hpp new file mode 100644 index 00000000..1a10214d --- /dev/null +++ b/include/renderer_vk/vk_descriptor_update_batch.hpp @@ -0,0 +1,62 @@ +#pragma once + +#include +#include +#include + +#include "helpers.hpp" +#include "vk_api.hpp" + +namespace Vulkan { + // Implements a re-usable structure for batching up descriptor writes with a + // finite amount of space for both convenience and to reduce the overall amount + // of API calls to `vkUpdateDescriptorSets` + class DescriptorUpdateBatch { + private: + const vk::Device device; + + const usize descriptorWriteMax; + const usize descriptorCopyMax; + + using DescriptorInfoUnion = std::variant; + + // Todo: Maybe some kind of hash so that these structures can be re-used + // among descriptor writes. + std::unique_ptr descriptorInfos; + std::unique_ptr descriptorWrites; + std::unique_ptr descriptorCopies; + + usize descriptorWriteEnd = 0; + usize descriptorCopyEnd = 0; + + DescriptorUpdateBatch(vk::Device device, usize descriptorWriteMax, usize descriptorCopyMax) + : device(device), descriptorWriteMax(descriptorWriteMax), descriptorCopyMax(descriptorCopyMax) {} + + public: + ~DescriptorUpdateBatch() = default; + + DescriptorUpdateBatch(DescriptorUpdateBatch&&) = default; + + void flush(); + + void addImage( + vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::ImageView imageView, vk::ImageLayout imageLayout = vk::ImageLayout::eGeneral + ); + void addSampler(vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::Sampler sampler); + + void addImageSampler( + vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::ImageView imageView, vk::Sampler sampler, + vk::ImageLayout imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal + ); + void addBuffer( + vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size = VK_WHOLE_SIZE + ); + + void copyBinding( + 
vk::DescriptorSet sourceDescriptor, vk::DescriptorSet targetDescriptor, u8 sourceBinding, u8 targetBinding, u8 sourceArrayElement = 0, + u8 targetArrayElement = 0, u8 descriptorCount = 1 + ); + + static std::optional create(vk::Device device, usize descriptorWriteMax = 256, usize descriptorCopyMax = 256); + }; +} // namespace Vulkan \ No newline at end of file diff --git a/include/renderer_vk/vk_memory.hpp b/include/renderer_vk/vk_memory.hpp new file mode 100644 index 00000000..a84a5720 --- /dev/null +++ b/include/renderer_vk/vk_memory.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include + +#include "helpers.hpp" +#include "vk_api.hpp" + +namespace Vulkan { + + // Will try to find a memory type that is suitable for the given requirements. + // Returns -1 if no suitable memory type was found. + s32 findMemoryTypeIndex( + vk::PhysicalDevice physicalDevice, u32 memoryTypeMask, vk::MemoryPropertyFlags memoryProperties, + vk::MemoryPropertyFlags memoryExcludeProperties = vk::MemoryPropertyFlagBits::eProtected + ); + + // Given an array of valid Vulkan image-handles or buffer-handles, these + // functions will allocate a single block of device-memory for all of them + // and bind them consecutively. + // There may be a case that all the buffers or images cannot be allocated + // to the same device memory due to their required memory-type. 
+ std::tuple commitImageHeap( + vk::Device device, vk::PhysicalDevice physicalDevice, const std::span images, + vk::MemoryPropertyFlags memoryProperties = vk::MemoryPropertyFlagBits::eDeviceLocal, + vk::MemoryPropertyFlags memoryExcludeProperties = vk::MemoryPropertyFlagBits::eProtected + ); + + std::tuple commitBufferHeap( + vk::Device device, vk::PhysicalDevice physicalDevice, const std::span buffers, + vk::MemoryPropertyFlags memoryProperties = vk::MemoryPropertyFlagBits::eDeviceLocal, + vk::MemoryPropertyFlags memoryExcludeProperties = vk::MemoryPropertyFlagBits::eProtected + ); + +} // namespace Vulkan \ No newline at end of file diff --git a/include/renderer_vk/vk_pica.hpp b/include/renderer_vk/vk_pica.hpp new file mode 100644 index 00000000..affd3aa8 --- /dev/null +++ b/include/renderer_vk/vk_pica.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include "PICA/gpu.hpp" +#include "helpers.hpp" +#include "vk_api.hpp" + +namespace Vulkan { + + vk::Format colorFormatToVulkan(PICA::ColorFmt colorFormat); + vk::Format depthFormatToVulkan(PICA::DepthFmt depthFormat); + +} // namespace Vulkan \ No newline at end of file diff --git a/include/renderer_vk/vk_sampler_cache.hpp b/include/renderer_vk/vk_sampler_cache.hpp new file mode 100644 index 00000000..8cb27689 --- /dev/null +++ b/include/renderer_vk/vk_sampler_cache.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include +#include + +#include "helpers.hpp" +#include "vk_api.hpp" + +namespace Vulkan { + // Implements a simple pool of reusable sampler objects + class SamplerCache { + private: + const vk::Device device; + + std::unordered_map samplerMap; + + explicit SamplerCache(vk::Device device); + + public: + ~SamplerCache() = default; + + SamplerCache(SamplerCache&&) = default; + + const vk::Sampler& getSampler(const vk::SamplerCreateInfo& samplerInfo); + + static std::optional create(vk::Device device); + }; +} // namespace Vulkan \ No newline at end of file diff --git a/src/core/renderer_vk/renderer_vk.cpp 
b/src/core/renderer_vk/renderer_vk.cpp index 4ec70412..52c46668 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -1,5 +1,6 @@ #include "renderer_vk/renderer_vk.hpp" +#include #include #include #include @@ -7,6 +8,203 @@ #include "SDL_vulkan.h" #include "helpers.hpp" #include "renderer_vk/vk_debug.hpp" +#include "renderer_vk/vk_memory.hpp" +#include "renderer_vk/vk_pica.hpp" + +CMRC_DECLARE(RendererVK); + +static vk::SamplerCreateInfo sampler2D(bool filtered = true, bool clamp = false) { + vk::SamplerCreateInfo samplerInfo = {}; + samplerInfo.magFilter = filtered ? vk::Filter::eLinear : vk::Filter::eNearest; + samplerInfo.minFilter = filtered ? vk::Filter::eLinear : vk::Filter::eNearest; + + samplerInfo.mipmapMode = vk::SamplerMipmapMode::eLinear; + + samplerInfo.addressModeU = clamp ? vk::SamplerAddressMode::eClampToEdge : vk::SamplerAddressMode::eRepeat; + samplerInfo.addressModeV = clamp ? vk::SamplerAddressMode::eClampToEdge : vk::SamplerAddressMode::eRepeat; + samplerInfo.addressModeW = clamp ? 
vk::SamplerAddressMode::eClampToEdge : vk::SamplerAddressMode::eRepeat; + + samplerInfo.mipLodBias = 0.0f; + samplerInfo.anisotropyEnable = VK_FALSE; + samplerInfo.maxAnisotropy = 16.0f; + + samplerInfo.compareEnable = VK_FALSE; + samplerInfo.compareOp = vk::CompareOp::eAlways; + + samplerInfo.minLod = 0.0f; + samplerInfo.maxLod = VK_LOD_CLAMP_NONE; + samplerInfo.borderColor = vk::BorderColor::eFloatTransparentBlack; + samplerInfo.unnormalizedCoordinates = VK_FALSE; + return samplerInfo; +} + +static vk::UniqueShaderModule createShaderModule(vk::Device device, std::span shaderCode) { + vk::ShaderModuleCreateInfo shaderModuleInfo = {}; + shaderModuleInfo.pCode = reinterpret_cast(shaderCode.data()); + shaderModuleInfo.codeSize = shaderCode.size(); + + vk::UniqueShaderModule shaderModule = {}; + if (auto createResult = device.createShaderModuleUnique(shaderModuleInfo); createResult.result == vk::Result::eSuccess) { + shaderModule = std::move(createResult.value); + } else { + Helpers::panic("Error creating shader module: %s\n", vk::to_string(createResult.result).c_str()); + } + return shaderModule; +} + +static inline vk::UniqueShaderModule createShaderModule(vk::Device device, cmrc::file shaderFile) { + return createShaderModule(device, std::span(reinterpret_cast(shaderFile.begin()), shaderFile.size())); +} + +std::tuple createGraphicsPipeline( + vk::Device device, std::span pushConstants, std::span setLayouts, + vk::ShaderModule vertModule, vk::ShaderModule fragModule, std::span vertexBindingDescriptions, + std::span vertexAttributeDescriptions, vk::RenderPass renderPass +) { + // Create Pipeline Layout + vk::PipelineLayoutCreateInfo graphicsPipelineLayoutInfo = {}; + + graphicsPipelineLayoutInfo.pSetLayouts = setLayouts.data(); + graphicsPipelineLayoutInfo.setLayoutCount = setLayouts.size(); + graphicsPipelineLayoutInfo.pPushConstantRanges = pushConstants.data(); + graphicsPipelineLayoutInfo.pushConstantRangeCount = pushConstants.size(); + + vk::UniquePipelineLayout 
graphicsPipelineLayout = {}; + if (auto createResult = device.createPipelineLayoutUnique(graphicsPipelineLayoutInfo); createResult.result == vk::Result::eSuccess) { + graphicsPipelineLayout = std::move(createResult.value); + } else { + Helpers::panic("Error creating pipeline layout: %s\n", vk::to_string(createResult.result).c_str()); + return {}; + } + + // Describe the stage and entry point of each shader + const vk::PipelineShaderStageCreateInfo ShaderStagesInfo[2] = { + vk::PipelineShaderStageCreateInfo( + {}, // Flags + vk::ShaderStageFlagBits::eVertex, // Shader Stage + vertModule, // Shader Module + "main", // Shader entry point function name + {} // Shader specialization info + ), + vk::PipelineShaderStageCreateInfo( + {}, // Flags + vk::ShaderStageFlagBits::eFragment, // Shader Stage + fragModule, // Shader Module + "main", // Shader entry point function name + {} // Shader specialization info + ), + }; + + vk::PipelineVertexInputStateCreateInfo vertexInputState = {}; + + vertexInputState.vertexBindingDescriptionCount = vertexBindingDescriptions.size(); + vertexInputState.pVertexBindingDescriptions = vertexBindingDescriptions.data(); + + vertexInputState.vertexAttributeDescriptionCount = vertexAttributeDescriptions.size(); + vertexInputState.pVertexAttributeDescriptions = vertexAttributeDescriptions.data(); + + vk::PipelineInputAssemblyStateCreateInfo inputAssemblyState = {}; + inputAssemblyState.topology = vk::PrimitiveTopology::eTriangleList; + inputAssemblyState.primitiveRestartEnable = false; + + vk::PipelineViewportStateCreateInfo viewportState = {}; + + static const vk::Viewport defaultViewport = {0, 0, 16, 16, 0.0f, 1.0f}; + static const vk::Rect2D defaultScissor = {{0, 0}, {16, 16}}; + viewportState.viewportCount = 1; + viewportState.pViewports = &defaultViewport; + viewportState.scissorCount = 1; + viewportState.pScissors = &defaultScissor; + + vk::PipelineRasterizationStateCreateInfo rasterizationState = {}; + + rasterizationState.depthClampEnable 
= false; + rasterizationState.rasterizerDiscardEnable = false; + rasterizationState.polygonMode = vk::PolygonMode::eFill; + rasterizationState.cullMode = vk::CullModeFlagBits::eBack; + rasterizationState.frontFace = vk::FrontFace::eClockwise; + rasterizationState.depthBiasEnable = false; + rasterizationState.depthBiasConstantFactor = 0.0f; + rasterizationState.depthBiasClamp = 0.0f; + rasterizationState.depthBiasSlopeFactor = 0.0; + rasterizationState.lineWidth = 1.0f; + + vk::PipelineMultisampleStateCreateInfo multisampleState = {}; + + multisampleState.rasterizationSamples = vk::SampleCountFlagBits::e1; + multisampleState.sampleShadingEnable = false; + multisampleState.minSampleShading = 1.0f; + multisampleState.pSampleMask = nullptr; + multisampleState.alphaToCoverageEnable = true; + multisampleState.alphaToOneEnable = false; + + vk::PipelineDepthStencilStateCreateInfo depthStencilState = {}; + + depthStencilState.depthTestEnable = false; + depthStencilState.depthWriteEnable = false; + depthStencilState.depthCompareOp = vk::CompareOp::eLessOrEqual; + depthStencilState.depthBoundsTestEnable = false; + depthStencilState.stencilTestEnable = false; + depthStencilState.front = vk::StencilOp::eKeep; + depthStencilState.back = vk::StencilOp::eKeep; + depthStencilState.minDepthBounds = 0.0f; + depthStencilState.maxDepthBounds = 1.0f; + + vk::PipelineColorBlendStateCreateInfo colorBlendState = {}; + + colorBlendState.logicOpEnable = false; + colorBlendState.logicOp = vk::LogicOp::eClear; + colorBlendState.attachmentCount = 1; + + vk::PipelineColorBlendAttachmentState blendAttachmentState = {}; + + blendAttachmentState.blendEnable = false; + blendAttachmentState.srcColorBlendFactor = vk::BlendFactor::eZero; + blendAttachmentState.dstColorBlendFactor = vk::BlendFactor::eZero; + blendAttachmentState.colorBlendOp = vk::BlendOp::eAdd; + blendAttachmentState.srcAlphaBlendFactor = vk::BlendFactor::eZero; + blendAttachmentState.dstAlphaBlendFactor = vk::BlendFactor::eZero; + 
blendAttachmentState.alphaBlendOp = vk::BlendOp::eAdd; + blendAttachmentState.colorWriteMask = + vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA; + + colorBlendState.pAttachments = &blendAttachmentState; + + vk::PipelineDynamicStateCreateInfo dynamicState = {}; + static vk::DynamicState dynamicStates[] = {// The viewport and scissor of the framebuffer will be dynamic at + // run-time + vk::DynamicState::eViewport, vk::DynamicState::eScissor}; + dynamicState.dynamicStateCount = std::size(dynamicStates); + dynamicState.pDynamicStates = dynamicStates; + + vk::GraphicsPipelineCreateInfo renderPipelineInfo = {}; + + renderPipelineInfo.stageCount = 2; // Vert + Frag stages + renderPipelineInfo.pStages = ShaderStagesInfo; + renderPipelineInfo.pVertexInputState = &vertexInputState; + renderPipelineInfo.pInputAssemblyState = &inputAssemblyState; + renderPipelineInfo.pViewportState = &viewportState; + renderPipelineInfo.pRasterizationState = &rasterizationState; + renderPipelineInfo.pMultisampleState = &multisampleState; + renderPipelineInfo.pDepthStencilState = &depthStencilState; + renderPipelineInfo.pColorBlendState = &colorBlendState; + renderPipelineInfo.pDynamicState = &dynamicState; + renderPipelineInfo.subpass = 0; + renderPipelineInfo.renderPass = renderPass; + renderPipelineInfo.layout = graphicsPipelineLayout.get(); + + // Create Pipeline + vk::UniquePipeline pipeline = {}; + + if (auto createResult = device.createGraphicsPipelineUnique({}, renderPipelineInfo); createResult.result == vk::Result::eSuccess) { + pipeline = std::move(createResult.value); + } else { + Helpers::panic("Error creating graphics pipeline: %s\n", vk::to_string(createResult.result).c_str()); + return {}; + } + + return std::make_tuple(std::move(pipeline), std::move(graphicsPipelineLayout)); +} // Finds the first queue family that satisfies `queueMask` and excludes `queueExcludeMask` bits // Returns -1 if not 
found @@ -23,6 +221,274 @@ static s32 findQueueFamily( return -1; } +static u32 rotl32(u32 x, u32 n) { return (x << n) | (x >> (32 - n)); } +static u32 ror32(u32 x, u32 n) { return (x >> n) | (x << (32 - n)); } + +// Lower 32 bits is the format + size, upper 32-bits is the address +static u64 colorBufferHash(u32 loc, u32 size, PICA::ColorFmt format) { + return (static_cast(loc) << 32) | (ror32(size, 23) ^ static_cast(format)); +} +static u64 depthBufferHash(u32 loc, u32 size, PICA::DepthFmt format) { + return (static_cast(loc) << 32) | (ror32(size, 29) ^ static_cast(format)); +} + +RendererVK::Texture* RendererVK::findRenderTexture(u32 addr) { + // Find first render-texture hash that is >= to addr + auto match = textureCache.lower_bound(static_cast(addr) << 32); + + if (match == textureCache.end()) { + // Not found + return nullptr; + } + + Texture* texture = &match->second; + + const usize sizeInBytes = texture->size[0] * texture->size[1] * texture->sizePerPixel; + + // Ensure this address is within the span of the texture + if ((addr - match->second.loc) <= sizeInBytes) { + return texture; + } + + return nullptr; +} + +RendererVK::Texture& RendererVK::getColorRenderTexture(u32 addr, PICA::ColorFmt format, u32 width, u32 height) { + const u64 renderTextureHash = colorBufferHash(addr, width * height * PICA::sizePerPixel(format), format); + + // Cache hit + if (textureCache.contains(renderTextureHash)) { + return textureCache.at(renderTextureHash); + } + + // Cache miss + Texture& newTexture = textureCache[renderTextureHash]; + newTexture.loc = addr; + newTexture.sizePerPixel = PICA::sizePerPixel(format); + newTexture.size = {width, height}; + + newTexture.format = Vulkan::colorFormatToVulkan(format); + + vk::ImageCreateInfo textureInfo = {}; + textureInfo.setImageType(vk::ImageType::e2D); + textureInfo.setFormat(newTexture.format); + textureInfo.setExtent(vk::Extent3D(width, height, 1)); + textureInfo.setMipLevels(1); + textureInfo.setArrayLayers(1); + 
textureInfo.setSamples(vk::SampleCountFlagBits::e1); + textureInfo.setTiling(vk::ImageTiling::eOptimal); + textureInfo.setUsage( + vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eInputAttachment | vk::ImageUsageFlagBits::eTransferSrc | + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled + ); + textureInfo.setSharingMode(vk::SharingMode::eExclusive); + textureInfo.setInitialLayout(vk::ImageLayout::eUndefined); + + if (auto createResult = device->createImageUnique(textureInfo); createResult.result == vk::Result::eSuccess) { + newTexture.image = std::move(createResult.value); + } else { + Helpers::panic("Error creating color render-texture image: %s\n", vk::to_string(createResult.result).c_str()); + } + + Vulkan::setObjectName( + device.get(), newTexture.image.get(), "TextureCache:%08x %ux%u %s", addr, width, height, vk::to_string(textureInfo.format).c_str() + ); + + vk::ImageViewCreateInfo viewInfo = {}; + viewInfo.image = newTexture.image.get(); + viewInfo.viewType = vk::ImageViewType::e2D; + viewInfo.format = newTexture.format; + viewInfo.components = vk::ComponentMapping(); + viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); + + if (auto [result, imageMemory] = Vulkan::commitImageHeap(device.get(), physicalDevice, {&newTexture.image.get(), 1}); + result == vk::Result::eSuccess) { + newTexture.imageMemory = std::move(imageMemory); + } else { + Helpers::panic("Error allocating color render-texture memory: %s\n", vk::to_string(result).c_str()); + } + + if (auto createResult = device->createImageViewUnique(viewInfo); createResult.result == vk::Result::eSuccess) { + newTexture.imageView = std::move(createResult.value); + } else { + Helpers::panic("Error creating color render-texture: %s\n", vk::to_string(createResult.result).c_str()); + } + + // Initial layout transition + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, 
vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlags{}, {}, {}, + {vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryWrite, vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, newTexture.image.get(), viewInfo.subresourceRange + )} + ); + + return newTexture; +} + +RendererVK::Texture& RendererVK::getDepthRenderTexture(u32 addr, PICA::DepthFmt format, u32 width, u32 height) { + const u64 renderTextureHash = depthBufferHash(addr, width * height * PICA::sizePerPixel(format), format); + + // Cache hit + if (textureCache.contains(renderTextureHash)) { + return textureCache.at(renderTextureHash); + } + + // Cache miss + Texture& newTexture = textureCache[renderTextureHash]; + newTexture.loc = addr; + newTexture.sizePerPixel = PICA::sizePerPixel(format); + newTexture.size = {width, height}; + + newTexture.format = Vulkan::depthFormatToVulkan(format); + + vk::ImageCreateInfo textureInfo = {}; + textureInfo.setImageType(vk::ImageType::e2D); + textureInfo.setFormat(newTexture.format); + textureInfo.setExtent(vk::Extent3D(width, height, 1)); + textureInfo.setMipLevels(1); + textureInfo.setArrayLayers(1); + textureInfo.setSamples(vk::SampleCountFlagBits::e1); + textureInfo.setTiling(vk::ImageTiling::eOptimal); + textureInfo.setUsage( + vk::ImageUsageFlagBits::eDepthStencilAttachment | vk::ImageUsageFlagBits::eInputAttachment | vk::ImageUsageFlagBits::eTransferSrc | + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled + ); + textureInfo.setSharingMode(vk::SharingMode::eExclusive); + textureInfo.setInitialLayout(vk::ImageLayout::eUndefined); + + if (auto createResult = device->createImageUnique(textureInfo); createResult.result == vk::Result::eSuccess) { + newTexture.image = std::move(createResult.value); + } else { + Helpers::panic("Error creating depth render-texture image: %s\n", vk::to_string(createResult.result).c_str()); + } + + 
Vulkan::setObjectName( + device.get(), newTexture.image.get(), "TextureCache:%08x %ux%u %s", addr, width, height, vk::to_string(textureInfo.format).c_str() + ); + + vk::ImageViewCreateInfo viewInfo = {}; + viewInfo.image = newTexture.image.get(); + viewInfo.viewType = vk::ImageViewType::e2D; + viewInfo.format = newTexture.format; + viewInfo.components = vk::ComponentMapping(); + // viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1); + viewInfo.subresourceRange = vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1); + + if (auto [result, imageMemory] = Vulkan::commitImageHeap(device.get(), physicalDevice, {&newTexture.image.get(), 1}); + result == vk::Result::eSuccess) { + newTexture.imageMemory = std::move(imageMemory); + } else { + Helpers::panic("Error allocating depth render-texture memory: %s\n", vk::to_string(result).c_str()); + } + + if (auto createResult = device->createImageViewUnique(viewInfo); createResult.result == vk::Result::eSuccess) { + newTexture.imageView = std::move(createResult.value); + } else { + Helpers::panic("Error creating depth render-texture: %s\n", vk::to_string(createResult.result).c_str()); + } + + // Initial layout transition (depth and/or stencil) + if (vk::componentCount(newTexture.format) == 2) { + viewInfo.subresourceRange.aspectMask |= vk::ImageAspectFlagBits::eStencil; + } + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlags{}, {}, {}, + {vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryWrite, vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, newTexture.image.get(), viewInfo.subresourceRange + )} + ); + + return newTexture; +} + +vk::RenderPass RendererVK::getRenderPass(vk::Format colorFormat, std::optional depthFormat) { + u64 
renderPassHash = static_cast(colorFormat); + + if (depthFormat.has_value()) { + renderPassHash |= (static_cast(depthFormat.value()) << 32); + } + + // Cache hit + if (renderPassCache.contains(renderPassHash)) { + return renderPassCache.at(renderPassHash).get(); + } + + // Cache miss + vk::RenderPassCreateInfo renderPassInfo = {}; + vk::SubpassDescription subPass = {}; + + std::vector renderPassAttachments = {}; + + vk::AttachmentDescription colorAttachment = {}; + colorAttachment.format = colorFormat; + colorAttachment.samples = vk::SampleCountFlagBits::e1; + colorAttachment.loadOp = vk::AttachmentLoadOp::eLoad; + colorAttachment.storeOp = vk::AttachmentStoreOp::eStore; + colorAttachment.stencilLoadOp = vk::AttachmentLoadOp::eLoad; + colorAttachment.stencilStoreOp = vk::AttachmentStoreOp::eStore; + colorAttachment.initialLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + colorAttachment.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + renderPassAttachments.emplace_back(colorAttachment); + + if (depthFormat.has_value()) { + vk::AttachmentDescription depthAttachment = {}; + depthAttachment.format = depthFormat.value(); + depthAttachment.samples = vk::SampleCountFlagBits::e1; + depthAttachment.loadOp = vk::AttachmentLoadOp::eLoad; + depthAttachment.storeOp = vk::AttachmentStoreOp::eStore; + depthAttachment.stencilLoadOp = vk::AttachmentLoadOp::eLoad; + depthAttachment.stencilStoreOp = vk::AttachmentStoreOp::eStore; + depthAttachment.initialLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + depthAttachment.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + renderPassAttachments.emplace_back(depthAttachment); + } + + renderPassInfo.setAttachments(renderPassAttachments); + + static const vk::AttachmentReference colorAttachmentReference = {0, vk::ImageLayout::eColorAttachmentOptimal}; + static const vk::AttachmentReference depthAttachmentReference = {1, vk::ImageLayout::eDepthStencilReadOnlyOptimal}; + + 
subPass.setColorAttachments(colorAttachmentReference); + if (depthFormat.has_value()) { + subPass.setPDepthStencilAttachment(&depthAttachmentReference); + } + + subPass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + + renderPassInfo.setSubpasses(subPass); + + // We only have one sub-pass and we want all render-passes to be sequential, + // so input/output depends on VK_SUBPASS_EXTERNAL + static const vk::SubpassDependency subpassDependencies[2] = { + vk::SubpassDependency( + VK_SUBPASS_EXTERNAL, 0, vk::PipelineStageFlagBits::eAllGraphics, vk::PipelineStageFlagBits::eAllGraphics, + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eColorAttachmentWrite, vk::DependencyFlagBits::eByRegion + ), + vk::SubpassDependency( + 0, VK_SUBPASS_EXTERNAL, vk::PipelineStageFlagBits::eAllGraphics, vk::PipelineStageFlagBits::eAllGraphics, + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eColorAttachmentWrite, vk::DependencyFlagBits::eByRegion + )}; + + renderPassInfo.setDependencies(subpassDependencies); + + if (auto createResult = device->createRenderPassUnique(renderPassInfo); createResult.result == vk::Result::eSuccess) { + return (renderPassCache[renderPassHash] = std::move(createResult.value)).get(); + } else { + Helpers::panic("Error creating render pass: %s\n", vk::to_string(createResult.result).c_str()); + } + return {}; +} + +vk::RenderPass RendererVK::getRenderPass(PICA::ColorFmt colorFormat, std::optional depthFormat) { + if (depthFormat.has_value()) { + return getRenderPass(Vulkan::colorFormatToVulkan(colorFormat), Vulkan::depthFormatToVulkan(depthFormat.value())); + } else { + return getRenderPass(Vulkan::colorFormatToVulkan(colorFormat), {}); + } +} + vk::Result RendererVK::recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D swapchainExtent) { static constexpr u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 static constexpr u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall @@ 
-69,7 +535,7 @@ vk::Result RendererVK::recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D sw // Fifo support is required by all vulkan implementations, waits for vsync vk::PresentModeKHR swapchainPresentMode = vk::PresentModeKHR::eFifo; if (auto getResult = physicalDevice.getSurfacePresentModesKHR(surface); getResult.result == vk::Result::eSuccess) { - std::vector& presentModes = getResult.value; + const std::vector& presentModes = getResult.value; // Use mailbox if available, lowest-latency vsync-enabled mode if (std::find(presentModes.begin(), presentModes.end(), vk::PresentModeKHR::eMailbox) != presentModes.end()) { @@ -155,48 +621,6 @@ vk::Result RendererVK::recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D sw Helpers::panic("Error creating acquiring swapchain images: %s\n", vk::to_string(getResult.result).c_str()); } - // Swapchain Command buffer(s) - vk::CommandBufferAllocateInfo commandBuffersInfo = {}; - commandBuffersInfo.commandPool = commandPool.get(); - commandBuffersInfo.level = vk::CommandBufferLevel::ePrimary; - commandBuffersInfo.commandBufferCount = swapchainImageCount; - - if (auto allocateResult = device->allocateCommandBuffersUnique(commandBuffersInfo); allocateResult.result == vk::Result::eSuccess) { - presentCommandBuffers = std::move(allocateResult.value); - } else { - Helpers::panic("Error allocating command buffer: %s\n", vk::to_string(allocateResult.result).c_str()); - } - - // Swapchain synchronization primitives - vk::FenceCreateInfo fenceInfo = {}; - fenceInfo.flags = vk::FenceCreateFlagBits::eSignaled; - - vk::SemaphoreCreateInfo semaphoreInfo = {}; - - swapImageFreeSemaphore.resize(swapchainImageCount); - renderFinishedSemaphore.resize(swapchainImageCount); - frameFinishedFences.resize(swapchainImageCount); - - for (usize i = 0; i < swapchainImageCount; i++) { - if (auto createResult = device->createSemaphoreUnique(semaphoreInfo); createResult.result == vk::Result::eSuccess) { - swapImageFreeSemaphore[i] = 
std::move(createResult.value); - } else { - Helpers::panic("Error creating 'present-ready' semaphore: %s\n", vk::to_string(createResult.result).c_str()); - } - - if (auto createResult = device->createSemaphoreUnique(semaphoreInfo); createResult.result == vk::Result::eSuccess) { - renderFinishedSemaphore[i] = std::move(createResult.value); - } else { - Helpers::panic("Error creating 'post-render' semaphore: %s\n", vk::to_string(createResult.result).c_str()); - } - - if (auto createResult = device->createFenceUnique(fenceInfo); createResult.result == vk::Result::eSuccess) { - frameFinishedFences[i] = std::move(createResult.value); - } else { - Helpers::panic("Error creating 'present-ready' semaphore: %s\n", vk::to_string(createResult.result).c_str()); - } - } - return vk::Result::eSuccess; } @@ -205,136 +629,260 @@ RendererVK::RendererVK(GPU& gpu, const std::array& internalRegs, co RendererVK::~RendererVK() {} -void RendererVK::reset() {} +void RendererVK::reset() { renderPassCache.clear(); } void RendererVK::display() { - // Block, on the CPU, to ensure that this swapchain-frame is ready for more work - if (auto waitResult = device->waitForFences({frameFinishedFences[currentFrame].get()}, true, std::numeric_limits::max()); - waitResult != vk::Result::eSuccess) { - Helpers::panic("Error waiting on swapchain fence: %s\n", vk::to_string(waitResult).c_str()); - } - - u32 swapchainImageIndex = std::numeric_limits::max(); - if (const auto acquireResult = - device->acquireNextImageKHR(swapchain.get(), std::numeric_limits::max(), swapImageFreeSemaphore[currentFrame].get(), {}); - acquireResult.result == vk::Result::eSuccess) { - swapchainImageIndex = acquireResult.value; - } else { - switch (acquireResult.result) { - case vk::Result::eSuboptimalKHR: - case vk::Result::eErrorOutOfDateKHR: { - // Surface resized - vk::Extent2D swapchainExtent; - { - int windowWidth, windowHeight; - // Block until we have a valid surface-area to present to - // Usually this is because the 
window has been minimized - // Todo: We should still be rendering even without a valid swapchain - do { - SDL_Vulkan_GetDrawableSize(targetWindow, &windowWidth, &windowHeight); - } while (!windowWidth || !windowHeight); - swapchainExtent.width = windowWidth; - swapchainExtent.height = windowHeight; + // Get the next available swapchain image, and signal the semaphore when it's ready + static constexpr u32 swapchainImageInvalid = std::numeric_limits::max(); + u32 swapchainImageIndex = swapchainImageInvalid; + if (swapchain) { + if (const auto acquireResult = + device->acquireNextImageKHR(swapchain.get(), std::numeric_limits::max(), swapImageFreeSemaphore[frameBufferingIndex].get(), {}); + acquireResult.result == vk::Result::eSuccess) { + swapchainImageIndex = acquireResult.value; + } else { + switch (acquireResult.result) { + case vk::Result::eSuboptimalKHR: + case vk::Result::eErrorOutOfDateKHR: { + // Surface resized + vk::Extent2D swapchainExtent; + { + int windowWidth, windowHeight; + // Block until we have a valid surface-area to present to + // Usually this is because the window has been minimized + // Todo: We should still be rendering even without a valid swapchain + do { + SDL_Vulkan_GetDrawableSize(targetWindow, &windowWidth, &windowHeight); + } while (!windowWidth || !windowHeight); + swapchainExtent.width = windowWidth; + swapchainExtent.height = windowHeight; + } + recreateSwapchain(swapchainSurface, swapchainExtent); + break; + } + default: { + Helpers::panic("Error acquiring next swapchain image: %s\n", vk::to_string(acquireResult.result).c_str()); } - recreateSwapchain(surface.get(), swapchainExtent); - break; - } - default: { - Helpers::panic("Error acquiring next swapchain image: %s\n", vk::to_string(acquireResult.result).c_str()); } } } - vk::UniqueCommandBuffer& presentCommandBuffer = presentCommandBuffers.at(currentFrame); + const bool topActiveFb = externalRegs[PICA::ExternalRegs::Framebuffer0Select] & 1; + const u32 topScreenAddr = 
externalRegs[topActiveFb ? PICA::ExternalRegs::Framebuffer0AFirstAddr : PICA::ExternalRegs::Framebuffer0ASecondAddr]; - vk::CommandBufferBeginInfo beginInfo = {}; - beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; + const bool bottomActiveFb = externalRegs[PICA::ExternalRegs::Framebuffer1Select] & 1; + const u32 bottomScreenAddr = + externalRegs[bottomActiveFb ? PICA::ExternalRegs::Framebuffer1AFirstAddr : PICA::ExternalRegs::Framebuffer1ASecondAddr]; - if (const vk::Result beginResult = presentCommandBuffer->begin(beginInfo); beginResult != vk::Result::eSuccess) { - Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); + //// Render Display + { + static const std::array renderScreenScopeColor = {{1.0f, 0.0f, 1.0f, 1.0f}}; + + Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), renderScreenScopeColor, "Render Screen"); + + vk::RenderPassBeginInfo renderPassBeginInfo = {}; + renderPassBeginInfo.renderPass = getRenderPass(vk::Format::eR8G8B8A8Unorm, {}); + + renderPassBeginInfo.framebuffer = screenTextureFramebuffers[frameBufferingIndex].get(); + renderPassBeginInfo.renderArea.offset = vk::Offset2D(); + renderPassBeginInfo.renderArea.extent = vk::Extent2D(400, 240 * 2); + + getCurrentCommandBuffer().beginRenderPass(renderPassBeginInfo, vk::SubpassContents::eInline); + + const Texture* topScreen = findRenderTexture(topScreenAddr); + const Texture* bottomScreen = findRenderTexture(bottomScreenAddr); + + if (topScreen || bottomScreen) { + getCurrentCommandBuffer().bindPipeline(vk::PipelineBindPoint::eGraphics, displayPipeline.get()); + + // Update descriptors before binding to the command buffer + if (topScreen) { + descriptorUpdateBatch->addImageSampler( + topDisplayPipelineDescriptorSet[frameBufferingIndex], 0, topScreen->imageView.get(), samplerCache->getSampler(sampler2D()) + ); + } + + if (bottomScreen) { + descriptorUpdateBatch->addImageSampler( + 
bottomDisplayPipelineDescriptorSet[frameBufferingIndex], 0, bottomScreen->imageView.get(), samplerCache->getSampler(sampler2D()) + ); + } + descriptorUpdateBatch->flush(); + + // Render top screen + if (topScreen) { + static const std::array scopeColor = {{1.0f, 0.0f, 0.0f, 1.0f}}; + Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), scopeColor, "Top Screen: %08x", topScreenAddr); + + getCurrentCommandBuffer().bindDescriptorSets( + vk::PipelineBindPoint::eGraphics, displayPipelineLayout.get(), 0, {topDisplayPipelineDescriptorSet[frameBufferingIndex]}, {} + ); + getCurrentCommandBuffer().setViewport(0, vk::Viewport(0, 0, 400, 240)); + getCurrentCommandBuffer().setScissor(0, vk::Rect2D({0, 0}, {400, 240})); + getCurrentCommandBuffer().draw(3, 1, 0, 0); + } + + // Render bottom screen + if (bottomScreen) { + static const std::array scopeColor = {{0.0f, 1.0f, 0.0f, 1.0f}}; + Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), scopeColor, "Bottom Screen: %08x", bottomScreenAddr); + getCurrentCommandBuffer().bindDescriptorSets( + vk::PipelineBindPoint::eGraphics, displayPipelineLayout.get(), 0, {bottomDisplayPipelineDescriptorSet[frameBufferingIndex]}, {} + ); + getCurrentCommandBuffer().bindPipeline(vk::PipelineBindPoint::eGraphics, displayPipeline.get()); + getCurrentCommandBuffer().setViewport(0, vk::Viewport(40, 240, 320, 240)); + getCurrentCommandBuffer().setScissor(0, vk::Rect2D({40, 240}, {320, 240})); + getCurrentCommandBuffer().draw(3, 1, 0, 0); + } + } + + getCurrentCommandBuffer().endRenderPass(); } - { - static const std::array presentScopeColor = {{1.0f, 0.0f, 1.0f, 1.0f}}; + //// Present + if (swapchainImageIndex != swapchainImageInvalid) { + static const std::array presentScopeColor = {{1.0f, 1.0f, 1.0f, 1.0f}}; + Vulkan::DebugLabelScope debugScope(getCurrentCommandBuffer(), presentScopeColor, "Present"); - Vulkan::DebugLabelScope debugScope(presentCommandBuffer.get(), presentScopeColor, "Present"); - - // Prepare for color-clear - 
presentCommandBuffer->pipelineBarrier( + // Prepare swapchain image for color-clear/blit-dst, prepare top/bottom screen for blit-src + getCurrentCommandBuffer().pipelineBarrier( vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, - {vk::ImageMemoryBarrier( - vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, - vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - )} + { + // swapchainImage: Undefined -> TransferDst + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eUndefined, + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + // screenTexture: ShaderReadOnlyOptimal -> TransferSrc + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eShaderReadOnlyOptimal, + vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[frameBufferingIndex].get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } ); - presentCommandBuffer->clearColorImage( - swapchainImages[swapchainImageIndex], vk::ImageLayout::eTransferDstOptimal, presentScopeColor, + // Clear swapchain image with black + static const std::array clearColor = {{0.0f, 0.0f, 0.0f, 1.0f}}; + getCurrentCommandBuffer().clearColorImage( + swapchainImages[swapchainImageIndex], vk::ImageLayout::eTransferDstOptimal, clearColor, vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) ); - // Prepare for present - presentCommandBuffer->pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, 
vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::DependencyFlags(), {}, {}, - {vk::ImageMemoryBarrier( - vk::AccessFlagBits::eNone, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, - vk::ImageLayout::ePresentSrcKHR, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - )} + // Blit screentexture into swapchain image + static const vk::ImageBlit screenBlit( + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240 * 2, 1}}, + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), {vk::Offset3D{}, vk::Offset3D{400, 240 * 2, 1}} + ); + getCurrentCommandBuffer().blitImage( + screenTexture[frameBufferingIndex].get(), vk::ImageLayout::eTransferSrcOptimal, swapchainImages[swapchainImageIndex], + vk::ImageLayout::eTransferDstOptimal, {screenBlit}, vk::Filter::eNearest + ); + + // Prepare swapchain image for present + // Transfer screenTexture back into ColorAttachmentOptimal + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllGraphics, vk::DependencyFlags(), {}, {}, + { + // swapchainImage: TransferDst -> Present (wait for all writes) + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::ePresentSrcKHR, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, swapchainImages[swapchainImageIndex], + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + // screenTexture: TransferSrc -> eShaderReadOnlyOptimal (wait for all reads) + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferRead, vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eTransferSrcOptimal, + vk::ImageLayout::eShaderReadOnlyOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + 
screenTexture[frameBufferingIndex].get(), vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } ); } - if (const vk::Result endResult = presentCommandBuffer->end(); endResult != vk::Result::eSuccess) { + if (const vk::Result endResult = getCurrentCommandBuffer().end(); endResult != vk::Result::eSuccess) { Helpers::panic("Error ending command buffer recording: %s\n", vk::to_string(endResult).c_str()); } vk::SubmitInfo submitInfo = {}; // Wait for any previous uses of the image image to finish presenting - submitInfo.setWaitSemaphores(swapImageFreeSemaphore[currentFrame].get()); + std::vector waitSemaphores; + std::vector waitSemaphoreStages; + { + if (swapchainImageIndex != swapchainImageInvalid) { + waitSemaphores.emplace_back(swapImageFreeSemaphore[frameBufferingIndex].get()); + static const vk::PipelineStageFlags waitStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput; + waitSemaphoreStages.emplace_back(waitStageMask); + } + + submitInfo.setWaitSemaphores(waitSemaphores); + submitInfo.setWaitDstStageMask(waitSemaphoreStages); + } // Signal when finished - submitInfo.setSignalSemaphores(renderFinishedSemaphore[currentFrame].get()); + submitInfo.setSignalSemaphores(renderFinishedSemaphore[frameBufferingIndex].get()); - static const vk::PipelineStageFlags waitStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput; - submitInfo.setWaitDstStageMask(waitStageMask); + submitInfo.setCommandBuffers(getCurrentCommandBuffer()); - submitInfo.setCommandBuffers(presentCommandBuffer.get()); + device->resetFences({frameFinishedFences[frameBufferingIndex].get()}); - device->resetFences({frameFinishedFences[currentFrame].get()}); - - if (const vk::Result submitResult = graphicsQueue.submit({submitInfo}, frameFinishedFences[currentFrame].get()); + if (const vk::Result submitResult = graphicsQueue.submit({submitInfo}, frameFinishedFences[frameBufferingIndex].get()); submitResult != vk::Result::eSuccess) { Helpers::panic("Error submitting to 
graphics queue: %s\n", vk::to_string(submitResult).c_str()); } - vk::PresentInfoKHR presentInfo = {}; - presentInfo.setWaitSemaphores(renderFinishedSemaphore[currentFrame].get()); - presentInfo.setSwapchains(swapchain.get()); - presentInfo.setImageIndices(swapchainImageIndex); + if (swapchainImageIndex != swapchainImageInvalid) { + vk::PresentInfoKHR presentInfo = {}; + presentInfo.setWaitSemaphores(renderFinishedSemaphore[frameBufferingIndex].get()); + presentInfo.setSwapchains(swapchain.get()); + presentInfo.setImageIndices(swapchainImageIndex); - if (const auto presentResult = presentQueue.presentKHR(presentInfo); presentResult == vk::Result::eSuccess) { - } else { - switch (presentResult) { - case vk::Result::eSuboptimalKHR: - case vk::Result::eErrorOutOfDateKHR: { - // Surface resized - vk::Extent2D swapchainExtent; - { - int windowWidth, windowHeight; - SDL_Vulkan_GetDrawableSize(targetWindow, &windowWidth, &windowHeight); - swapchainExtent.width = windowWidth; - swapchainExtent.height = windowHeight; + if (const auto presentResult = presentQueue.presentKHR(presentInfo); presentResult == vk::Result::eSuccess) { + } else { + switch (presentResult) { + case vk::Result::eSuboptimalKHR: + case vk::Result::eErrorOutOfDateKHR: { + // Surface resized + vk::Extent2D swapchainExtent; + { + int windowWidth, windowHeight; + SDL_Vulkan_GetDrawableSize(targetWindow, &windowWidth, &windowHeight); + swapchainExtent.width = windowWidth; + swapchainExtent.height = windowHeight; + } + recreateSwapchain(swapchainSurface, swapchainExtent); + break; + } + default: { + Helpers::panic("Error presenting swapchain image: %s\n", vk::to_string(presentResult).c_str()); } - recreateSwapchain(surface.get(), swapchainExtent); - break; - } - default: { - Helpers::panic("Error presenting swapchain image: %s\n", vk::to_string(presentResult).c_str()); } } } - currentFrame = ((currentFrame + 1) % swapchainImageCount); + // We are now working on the next frame + frameBufferingIndex = 
((frameBufferingIndex + 1) % frameBufferingCount); + + // Wait for next frame to be ready + + // Block, on the CPU, to ensure that this buffered-frame is ready for more work + if (auto waitResult = device->waitForFences({frameFinishedFences[frameBufferingIndex].get()}, true, std::numeric_limits::max()); + waitResult != vk::Result::eSuccess) { + Helpers::panic("Error waiting on swapchain fence: %s\n", vk::to_string(waitResult).c_str()); + } + + { + frameFramebuffers[frameBufferingIndex].clear(); + + getCurrentCommandBuffer().reset(); + + vk::CommandBufferBeginInfo beginInfo = {}; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; + + if (const vk::Result beginResult = getCurrentCommandBuffer().begin(beginInfo); beginResult != vk::Result::eSuccess) { + Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); + } + } } void RendererVK::initGraphicsContext(SDL_Window* window) { @@ -357,19 +905,37 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { instanceInfo.pApplicationInfo = &applicationInfo; - std::vector instanceExtensions = { + std::unordered_set instanceExtensionsAvailable = {}; + if (const auto enumerateResult = vk::enumerateInstanceExtensionProperties(); enumerateResult.result == vk::Result::eSuccess) { + for (const auto& curExtension : enumerateResult.value) { + instanceExtensionsAvailable.emplace(curExtension.extensionName.data()); + } + } + + std::vector instanceExtensions = {}; + + if (instanceExtensionsAvailable.contains(VK_KHR_SURFACE_EXTENSION_NAME)) { + instanceExtensions.emplace_back(VK_KHR_SURFACE_EXTENSION_NAME); + } + + bool debugUtils = false; + if (instanceExtensionsAvailable.contains(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { + instanceExtensions.emplace_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + debugUtils = true; + } + #if defined(__APPLE__) - VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, + if 
(instanceExtensionsAvailable.contains(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME)) { + instanceExtensions.emplace_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); + } #endif - VK_EXT_DEBUG_UTILS_EXTENSION_NAME, - }; // Get any additional extensions that SDL wants as well - { + if (targetWindow) { unsigned int extensionCount = 0; - SDL_Vulkan_GetInstanceExtensions(window, &extensionCount, nullptr); + SDL_Vulkan_GetInstanceExtensions(targetWindow, &extensionCount, nullptr); std::vector sdlInstanceExtensions(extensionCount); - SDL_Vulkan_GetInstanceExtensions(window, &extensionCount, sdlInstanceExtensions.data()); + SDL_Vulkan_GetInstanceExtensions(targetWindow, &extensionCount, sdlInstanceExtensions.data()); instanceExtensions.insert(instanceExtensions.end(), sdlInstanceExtensions.begin(), sdlInstanceExtensions.end()); } @@ -390,13 +956,7 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { VULKAN_HPP_DEFAULT_DISPATCHER.init(instance.get()); // Enable debug messenger if the instance was able to be created with debug_utils - if (std::find( - instanceExtensions.begin(), instanceExtensions.end(), - // std::string_view has a way to compare itself to `const char*` - // so by casting it, we get the actual string comparisons - // and not pointer-comparisons - std::string_view(VK_EXT_DEBUG_UTILS_EXTENSION_NAME) - ) != instanceExtensions.end()) { + if (debugUtils) { vk::DebugUtilsMessengerCreateInfoEXT debugCreateInfo{}; debugCreateInfo.messageSeverity = vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose | vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo | vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning; @@ -411,10 +971,12 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { } // Create surface - if (VkSurfaceKHR newSurface; SDL_Vulkan_CreateSurface(window, instance.get(), &newSurface)) { - surface.reset(newSurface); - } else { - Helpers::warn("Error creating Vulkan surface"); + if (window) { + if 
(VkSurfaceKHR newSurface; SDL_Vulkan_CreateSurface(window, instance.get(), &newSurface)) { + swapchainSurface = newSurface; + } else { + Helpers::warn("Error creating Vulkan surface"); + } } // Pick physical device @@ -423,18 +985,20 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { std::vector::iterator partitionEnd = physicalDevices.end(); // Prefer GPUs that can access the surface - const auto surfaceSupport = [this](const vk::PhysicalDevice& physicalDevice) -> bool { - const usize queueCount = physicalDevice.getQueueFamilyProperties().size(); - for (usize queueIndex = 0; queueIndex < queueCount; ++queueIndex) { - if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueIndex, surface.get()); - supportResult.result == vk::Result::eSuccess) { - return supportResult.value; + if (swapchainSurface) { + const auto surfaceSupport = [this](const vk::PhysicalDevice& physicalDevice) -> bool { + const usize queueCount = physicalDevice.getQueueFamilyProperties().size(); + for (usize queueIndex = 0; queueIndex < queueCount; ++queueIndex) { + if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueIndex, swapchainSurface); + supportResult.result == vk::Result::eSuccess) { + return supportResult.value; + } } - } - return false; - }; + return false; + }; - partitionEnd = std::stable_partition(physicalDevices.begin(), partitionEnd, surfaceSupport); + partitionEnd = std::stable_partition(physicalDevices.begin(), partitionEnd, surfaceSupport); + } // Prefer Discrete GPUs const auto isDiscrete = [](const vk::PhysicalDevice& physicalDevice) -> bool { @@ -454,26 +1018,32 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { std::vector deviceQueueInfos; { const std::vector queueFamilyProperties = physicalDevice.getQueueFamilyProperties(); - + std::unordered_set queueFamilyRequests; // Get present queue family - for (usize queueFamilyIndex = 0; queueFamilyIndex < queueFamilyProperties.size(); ++queueFamilyIndex) { - if (auto supportResult = 
physicalDevice.getSurfaceSupportKHR(queueFamilyIndex, surface.get()); - supportResult.result == vk::Result::eSuccess) { - if (supportResult.value) { - presentQueueFamily = queueFamilyIndex; - break; + if (swapchainSurface) { + for (usize queueFamilyIndex = 0; queueFamilyIndex < queueFamilyProperties.size(); ++queueFamilyIndex) { + if (auto supportResult = physicalDevice.getSurfaceSupportKHR(queueFamilyIndex, swapchainSurface); + supportResult.result == vk::Result::eSuccess) { + if (supportResult.value) { + presentQueueFamily = queueFamilyIndex; + break; + } } } + queueFamilyRequests.emplace(presentQueueFamily); } static const float queuePriority = 1.0f; graphicsQueueFamily = findQueueFamily(queueFamilyProperties, vk::QueueFlagBits::eGraphics); + queueFamilyRequests.emplace(graphicsQueueFamily); computeQueueFamily = findQueueFamily(queueFamilyProperties, vk::QueueFlagBits::eCompute); + queueFamilyRequests.emplace(computeQueueFamily); transferQueueFamily = findQueueFamily(queueFamilyProperties, vk::QueueFlagBits::eTransfer); + queueFamilyRequests.emplace(transferQueueFamily); // Requests a singular queue for each unique queue-family - const std::unordered_set queueFamilyRequests = {presentQueueFamily, graphicsQueueFamily, computeQueueFamily, transferQueueFamily}; + for (const u32 queueFamilyIndex : queueFamilyRequests) { deviceQueueInfos.emplace_back(vk::DeviceQueueCreateInfo({}, queueFamilyIndex, 1, &queuePriority)); } @@ -482,15 +1052,31 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { // Create Device vk::DeviceCreateInfo deviceInfo = {}; - static const char* deviceExtensions[] = { - VK_KHR_SWAPCHAIN_EXTENSION_NAME, + // Device extensions + std::vector deviceExtensions = { #if defined(__APPLE__) "VK_KHR_portability_subset", #endif // VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME }; - deviceInfo.ppEnabledExtensionNames = deviceExtensions; - deviceInfo.enabledExtensionCount = std::size(deviceExtensions); + + std::unordered_set physicalDeviceExtensions; + if 
(const auto enumerateResult = physicalDevice.enumerateDeviceExtensionProperties(); enumerateResult.result == vk::Result::eSuccess) { + for (const auto& extension : enumerateResult.value) { + physicalDeviceExtensions.insert(extension.extensionName); + } + } else { + Helpers::panic("Error enumerating physical devices extensions: %s\n", vk::to_string(enumerateResult.result).c_str()); + } + + // Optional extensions + + // Optionally enable the swapchain, to support "headless" rendering + if (physicalDeviceExtensions.contains(VK_KHR_SWAPCHAIN_EXTENSION_NAME)) { + deviceExtensions.emplace_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + } + + deviceInfo.setPEnabledExtensionNames(deviceExtensions); vk::StructureChain deviceFeatureChain = {}; @@ -512,8 +1098,10 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { // Initialize device-specific function pointers VULKAN_HPP_DEFAULT_DISPATCHER.init(device.get()); - presentQueue = device->getQueue(presentQueueFamily, 0); - graphicsQueue = device->getQueue(presentQueueFamily, 0); + if (presentQueueFamily != VK_QUEUE_FAMILY_IGNORED) { + presentQueue = device->getQueue(presentQueueFamily, 0); + } + graphicsQueue = device->getQueue(graphicsQueueFamily, 0); computeQueue = device->getQueue(computeQueueFamily, 0); transferQueue = device->getQueue(transferQueueFamily, 0); @@ -528,22 +1116,468 @@ void RendererVK::initGraphicsContext(SDL_Window* window) { } // Create swapchain - vk::Extent2D swapchainExtent; - { - int windowWidth, windowHeight; - SDL_Vulkan_GetDrawableSize(window, &windowWidth, &windowHeight); - swapchainExtent.width = windowWidth; - swapchainExtent.height = windowHeight; + if (targetWindow && swapchainSurface) { + vk::Extent2D swapchainExtent; + { + int windowWidth, windowHeight; + SDL_Vulkan_GetDrawableSize(window, &windowWidth, &windowHeight); + swapchainExtent.width = windowWidth; + swapchainExtent.height = windowHeight; + } + recreateSwapchain(swapchainSurface, swapchainExtent); } - 
recreateSwapchain(surface.get(), swapchainExtent); + + // Create frame-buffering data + // Frame-buffering Command buffer(s) + vk::CommandBufferAllocateInfo commandBuffersInfo = {}; + commandBuffersInfo.commandPool = commandPool.get(); + commandBuffersInfo.level = vk::CommandBufferLevel::ePrimary; + commandBuffersInfo.commandBufferCount = frameBufferingCount; + + if (auto allocateResult = device->allocateCommandBuffersUnique(commandBuffersInfo); allocateResult.result == vk::Result::eSuccess) { + frameCommandBuffers = std::move(allocateResult.value); + } else { + Helpers::panic("Error allocating command buffer: %s\n", vk::to_string(allocateResult.result).c_str()); + } + + // Initialize the first command buffer to be in the RECORDING state + vk::CommandBufferBeginInfo beginInfo = {}; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse; + + if (const vk::Result beginResult = frameCommandBuffers[frameBufferingIndex]->begin(beginInfo); beginResult != vk::Result::eSuccess) { + Helpers::panic("Error beginning command buffer recording: %s\n", vk::to_string(beginResult).c_str()); + } + + // Frame-buffering synchronization primitives + vk::FenceCreateInfo fenceInfo = {}; + fenceInfo.flags = vk::FenceCreateFlagBits::eSignaled; + + vk::SemaphoreCreateInfo semaphoreInfo = {}; + + swapImageFreeSemaphore.resize(frameBufferingCount); + renderFinishedSemaphore.resize(frameBufferingCount); + frameFinishedFences.resize(frameBufferingCount); + frameFramebuffers.resize(frameBufferingCount); + frameCommandBuffers.resize(frameBufferingCount); + + vk::ImageCreateInfo screenTextureInfo = {}; + screenTextureInfo.setImageType(vk::ImageType::e2D); + screenTextureInfo.setFormat(vk::Format::eR8G8B8A8Unorm); + screenTextureInfo.setExtent(vk::Extent3D(400, 240 * 2, 1)); + screenTextureInfo.setMipLevels(1); + screenTextureInfo.setArrayLayers(1); + screenTextureInfo.setSamples(vk::SampleCountFlagBits::e1); + screenTextureInfo.setTiling(vk::ImageTiling::eOptimal); + 
screenTextureInfo.setUsage( + vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eInputAttachment | vk::ImageUsageFlagBits::eTransferSrc | + vk::ImageUsageFlagBits::eTransferDst + ); + screenTextureInfo.setSharingMode(vk::SharingMode::eExclusive); + screenTextureInfo.setInitialLayout(vk::ImageLayout::eUndefined); + + screenTexture.resize(frameBufferingCount); + screenTextureViews.resize(frameBufferingCount); + screenTextureFramebuffers.resize(frameBufferingCount); + + for (usize i = 0; i < frameBufferingCount; ++i) { + if (auto createResult = device->createSemaphoreUnique(semaphoreInfo); createResult.result == vk::Result::eSuccess) { + swapImageFreeSemaphore[i] = std::move(createResult.value); + + Vulkan::setObjectName(device.get(), swapImageFreeSemaphore[i].get(), "swapImageFreeSemaphore#%zu", i); + } else { + Helpers::panic("Error creating 'present-ready' semaphore: %s\n", vk::to_string(createResult.result).c_str()); + } + + if (auto createResult = device->createSemaphoreUnique(semaphoreInfo); createResult.result == vk::Result::eSuccess) { + renderFinishedSemaphore[i] = std::move(createResult.value); + + Vulkan::setObjectName(device.get(), renderFinishedSemaphore[i].get(), "renderFinishedSemaphore#%zu", i); + } else { + Helpers::panic("Error creating 'post-render' semaphore: %s\n", vk::to_string(createResult.result).c_str()); + } + + if (auto createResult = device->createFenceUnique(fenceInfo); createResult.result == vk::Result::eSuccess) { + frameFinishedFences[i] = std::move(createResult.value); + } else { + Helpers::panic("Error creating 'frame-finished' fence: %s\n", vk::to_string(createResult.result).c_str()); + } + + if (auto createResult = device->createImageUnique(screenTextureInfo); createResult.result == vk::Result::eSuccess) { + screenTexture[i] = std::move(createResult.value); + + Vulkan::setObjectName(device.get(), screenTexture[i].get(), "screenTexture#%zu", i); + } else { + Helpers::panic("Error creating top-screen image: %s\n", 
vk::to_string(createResult.result).c_str()); + } + } + + // Commit memory to all of our images + { + const auto getImage = [](const vk::UniqueImage& image) -> vk::Image { return image.get(); }; + std::vector images; + std::transform(screenTexture.begin(), screenTexture.end(), std::back_inserter(images), getImage); + + if (auto [result, imageMemory] = Vulkan::commitImageHeap(device.get(), physicalDevice, images); result == vk::Result::eSuccess) { + framebufferMemory = std::move(imageMemory); + } else { + Helpers::panic("Error allocating framebuffer memory: %s\n", vk::to_string(result).c_str()); + } + } + + // Memory is bounded, create views, framebuffer, and layout transitions for screentexture + vk::ImageViewCreateInfo screenTextureViewCreateInfo = {}; + screenTextureViewCreateInfo.viewType = vk::ImageViewType::e2D; + screenTextureViewCreateInfo.format = vk::Format::eR8G8B8A8Unorm; + screenTextureViewCreateInfo.components.r = vk::ComponentSwizzle::eR; + screenTextureViewCreateInfo.components.g = vk::ComponentSwizzle::eG; + screenTextureViewCreateInfo.components.b = vk::ComponentSwizzle::eB; + screenTextureViewCreateInfo.components.a = vk::ComponentSwizzle::eA; + screenTextureViewCreateInfo.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + + for (usize i = 0; i < frameBufferingCount; ++i) { + screenTextureViewCreateInfo.image = screenTexture[i].get(); + + if (auto createResult = device->createImageViewUnique(screenTextureViewCreateInfo); createResult.result == vk::Result::eSuccess) { + screenTextureViews[i] = std::move(createResult.value); + } else { + Helpers::panic("Error creating screen texture view: %s\n", vk::to_string(createResult.result).c_str()); + } + + // Initial layout transition + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlags{}, {}, {}, + {vk::ImageMemoryBarrier( + vk::AccessFlagBits::eMemoryWrite, vk::AccessFlagBits::eShaderRead, 
vk::ImageLayout::eUndefined, + vk::ImageLayout::eShaderReadOnlyOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, screenTexture[i].get(), + screenTextureViewCreateInfo.subresourceRange + )} + ); + + vk::FramebufferCreateInfo framebufferInfo = {}; + framebufferInfo.setRenderPass(getRenderPass(vk::Format::eR8G8B8A8Unorm, {})); + framebufferInfo.setAttachments(screenTextureViews[i].get()); + framebufferInfo.setWidth(400); + framebufferInfo.setHeight(240 * 2); + framebufferInfo.setLayers(1); + if (auto createResult = device->createFramebufferUnique(framebufferInfo); createResult.result == vk::Result::eSuccess) { + screenTextureFramebuffers[i] = std::move(createResult.value); + } else { + Helpers::panic("Error creating screen-texture framebuffer: %s\n", vk::to_string(createResult.result).c_str()); + } + } + + static vk::DescriptorSetLayoutBinding displayShaderLayout[] = { + {// Just a singular texture slot + 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, + }; + + if (auto createResult = Vulkan::DescriptorUpdateBatch::create(device.get()); createResult.has_value()) { + descriptorUpdateBatch = std::make_unique(std::move(createResult.value())); + } else { + Helpers::panic("Error creating descriptor update batch\n"); + } + + if (auto createResult = Vulkan::SamplerCache::create(device.get()); createResult.has_value()) { + samplerCache = std::make_unique(std::move(createResult.value())); + } else { + Helpers::panic("Error creating sampler cache\n"); + } + + if (auto createResult = Vulkan::DescriptorHeap::create(device.get(), displayShaderLayout); createResult.has_value()) { + displayDescriptorHeap = std::make_unique(std::move(createResult.value())); + } else { + Helpers::panic("Error creating descriptor heap\n"); + } + + for (usize i = 0; i < frameBufferingCount; ++i) { + if (auto allocateResult = displayDescriptorHeap->allocateDescriptorSet(); allocateResult.has_value()) { + 
topDisplayPipelineDescriptorSet.emplace_back(allocateResult.value()); + } else { + Helpers::panic("Error creating descriptor set\n"); + } + if (auto allocateResult = displayDescriptorHeap->allocateDescriptorSet(); allocateResult.has_value()) { + bottomDisplayPipelineDescriptorSet.emplace_back(allocateResult.value()); + } else { + Helpers::panic("Error creating descriptor set\n"); + } + } + + auto vk_resources = cmrc::RendererVK::get_filesystem(); + auto displayVertexShader = vk_resources.open("vulkan_display.vert.spv"); + auto displayFragmentShader = vk_resources.open("vulkan_display.frag.spv"); + + vk::UniqueShaderModule displayVertexShaderModule = createShaderModule(device.get(), displayVertexShader); + vk::UniqueShaderModule displayFragmentShaderModule = createShaderModule(device.get(), displayFragmentShader); + + vk::RenderPass screenTextureRenderPass = getRenderPass(screenTextureInfo.format, {}); + + std::tie(displayPipeline, displayPipelineLayout) = createGraphicsPipeline( + device.get(), {}, {{displayDescriptorHeap.get()->getDescriptorSetLayout()}}, displayVertexShaderModule.get(), + displayFragmentShaderModule.get(), {}, {}, screenTextureRenderPass + ); } -void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {} +void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { + const Texture* renderTexture = findRenderTexture(startAddress); -void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {} + if (!renderTexture) { + // not found + return; + } + + if (*vk::componentName(renderTexture->format, 0) != 'D') { + // Color-Clear + vk::ClearColorValue clearColor = {}; + + clearColor.float32[0] = Helpers::getBits<24, 8>(value) / 255.0f; // r + clearColor.float32[1] = Helpers::getBits<16, 8>(value) / 255.0f; // g + clearColor.float32[2] = Helpers::getBits<8, 8>(value) / 255.0f; // b + clearColor.float32[3] = Helpers::getBits<0, 8>(value) / 255.0f; // a 
+ + Vulkan::DebugLabelScope scope( + getCurrentCommandBuffer(), clearColor.float32, "ClearBuffer start:%08X end:%08X value:%08X control:%08X\n", startAddress, endAddress, + value, control + ); + + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + { + // renderTexture: ShaderReadOnlyOptimal -> TransferDst + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eShaderRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eShaderReadOnlyOptimal, + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, renderTexture->image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } + ); + + // Clear RenderTarget + getCurrentCommandBuffer().clearColorImage( + renderTexture->image.get(), vk::ImageLayout::eTransferDstOptimal, clearColor, + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ); + + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllGraphics, vk::DependencyFlags(), {}, {}, + { + // renderTexture: TransferDst -> eShaderReadOnlyOptimal + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eShaderReadOnlyOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, renderTexture->image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } + ); + } else { + // Depth-Clear + vk::ClearDepthStencilValue clearDepthStencil = {}; + + if (vk::componentBits(renderTexture->format, 0) == 16) { + clearDepthStencil.depth = (value & 0xffff) / 65535.0f; + } else { + clearDepthStencil.depth = (value & 0xffffff) / 16777215.0f; + } + + clearDepthStencil.stencil = (value >> 24); // Stencil + + const std::array scopeColor = {{clearDepthStencil.depth, clearDepthStencil.depth, clearDepthStencil.depth, 
1.0f}}; + Vulkan::DebugLabelScope scope( + getCurrentCommandBuffer(), scopeColor, "ClearBuffer start:%08X end:%08X value:%08X control:%08X\n", startAddress, endAddress, value, + control + ); + + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + { + // renderTexture: ShaderReadOnlyOptimal -> TransferDst + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eShaderRead, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eShaderReadOnlyOptimal, + vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, renderTexture->image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1) + ), + } + ); + + static vk::ImageSubresourceRange depthStencilRanges[2] = { + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1)}; + + // Clear RenderTarget + getCurrentCommandBuffer().clearDepthStencilImage( + renderTexture->image.get(), vk::ImageLayout::eTransferDstOptimal, &clearDepthStencil, vk::componentCount(renderTexture->format), + depthStencilRanges + ); + + getCurrentCommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllGraphics, vk::DependencyFlags(), {}, {}, + { + // renderTexture: TransferDst -> eShaderReadOnlyOptimal + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eShaderReadOnlyOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, renderTexture->image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, 0, 1, 0, 1) + ), + } + ); + } +} + +// NOTE: The GPU format has RGB5551 and RGB655 swapped compared to internal regs format +static PICA::ColorFmt ToColorFmt(u32 format) { + switch 
(format) { + case 2: return PICA::ColorFmt::RGB565; + case 3: return PICA::ColorFmt::RGBA5551; + default: return static_cast(format); + } +} + +void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { + const u32 inputWidth = inputSize & 0xffff; + const u32 inputHeight = inputSize >> 16; + const PICA::ColorFmt inputFormat = ToColorFmt(Helpers::getBits<8, 3>(flags)); + const PICA::ColorFmt outputFormat = ToColorFmt(Helpers::getBits<12, 3>(flags)); + const bool verticalFlip = flags & 1; + const PICA::Scaling scaling = static_cast(Helpers::getBits<24, 2>(flags)); + + u32 outputWidth = outputSize & 0xffff; + u32 outputHeight = outputSize >> 16; + + Texture& srcFramebuffer = getColorRenderTexture(inputAddr, inputFormat, inputWidth, inputHeight); + Math::Rect srcRect = srcFramebuffer.getSubRect(inputAddr, outputWidth, outputHeight); + + if (verticalFlip) { + std::swap(srcRect.bottom, srcRect.top); + } + + // Apply scaling for the destination rectangle. 
+ if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { + outputWidth >>= 1; + } + + if (scaling == PICA::Scaling::XY) { + outputHeight >>= 1; + } + + Texture& destFramebuffer = getColorRenderTexture(outputAddr, outputFormat, outputWidth, outputHeight); + Math::Rect destRect = destFramebuffer.getSubRect(outputAddr, outputWidth, outputHeight); + + if (inputWidth != outputWidth) { + // Helpers::warn("Strided display transfer is not handled correctly!\n"); + } + + const vk::ImageBlit blitRegion( + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), + {vk::Offset3D{(int)srcRect.left, (int)srcRect.top, 0}, vk::Offset3D{(int)srcRect.right, (int)srcRect.bottom, 1}}, + vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), + {vk::Offset3D{(int)destRect.left, (int)destRect.top, 0}, vk::Offset3D{(int)destRect.right, (int)destRect.bottom, 1}} + ); + + const vk::CommandBuffer& blitCommandBuffer = getCurrentCommandBuffer(); + + static const std::array displayTransferColor = {{1.0f, 1.0f, 0.0f, 1.0f}}; + Vulkan::DebugLabelScope scope( + blitCommandBuffer, displayTransferColor, + "DisplayTransfer inputAddr 0x%08X outputAddr 0x%08X inputWidth %d outputWidth %d inputHeight %d outputHeight %d", inputAddr, outputAddr, + inputWidth, outputWidth, inputHeight, outputHeight + ); + + blitCommandBuffer.pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags(), {}, {}, + { + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferRead, vk::ImageLayout::eShaderReadOnlyOptimal, + vk::ImageLayout::eTransferSrcOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, srcFramebuffer.image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eShaderReadOnlyOptimal, + 
vk::ImageLayout::eTransferDstOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, destFramebuffer.image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } + ); + + blitCommandBuffer.blitImage( + srcFramebuffer.image.get(), vk::ImageLayout::eTransferSrcOptimal, destFramebuffer.image.get(), vk::ImageLayout::eTransferDstOptimal, + {blitRegion}, vk::Filter::eLinear + ); + + blitCommandBuffer.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllGraphics, vk::DependencyFlags(), {}, {}, + { + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferRead, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferSrcOptimal, + vk::ImageLayout::eShaderReadOnlyOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, srcFramebuffer.image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + vk::ImageMemoryBarrier( + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eColorAttachmentWrite, vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eShaderReadOnlyOptimal, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, destFramebuffer.image.get(), + vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) + ), + } + ); +} void RendererVK::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {} -void RendererVK::drawVertices(PICA::PrimType primType, std::span vertices) {} +void RendererVK::drawVertices(PICA::PrimType primType, std::span vertices) { + using namespace Helpers; + + const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask]; + const bool depthTestEnable = depthControl & 1; + const bool depthWriteEnable = getBit<12>(depthControl); + const int depthFunc = getBits<4, 3>(depthControl); + const vk::ColorComponentFlags colorMask = vk::ColorComponentFlags(getBits<8, 4>(depthControl)); + + const vk::RenderPass curRenderPass = getRenderPass(colourBufferFormat, depthTestEnable ? 
std::make_optional(depthBufferFormat) : std::nullopt); + + // Create framebuffer, find a way to cache this! + vk::Framebuffer curFramebuffer = {}; + { + std::vector renderTargets; + + const auto& colorTexture = getColorRenderTexture(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]); + renderTargets.emplace_back(colorTexture.imageView.get()); + + if (depthTestEnable) { + const auto& depthTexture = getDepthRenderTexture(depthBufferLoc, depthBufferFormat, fbSize[0], fbSize[1]); + renderTargets.emplace_back(depthTexture.imageView.get()); + } + + vk::FramebufferCreateInfo framebufferInfo = {}; + framebufferInfo.setRenderPass(curRenderPass); + framebufferInfo.setAttachments(renderTargets); + framebufferInfo.setWidth(fbSize[0]); + framebufferInfo.setHeight(fbSize[1]); + framebufferInfo.setLayers(1); + if (auto createResult = device->createFramebufferUnique(framebufferInfo); createResult.result == vk::Result::eSuccess) { + curFramebuffer = (frameFramebuffers[frameBufferingIndex].emplace_back(std::move(createResult.value))).get(); + } else { + Helpers::panic("Error creating render-texture framebuffer: %s\n", vk::to_string(createResult.result).c_str()); + } + } + + vk::RenderPassBeginInfo renderBeginInfo = {}; + renderBeginInfo.renderPass = curRenderPass; + static const vk::ClearValue ClearColors[] = { + vk::ClearColorValue(std::array{0.0f, 0.0f, 0.0f, 0.0f}), + vk::ClearDepthStencilValue(1.0f, 0), + vk::ClearColorValue(std::array{0.0f, 0.0f, 0.0f, 0.0f}), + }; + renderBeginInfo.pClearValues = ClearColors; + renderBeginInfo.clearValueCount = std::size(ClearColors); + renderBeginInfo.renderArea.extent.width = fbSize[0]; + renderBeginInfo.renderArea.extent.height = fbSize[1]; + renderBeginInfo.framebuffer = curFramebuffer; + + const vk::CommandBuffer& commandBuffer = getCurrentCommandBuffer(); + + // Todo: Rather than starting a new renderpass for each draw, do some state-tracking to re-use render-passes + commandBuffer.beginRenderPass(renderBeginInfo, 
vk::SubpassContents::eInline); + static const std::array labelColor = {{1.0f, 0.0f, 0.0f, 1.0f}}; + Vulkan::insertDebugLabel(commandBuffer, labelColor, "DrawVertices: %u vertices", vertices.size()); + commandBuffer.endRenderPass(); +} void RendererVK::screenshot(const std::string& name) {} diff --git a/src/core/renderer_vk/vk_api.cpp b/src/core/renderer_vk/vk_api.cpp new file mode 100644 index 00000000..4f879dc2 --- /dev/null +++ b/src/core/renderer_vk/vk_api.cpp @@ -0,0 +1,3 @@ +#include "renderer_vk/vk_api.hpp" + +VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE; \ No newline at end of file diff --git a/src/core/renderer_vk/vk_descriptor_heap.cpp b/src/core/renderer_vk/vk_descriptor_heap.cpp new file mode 100644 index 00000000..ecf71d92 --- /dev/null +++ b/src/core/renderer_vk/vk_descriptor_heap.cpp @@ -0,0 +1,119 @@ +#include "renderer_vk/vk_descriptor_heap.hpp" + +#include +#include +#include + +namespace Vulkan { + + DescriptorHeap::DescriptorHeap(vk::Device device) : device(device) {} + + std::optional DescriptorHeap::allocateDescriptorSet() { + // Find a free slot + const auto freeSlot = std::find(allocationMap.begin(), allocationMap.end(), false); + + // If there is no free slot, return + if (freeSlot == allocationMap.end()) { + return std::nullopt; + } + + // Mark the slot as allocated + *freeSlot = true; + + const u16 index = static_cast(std::distance(allocationMap.begin(), freeSlot)); + + vk::UniqueDescriptorSet& newDescriptorSet = descriptorSets[index]; + + if (!newDescriptorSet) { + // Descriptor set doesn't exist yet. 
Allocate a new one + vk::DescriptorSetAllocateInfo allocateInfo = {}; + + allocateInfo.descriptorPool = descriptorPool.get(); + allocateInfo.pSetLayouts = &descriptorSetLayout.get(); + allocateInfo.descriptorSetCount = 1; + + if (auto AllocateResult = device.allocateDescriptorSetsUnique(allocateInfo); AllocateResult.result == vk::Result::eSuccess) { + newDescriptorSet = std::move(AllocateResult.value[0]); + } else { + // Error allocating descriptor set + return std::nullopt; + } + } + + return newDescriptorSet.get(); + } + + bool DescriptorHeap::freeDescriptorSet(vk::DescriptorSet Set) { + // Find the descriptor set + const auto found = + std::find_if(descriptorSets.begin(), descriptorSets.end(), [&Set](const auto& CurSet) -> bool { return CurSet.get() == Set; }); + + // If the descriptor set is not found, return + if (found == descriptorSets.end()) { + return false; + } + + // Mark the slot as free + const u16 index = static_cast(std::distance(descriptorSets.begin(), found)); + + allocationMap[index] = false; + + return true; + } + + std::optional DescriptorHeap::create( + vk::Device device, std::span bindings, u16 descriptorHeapCount + ) { + DescriptorHeap newDescriptorHeap(device); + + // Create a histogram of each of the descriptor types and how many of each + // the pool should have + // Todo: maybe keep this around as a hash table to do more dynamic + // allocations of descriptor sets rather than allocating them all up-front + std::vector poolSizes; + { + std::unordered_map descriptorTypeCounts; + + for (const auto& binding : bindings) { + descriptorTypeCounts[binding.descriptorType] += binding.descriptorCount; + } + for (const auto& descriptorTypeCount : descriptorTypeCounts) { + poolSizes.push_back(vk::DescriptorPoolSize(descriptorTypeCount.first, descriptorTypeCount.second * descriptorHeapCount)); + } + } + + // Create descriptor pool + { + vk::DescriptorPoolCreateInfo poolInfo; + poolInfo.flags = vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet; + 
poolInfo.maxSets = descriptorHeapCount; + poolInfo.pPoolSizes = poolSizes.data(); + poolInfo.poolSizeCount = poolSizes.size(); + if (auto createResult = device.createDescriptorPoolUnique(poolInfo); createResult.result == vk::Result::eSuccess) { + newDescriptorHeap.descriptorPool = std::move(createResult.value); + } else { + return std::nullopt; + } + } + + // Create descriptor set layout + { + vk::DescriptorSetLayoutCreateInfo layoutInfo; + layoutInfo.pBindings = bindings.data(); + layoutInfo.bindingCount = bindings.size(); + + if (auto createResult = device.createDescriptorSetLayoutUnique(layoutInfo); createResult.result == vk::Result::eSuccess) { + newDescriptorHeap.descriptorSetLayout = std::move(createResult.value); + } else { + return std::nullopt; + } + } + + newDescriptorHeap.descriptorSets.resize(descriptorHeapCount); + newDescriptorHeap.allocationMap.resize(descriptorHeapCount); + + newDescriptorHeap.bindings.assign(bindings.begin(), bindings.end()); + + return {std::move(newDescriptorHeap)}; + } +} // namespace Vulkan \ No newline at end of file diff --git a/src/core/renderer_vk/vk_descriptor_update_batch.cpp b/src/core/renderer_vk/vk_descriptor_update_batch.cpp new file mode 100644 index 00000000..a414ca2d --- /dev/null +++ b/src/core/renderer_vk/vk_descriptor_update_batch.cpp @@ -0,0 +1,98 @@ +#include "renderer_vk/vk_descriptor_update_batch.hpp" + +#include +#include + +namespace Vulkan { + + void DescriptorUpdateBatch::flush() { + device.updateDescriptorSets({std::span(descriptorWrites.get(), descriptorWriteEnd)}, {std::span(descriptorCopies.get(), descriptorCopyEnd)}); + + descriptorWriteEnd = 0; + descriptorCopyEnd = 0; + } + + void DescriptorUpdateBatch::addImage(vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::ImageView imageView, vk::ImageLayout imageLayout) { + if (descriptorWriteEnd >= descriptorWriteMax) { + flush(); + } + + const auto& imageInfo = descriptorInfos[descriptorWriteEnd].emplace(vk::Sampler(), imageView, imageLayout); + + 
descriptorWrites[descriptorWriteEnd] = + vk::WriteDescriptorSet(targetDescriptor, targetBinding, 0, 1, vk::DescriptorType::eSampledImage, &imageInfo, nullptr, nullptr); + + ++descriptorWriteEnd; + } + + void DescriptorUpdateBatch::addSampler(vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::Sampler sampler) { + if (descriptorWriteEnd >= descriptorWriteMax) { + flush(); + } + + const auto& imageInfo = descriptorInfos[descriptorWriteEnd].emplace(sampler, vk::ImageView(), vk::ImageLayout()); + + descriptorWrites[descriptorWriteEnd] = + vk::WriteDescriptorSet(targetDescriptor, targetBinding, 0, 1, vk::DescriptorType::eSampler, &imageInfo, nullptr, nullptr); + + ++descriptorWriteEnd; + } + + void DescriptorUpdateBatch::addImageSampler( + vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::ImageView imageView, vk::Sampler sampler, vk::ImageLayout imageLayout + ) { + if (descriptorWriteEnd >= descriptorWriteMax) { + flush(); + } + + const auto& imageInfo = descriptorInfos[descriptorWriteEnd].emplace(sampler, imageView, imageLayout); + + descriptorWrites[descriptorWriteEnd] = + vk::WriteDescriptorSet(targetDescriptor, targetBinding, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr); + + ++descriptorWriteEnd; + } + + void DescriptorUpdateBatch::addBuffer( + vk::DescriptorSet targetDescriptor, u8 targetBinding, vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size + ) { + if (descriptorWriteEnd >= descriptorWriteMax) { + flush(); + } + + const auto& bufferInfo = descriptorInfos[descriptorWriteEnd].emplace(buffer, offset, size); + + descriptorWrites[descriptorWriteEnd] = + vk::WriteDescriptorSet(targetDescriptor, targetBinding, 0, 1, vk::DescriptorType::eStorageImage, nullptr, &bufferInfo, nullptr); + + ++descriptorWriteEnd; + } + + void DescriptorUpdateBatch::copyBinding( + vk::DescriptorSet sourceDescriptor, vk::DescriptorSet targetDescriptor, u8 sourceBinding, u8 targetBinding, u8 sourceArrayElement, + u8 
targetArrayElement, u8 descriptorCount + ) { + if (descriptorCopyEnd >= descriptorCopyMax) { + flush(); + } + + descriptorCopies[descriptorCopyEnd] = vk::CopyDescriptorSet( + sourceDescriptor, sourceBinding, sourceArrayElement, targetDescriptor, targetBinding, targetArrayElement, descriptorCount + ); + + ++descriptorCopyEnd; + } + + std::optional DescriptorUpdateBatch::create(vk::Device device, usize descriptorWriteMax, usize descriptorCopyMax) + + { + DescriptorUpdateBatch newDescriptorUpdateBatch(device, descriptorWriteMax, descriptorCopyMax); + + newDescriptorUpdateBatch.descriptorInfos = std::make_unique(descriptorWriteMax); + newDescriptorUpdateBatch.descriptorWrites = std::make_unique(descriptorWriteMax); + newDescriptorUpdateBatch.descriptorCopies = std::make_unique(descriptorCopyMax); + + return {std::move(newDescriptorUpdateBatch)}; + } + +} // namespace Vulkan \ No newline at end of file diff --git a/src/core/renderer_vk/vk_memory.cpp b/src/core/renderer_vk/vk_memory.cpp new file mode 100644 index 00000000..c9087719 --- /dev/null +++ b/src/core/renderer_vk/vk_memory.cpp @@ -0,0 +1,174 @@ +#include "renderer_vk/vk_memory.hpp" + +namespace Vulkan { + + static constexpr vk::DeviceSize alignUp(vk::DeviceSize value, std::size_t size) { + const vk::DeviceSize mod = static_cast(value % size); + value -= mod; + return static_cast(mod == vk::DeviceSize{0} ? value : value + size); + } + + // Given a speculative heap-allocation, defined by its current size and + // memory-type bits, appends a memory requirements structure to it, updating + // both the size and the required memory-type-bits. Returns the offset within + // the heap for the current MemoryRequirements Todo: Sun Apr 23 13:28:25 PDT + // 2023 Rather than using a running-size of the heap, look at all of the memory + // requests and optimally create a packing for all of the offset and alignment + // requirements. 
Such as by satisfying all of the largest alignments first, and + // then the smallest, to reduce padding + static vk::DeviceSize commitMemoryRequestToHeap( + const vk::MemoryRequirements& curMemoryRequirements, vk::DeviceSize& curHeapEnd, u32& curMemoryTypeBits, vk::DeviceSize sizeAlignment + ) { + // Accumulate a mask of all the memory types that satisfies each of the + // handles + curMemoryTypeBits &= curMemoryRequirements.memoryTypeBits; + + // Pad up the memory sizes so they are not considered aliasing + const vk::DeviceSize curMemoryOffset = alignUp(curHeapEnd, curMemoryRequirements.alignment); + // Pad the size by the required size-alignment. + // Intended for BufferImageGranularity + const vk::DeviceSize curMemorySize = alignUp(curMemoryRequirements.size, sizeAlignment); + + curHeapEnd = (curMemoryOffset + curMemorySize); + return curMemoryOffset; + } + + s32 findMemoryTypeIndex( + vk::PhysicalDevice physicalDevice, u32 memoryTypeMask, vk::MemoryPropertyFlags memoryProperties, + vk::MemoryPropertyFlags memoryExcludeProperties + ) { + const vk::PhysicalDeviceMemoryProperties deviceMemoryProperties = physicalDevice.getMemoryProperties(); + // Iterate the physical device's memory types until we find a match + for (std::size_t i = 0; i < deviceMemoryProperties.memoryTypeCount; i++) { + if( + // Is within memory type mask + (((memoryTypeMask >> i) & 0b1) == 0b1) && + // Has property flags + (deviceMemoryProperties.memoryTypes[i].propertyFlags + & memoryProperties) + == memoryProperties + && + // None of the excluded properties are enabled + !(deviceMemoryProperties.memoryTypes[i].propertyFlags + & memoryExcludeProperties) ) + { + return static_cast(i); + } + } + + return -1; + } + + std::tuple commitImageHeap( + vk::Device device, vk::PhysicalDevice physicalDevice, const std::span images, vk::MemoryPropertyFlags memoryProperties, + vk::MemoryPropertyFlags memoryExcludeProperties + ) { + vk::MemoryAllocateInfo imageHeapAllocInfo = {}; + u32 
imageHeapMemoryTypeBits = 0xFFFFFFFF; + std::vector imageHeapBinds; + + const vk::DeviceSize bufferImageGranularity = physicalDevice.getProperties().limits.bufferImageGranularity; + + for (const vk::Image& curImage : images) { + const vk::DeviceSize curBindOffset = commitMemoryRequestToHeap( + device.getImageMemoryRequirements(curImage), imageHeapAllocInfo.allocationSize, imageHeapMemoryTypeBits, bufferImageGranularity + ); + + if (imageHeapMemoryTypeBits == 0) { + // No possible memory heap for all of the images to share + return std::make_tuple(vk::Result::eErrorOutOfDeviceMemory, vk::UniqueDeviceMemory()); + } + + // Put nullptr for the device memory for now + imageHeapBinds.emplace_back(vk::BindImageMemoryInfo{curImage, nullptr, curBindOffset}); + } + + const s32 memoryTypeIndex = findMemoryTypeIndex(physicalDevice, imageHeapMemoryTypeBits, memoryProperties, memoryExcludeProperties); + + if (memoryTypeIndex < 0) { + // Unable to find a memory heap that satisfies all the images + return std::make_tuple(vk::Result::eErrorOutOfDeviceMemory, vk::UniqueDeviceMemory()); + } + + imageHeapAllocInfo.memoryTypeIndex = memoryTypeIndex; + + vk::UniqueDeviceMemory imageHeapMemory = {}; + + if (auto allocResult = device.allocateMemoryUnique(imageHeapAllocInfo); allocResult.result == vk::Result::eSuccess) { + imageHeapMemory = std::move(allocResult.value); + } else { + return std::make_tuple(allocResult.result, vk::UniqueDeviceMemory()); + } + + // Assign the device memory to the bindings + for (vk::BindImageMemoryInfo& curBind : imageHeapBinds) { + curBind.memory = imageHeapMemory.get(); + } + + // Now bind them all in one call + if (const vk::Result bindResult = device.bindImageMemory2(imageHeapBinds); bindResult == vk::Result::eSuccess) { + // Binding memory succeeded + } else { + return std::make_tuple(bindResult, vk::UniqueDeviceMemory()); + } + + return std::make_tuple(vk::Result::eSuccess, std::move(imageHeapMemory)); + } + + std::tuple commitBufferHeap( + vk::Device 
device, vk::PhysicalDevice physicalDevice, const std::span buffers, vk::MemoryPropertyFlags memoryProperties, + vk::MemoryPropertyFlags memoryExcludeProperties + ) { + vk::MemoryAllocateInfo bufferHeapAllocInfo = {}; + u32 bufferHeapMemoryTypeBits = 0xFFFFFFFF; + std::vector bufferHeapBinds; + + const vk::DeviceSize bufferImageGranularity = physicalDevice.getProperties().limits.bufferImageGranularity; + + for (const vk::Buffer& curBuffer : buffers) { + const vk::DeviceSize curBindOffset = commitMemoryRequestToHeap( + device.getBufferMemoryRequirements(curBuffer), bufferHeapAllocInfo.allocationSize, bufferHeapMemoryTypeBits, bufferImageGranularity + ); + + if (bufferHeapMemoryTypeBits == 0) { + // No possible memory heap for all of the buffers to share + return std::make_tuple(vk::Result::eErrorOutOfDeviceMemory, vk::UniqueDeviceMemory()); + } + + // Put nullptr for the device memory for now + bufferHeapBinds.emplace_back(vk::BindBufferMemoryInfo{curBuffer, nullptr, curBindOffset}); + } + + const s32 memoryTypeIndex = findMemoryTypeIndex(physicalDevice, bufferHeapMemoryTypeBits, memoryProperties, memoryExcludeProperties); + + if (memoryTypeIndex < 0) { + // Unable to find a memory heap that satisfies all the buffers + return std::make_tuple(vk::Result::eErrorOutOfDeviceMemory, vk::UniqueDeviceMemory()); + } + + bufferHeapAllocInfo.memoryTypeIndex = memoryTypeIndex; + + vk::UniqueDeviceMemory bufferHeapMemory = {}; + + if (auto allocResult = device.allocateMemoryUnique(bufferHeapAllocInfo); allocResult.result == vk::Result::eSuccess) { + bufferHeapMemory = std::move(allocResult.value); + } else { + return std::make_tuple(allocResult.result, vk::UniqueDeviceMemory()); + } + + // Assign the device memory to the bindings + for (vk::BindBufferMemoryInfo& curBind : bufferHeapBinds) { + curBind.memory = bufferHeapMemory.get(); + } + + // Now bind them all in one call + if (const vk::Result bindResult = device.bindBufferMemory2(bufferHeapBinds); bindResult == 
vk::Result::eSuccess) { + // Binding memory succeeded + } else { + return std::make_tuple(bindResult, vk::UniqueDeviceMemory()); + } + + return std::make_tuple(vk::Result::eSuccess, std::move(bufferHeapMemory)); + } + +} // namespace Vulkan \ No newline at end of file diff --git a/src/core/renderer_vk/vk_pica.cpp b/src/core/renderer_vk/vk_pica.cpp new file mode 100644 index 00000000..e7fc9033 --- /dev/null +++ b/src/core/renderer_vk/vk_pica.cpp @@ -0,0 +1,39 @@ +#include "renderer_vk/vk_pica.hpp" + +namespace Vulkan { + + vk::Format colorFormatToVulkan(PICA::ColorFmt colorFormat) { + switch (colorFormat) { + case PICA::ColorFmt::RGBA8: return vk::Format::eR8G8B8A8Unorm; + // VK_FORMAT_R8G8B8A8_UNORM is mandated by the vulkan specification + // VK_FORMAT_R8G8B8_UNORM may not be supported + // TODO: Detect this! + // case PICA::ColorFmt::RGB8: return vk::Format::eR8G8B8Unorm; + case PICA::ColorFmt::RGB8: return vk::Format::eR8G8B8A8Unorm; + case PICA::ColorFmt::RGBA5551: return vk::Format::eR5G5B5A1UnormPack16; + case PICA::ColorFmt::RGB565: return vk::Format::eR5G6B5UnormPack16; + case PICA::ColorFmt::RGBA4: return vk::Format::eR4G4B4A4UnormPack16; + } + return vk::Format::eUndefined; + } + vk::Format depthFormatToVulkan(PICA::DepthFmt depthFormat) { + switch (depthFormat) { + // VK_FORMAT_D16_UNORM is mandated by the vulkan specification + case PICA::DepthFmt::Depth16: return vk::Format::eD16Unorm; + case PICA::DepthFmt::Unknown1: return vk::Format::eUndefined; + // The GPU may _not_ support these formats natively + // Only one of: + // VK_FORMAT_X8_D24_UNORM_PACK32 and VK_FORMAT_D32_SFLOAT + // and one of: + // VK_FORMAT_D24_UNORM_S8_UINT and VK_FORMAT_D32_SFLOAT_S8_UINT + // will be supported + // TODO: Detect this! 
+ // case PICA::DepthFmt::Depth24: return vk::Format::eX8D24UnormPack32; + // case PICA::DepthFmt::Depth24Stencil8: return vk::Format::eD24UnormS8Uint; + case PICA::DepthFmt::Depth24: return vk::Format::eD32Sfloat; + case PICA::DepthFmt::Depth24Stencil8: return vk::Format::eD32SfloatS8Uint; + } + return vk::Format::eUndefined; + } + +} // namespace Vulkan \ No newline at end of file diff --git a/src/core/renderer_vk/vk_sampler_cache.cpp b/src/core/renderer_vk/vk_sampler_cache.cpp new file mode 100644 index 00000000..884264b1 --- /dev/null +++ b/src/core/renderer_vk/vk_sampler_cache.cpp @@ -0,0 +1,31 @@ +#include "renderer_vk/vk_sampler_cache.hpp" + +#include <vulkan/vulkan_hash.hpp> + +#include "helpers.hpp" + +namespace Vulkan { + + SamplerCache::SamplerCache(vk::Device device) : device(device) {} + + const vk::Sampler& SamplerCache::getSampler(const vk::SamplerCreateInfo& samplerInfo) { + const std::size_t samplerHash = std::hash<vk::SamplerCreateInfo>()(samplerInfo); + + // Cache hit + if (samplerMap.contains(samplerHash)) { + return samplerMap.at(samplerHash).get(); + } + + if (auto createResult = device.createSamplerUnique(samplerInfo); createResult.result == vk::Result::eSuccess) { + return (samplerMap[samplerHash] = std::move(createResult.value)).get(); + } else { + Helpers::panic("Error creating sampler: %s\n", vk::to_string(createResult.result).c_str()); + } + } + + std::optional<SamplerCache> SamplerCache::create(vk::Device device) { + SamplerCache newSamplerCache(device); + + return {std::move(newSamplerCache)}; + } +} // namespace Vulkan \ No newline at end of file diff --git a/src/core/renderer_vk/vulkan_api.cpp b/src/core/renderer_vk/vulkan_api.cpp deleted file mode 100644 index c207eea7..00000000 --- a/src/core/renderer_vk/vulkan_api.cpp +++ /dev/null @@ -1,3 +0,0 @@ -#include "renderer_vk/vulkan_api.hpp" - -VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE; \ No newline at end of file diff --git a/src/host_shaders/vulkan_display.frag b/src/host_shaders/vulkan_display.frag new file mode 100644 index
00000000..1b6bd937 --- /dev/null +++ b/src/host_shaders/vulkan_display.frag @@ -0,0 +1,7 @@ +#version 460 core +layout(location = 0) in vec2 UV; +layout(location = 0) out vec4 FragColor; + +layout(binding = 0) uniform sampler2D u_texture; + +void main() { FragColor = texture(u_texture, UV); } \ No newline at end of file diff --git a/src/host_shaders/vulkan_display.vert b/src/host_shaders/vulkan_display.vert new file mode 100644 index 00000000..284997ca --- /dev/null +++ b/src/host_shaders/vulkan_display.vert @@ -0,0 +1,7 @@ +#version 460 core +layout(location = 0) out vec2 UV; + +void main() { + UV = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(UV * 2.0f + -1.0f, 0.0f, 1.0f); +} \ No newline at end of file