video_core: Abstract shader generators. (#6990)

* video_core: Abstract shader generators.

* shader: Extract common generator structures and move generators to specific namespaces.

* shader: Minor fixes and clean-up.
This commit is contained in:
Steveice10
2023-09-30 02:06:06 -07:00
committed by GitHub
parent 1492d73ccb
commit 50f22d1f59
35 changed files with 1374 additions and 3344 deletions

View File

@@ -5,7 +5,8 @@
#include "common/thread_worker.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/renderer_vulkan/vk_shader_gen.h"
#include "video_core/shader/generator/glsl_shader_gen.h"
#include "video_core/shader/generator/spv_shader_gen.h"
namespace Common {

View File

@@ -14,19 +14,14 @@
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
using namespace Pica::Shader::Generator;
MICROPROFILE_DEFINE(Vulkan_Bind, "Vulkan", "Pipeline Bind", MP_RGB(192, 32, 32));
namespace Vulkan {
/// Index of each shader stage slot, listed in ascending numeric order.
/// NOTE(review): VS/FS/GS presumably stand for vertex, fragment and geometry shader — confirm.
enum ProgramType : u32 {
    VS = 0,
    FS = 1,
    GS = 2,
};
u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
switch (format) {
case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
@@ -52,14 +47,14 @@ AttribLoadFlags MakeAttribLoadFlag(Pica::PipelineRegs::VertexAttributeFormat for
}
}
constexpr std::array<vk::DescriptorSetLayoutBinding, 5> BUFFER_BINDINGS = {{
constexpr std::array<vk::DescriptorSetLayoutBinding, 6> BUFFER_BINDINGS = {{
{0, vk::DescriptorType::eUniformBufferDynamic, 1, vk::ShaderStageFlagBits::eVertex},
{1, vk::DescriptorType::eUniformBufferDynamic, 1,
vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eGeometry |
vk::ShaderStageFlagBits::eFragment},
{2, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eGeometry},
{2, vk::DescriptorType::eUniformBufferDynamic, 1, vk::ShaderStageFlagBits::eFragment},
{3, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
{4, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
{5, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
}};
constexpr std::array<vk::DescriptorSetLayoutBinding, 4> TEXTURE_BINDINGS = {{
@@ -88,8 +83,9 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
descriptor_set_providers{DescriptorSetProvider{instance, pool, BUFFER_BINDINGS},
DescriptorSetProvider{instance, pool, TEXTURE_BINDINGS},
DescriptorSetProvider{instance, pool, SHADOW_BINDINGS}},
trivial_vertex_shader{instance, vk::ShaderStageFlagBits::eVertex,
GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported())} {
trivial_vertex_shader{
instance, vk::ShaderStageFlagBits::eVertex,
GLSL::GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported(), true)} {
BuildLayout();
}
@@ -294,8 +290,8 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) {
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
Pica::Shader::ShaderSetup& setup,
const VertexLayout& layout) {
PicaVSConfig config{regs.rasterizer, regs.vs, setup, instance};
config.state.use_geometry_shader = instance.UseGeometryShaders();
PicaVSConfig config{regs, setup, instance.IsShaderClipDistanceSupported(),
instance.UseGeometryShaders()};
for (u32 i = 0; i < layout.attribute_count; i++) {
const VertexAttribute& attr = layout.attributes[i];
@@ -313,14 +309,13 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
auto [it, new_config] = programmable_vertex_map.try_emplace(config);
if (new_config) {
auto code = GenerateVertexShader(setup, config);
if (!code) {
auto program = GLSL::GenerateVertexShader(setup, config, true);
if (program.empty()) {
LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
programmable_vertex_map[config] = nullptr;
return false;
}
std::string& program = code.value();
auto [iter, new_program] = programmable_vertex_cache.try_emplace(program, instance);
auto& shader = iter->second;
@@ -359,13 +354,13 @@ bool PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
return true;
}
const PicaFixedGSConfig gs_config{regs, instance};
const PicaFixedGSConfig gs_config{regs, instance.IsShaderClipDistanceSupported()};
auto [it, new_shader] = fixed_geometry_shaders.try_emplace(gs_config, instance);
auto& shader = it->second;
if (new_shader) {
workers.QueueWork([gs_config, device = instance.GetDevice(), &shader]() {
const std::string code = GenerateFixedGeometryShader(gs_config);
const auto code = GLSL::GenerateFixedGeometryShader(gs_config, true);
shader.module = Compile(code, vk::ShaderStageFlagBits::eGeometry, device);
shader.MarkDone();
});
@@ -383,7 +378,9 @@ void PipelineCache::UseTrivialGeometryShader() {
}
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
const PicaFSConfig config{regs, instance};
const PicaFSConfig config{regs, instance.IsFragmentShaderInterlockSupported(),
instance.NeedsLogicOpEmulation(),
!instance.IsCustomBorderColorSupported(), false};
const auto [it, new_shader] = fragment_shaders.try_emplace(config, instance);
auto& shader = it->second;
@@ -395,12 +392,12 @@ void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
texture0_type == Pica::TexturingRegs::TextureConfig::ShadowCube ||
config.state.shadow_rendering.Value();
if (use_spirv && !is_shadow) {
const std::vector code = GenerateFragmentShaderSPV(config);
const std::vector code = SPIRV::GenerateFragmentShader(config);
shader.module = CompileSPV(code, instance.GetDevice());
shader.MarkDone();
} else {
workers.QueueWork([config, device = instance.GetDevice(), &shader]() {
const std::string code = GenerateFragmentShader(config);
const std::string code = GLSL::GenerateFragmentShader(config, true);
shader.module = Compile(code, vk::ShaderStageFlagBits::eFragment, device);
shader.MarkDone();
});

View File

@@ -9,6 +9,8 @@
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/shader/generator/glsl_shader_gen.h"
#include "video_core/shader/generator/spv_shader_gen.h"
namespace Pica {
struct Regs;
@@ -22,7 +24,7 @@ class RenderpassCache;
class DescriptorPool;
constexpr u32 NUM_RASTERIZER_SETS = 3;
constexpr u32 NUM_DYNAMIC_OFFSETS = 2;
constexpr u32 NUM_DYNAMIC_OFFSETS = 3;
/**
* Stores a collection of rasterizer pipelines used during rendering.
@@ -113,10 +115,10 @@ private:
std::array<u64, MAX_SHADER_STAGES> shader_hashes;
std::array<Shader*, MAX_SHADER_STAGES> current_shaders;
std::unordered_map<PicaVSConfig, Shader*> programmable_vertex_map;
std::unordered_map<Pica::Shader::Generator::PicaVSConfig, Shader*> programmable_vertex_map;
std::unordered_map<std::string, Shader> programmable_vertex_cache;
std::unordered_map<PicaFixedGSConfig, Shader> fixed_geometry_shaders;
std::unordered_map<PicaFSConfig, Shader> fragment_shaders;
std::unordered_map<Pica::Shader::Generator::PicaFixedGSConfig, Shader> fixed_geometry_shaders;
std::unordered_map<Pica::Shader::Generator::PicaFSConfig, Shader> fragment_shaders;
Shader trivial_vertex_shader;
};

View File

@@ -28,6 +28,8 @@ MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192));
using TriangleTopology = Pica::PipelineRegs::TriangleTopology;
using VideoCore::SurfaceType;
using namespace Pica::Shader::Generator;
constexpr u64 STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
constexpr u64 UNIFORM_BUFFER_SIZE = 4 * 1024 * 1024;
constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024;
@@ -76,10 +78,10 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory,
vertex_buffers.fill(stream_buffer.Handle());
uniform_buffer_alignment = instance.UniformMinAlignment();
uniform_size_aligned_vs =
Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
uniform_size_aligned_fs =
Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
uniform_size_aligned_vs_pica =
Common::AlignUp(sizeof(VSPicaUniformData), uniform_buffer_alignment);
uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment);
uniform_size_aligned_fs = Common::AlignUp(sizeof(FSUniformData), uniform_buffer_alignment);
// Define vertex layout for software shaders
MakeSoftwareVertexLayout();
@@ -107,11 +109,12 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory,
// Since we don't have access to VK_EXT_descriptor_indexing we need to initialize
// all descriptor sets even the ones we don't use.
pipeline_cache.BindBuffer(0, uniform_buffer.Handle(), 0, sizeof(Pica::Shader::VSUniformData));
pipeline_cache.BindBuffer(1, uniform_buffer.Handle(), 0, sizeof(Pica::Shader::UniformData));
pipeline_cache.BindTexelBuffer(2, *texture_lf_view);
pipeline_cache.BindTexelBuffer(3, *texture_rg_view);
pipeline_cache.BindTexelBuffer(4, *texture_rgba_view);
pipeline_cache.BindBuffer(0, uniform_buffer.Handle(), 0, sizeof(VSPicaUniformData));
pipeline_cache.BindBuffer(1, uniform_buffer.Handle(), 0, sizeof(VSUniformData));
pipeline_cache.BindBuffer(2, uniform_buffer.Handle(), 0, sizeof(FSUniformData));
pipeline_cache.BindTexelBuffer(3, *texture_lf_view);
pipeline_cache.BindTexelBuffer(4, *texture_rg_view);
pipeline_cache.BindTexelBuffer(5, *texture_rgba_view);
Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
Surface& null_cube_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_CUBE_ID);
@@ -140,7 +143,6 @@ void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading,
}
void RasterizerVulkan::SyncFixedState() {
SyncClipEnabled();
SyncCullMode();
SyncBlendEnabled();
SyncBlendFuncs();
@@ -478,16 +480,16 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
// Update scissor uniforms
const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor();
if (uniform_block_data.data.scissor_x1 != scissor_x1 ||
uniform_block_data.data.scissor_x2 != scissor_x2 ||
uniform_block_data.data.scissor_y1 != scissor_y1 ||
uniform_block_data.data.scissor_y2 != scissor_y2) {
if (fs_uniform_block_data.data.scissor_x1 != scissor_x1 ||
fs_uniform_block_data.data.scissor_x2 != scissor_x2 ||
fs_uniform_block_data.data.scissor_y1 != scissor_y1 ||
fs_uniform_block_data.data.scissor_y2 != scissor_y2) {
uniform_block_data.data.scissor_x1 = scissor_x1;
uniform_block_data.data.scissor_x2 = scissor_x2;
uniform_block_data.data.scissor_y1 = scissor_y1;
uniform_block_data.data.scissor_y2 = scissor_y2;
uniform_block_data.dirty = true;
fs_uniform_block_data.data.scissor_x1 = scissor_x1;
fs_uniform_block_data.data.scissor_x2 = scissor_x2;
fs_uniform_block_data.data.scissor_y1 = scissor_y1;
fs_uniform_block_data.data.scissor_y2 = scissor_y2;
fs_uniform_block_data.dirty = true;
}
// Sync and bind the texture surfaces
@@ -670,11 +672,6 @@ void RasterizerVulkan::UnbindSpecial() {
void RasterizerVulkan::NotifyFixedFunctionPicaRegisterChanged(u32 id) {
switch (id) {
// Clipping plane
case PICA_REG_INDEX(rasterizer.clip_enable):
SyncClipEnabled();
break;
// Culling
case PICA_REG_INDEX(rasterizer.cull_mode):
SyncCullMode();
@@ -831,14 +828,6 @@ void RasterizerVulkan::MakeSoftwareVertexLayout() {
}
}
/// Mirrors the PICA clip_enable register into the uniform block data and marks the block dirty
/// only when the cached value actually changes.
void RasterizerVulkan::SyncClipEnabled() {
    const bool wants_clip = regs.rasterizer.clip_enable != 0;
    if (wants_clip == uniform_block_data.data.enable_clip1) {
        // Cached value already matches the register; nothing to re-upload.
        return;
    }

    uniform_block_data.data.enable_clip1 = wants_clip;
    uniform_block_data.dirty = true;
}
/// Copies the PICA rasterizer cull_mode register into the rasterization state of pipeline_info.
void RasterizerVulkan::SyncCullMode() {
pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode);
}
@@ -946,7 +935,7 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() {
sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler +
sizeof(Common::Vec2f) * 128; // fog
if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) {
if (!fs_uniform_block_data.lighting_lut_dirty_any && !fs_uniform_block_data.fog_lut_dirty) {
return;
}
@@ -954,9 +943,9 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() {
auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f));
// Sync the lighting luts
if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) {
if (uniform_block_data.lighting_lut_dirty[index] || invalidate) {
if (fs_uniform_block_data.lighting_lut_dirty_any || invalidate) {
for (unsigned index = 0; index < fs_uniform_block_data.lighting_lut_dirty.size(); index++) {
if (fs_uniform_block_data.lighting_lut_dirty[index] || invalidate) {
std::array<Common::Vec2f, 256> new_data;
const auto& source_lut = Pica::g_state.lighting.luts[index];
std::transform(source_lut.begin(), source_lut.end(), new_data.begin(),
@@ -968,19 +957,19 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() {
lighting_lut_data[index] = new_data;
std::memcpy(buffer + bytes_used, new_data.data(),
new_data.size() * sizeof(Common::Vec2f));
uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] =
fs_uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] =
static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
uniform_block_data.dirty = true;
fs_uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(Common::Vec2f);
}
uniform_block_data.lighting_lut_dirty[index] = false;
fs_uniform_block_data.lighting_lut_dirty[index] = false;
}
}
uniform_block_data.lighting_lut_dirty_any = false;
fs_uniform_block_data.lighting_lut_dirty_any = false;
}
// Sync the fog lut
if (uniform_block_data.fog_lut_dirty || invalidate) {
if (fs_uniform_block_data.fog_lut_dirty || invalidate) {
std::array<Common::Vec2f, 128> new_data;
std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(),
@@ -992,12 +981,12 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() {
fog_lut_data = new_data;
std::memcpy(buffer + bytes_used, new_data.data(),
new_data.size() * sizeof(Common::Vec2f));
uniform_block_data.data.fog_lut_offset =
fs_uniform_block_data.data.fog_lut_offset =
static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
uniform_block_data.dirty = true;
fs_uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(Common::Vec2f);
}
uniform_block_data.fog_lut_dirty = false;
fs_uniform_block_data.fog_lut_dirty = false;
}
texture_lf_buffer.Commit(static_cast<u32>(bytes_used));
@@ -1010,10 +999,10 @@ void RasterizerVulkan::SyncAndUploadLUTs() {
sizeof(Common::Vec4f) * 256 + // proctex
sizeof(Common::Vec4f) * 256; // proctex diff
if (!uniform_block_data.proctex_noise_lut_dirty &&
!uniform_block_data.proctex_color_map_dirty &&
!uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty &&
!uniform_block_data.proctex_diff_lut_dirty) {
if (!fs_uniform_block_data.proctex_noise_lut_dirty &&
!fs_uniform_block_data.proctex_color_map_dirty &&
!fs_uniform_block_data.proctex_alpha_map_dirty &&
!fs_uniform_block_data.proctex_lut_dirty && !fs_uniform_block_data.proctex_diff_lut_dirty) {
return;
}
@@ -1035,34 +1024,34 @@ void RasterizerVulkan::SyncAndUploadLUTs() {
std::memcpy(buffer + bytes_used, new_data.data(),
new_data.size() * sizeof(Common::Vec2f));
lut_offset = static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
uniform_block_data.dirty = true;
fs_uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(Common::Vec2f);
}
};
// Sync the proctex noise lut
if (uniform_block_data.proctex_noise_lut_dirty || invalidate) {
if (fs_uniform_block_data.proctex_noise_lut_dirty || invalidate) {
sync_proctex_value_lut(proctex.noise_table, proctex_noise_lut_data,
uniform_block_data.data.proctex_noise_lut_offset);
uniform_block_data.proctex_noise_lut_dirty = false;
fs_uniform_block_data.data.proctex_noise_lut_offset);
fs_uniform_block_data.proctex_noise_lut_dirty = false;
}
// Sync the proctex color map
if (uniform_block_data.proctex_color_map_dirty || invalidate) {
if (fs_uniform_block_data.proctex_color_map_dirty || invalidate) {
sync_proctex_value_lut(proctex.color_map_table, proctex_color_map_data,
uniform_block_data.data.proctex_color_map_offset);
uniform_block_data.proctex_color_map_dirty = false;
fs_uniform_block_data.data.proctex_color_map_offset);
fs_uniform_block_data.proctex_color_map_dirty = false;
}
// Sync the proctex alpha map
if (uniform_block_data.proctex_alpha_map_dirty || invalidate) {
if (fs_uniform_block_data.proctex_alpha_map_dirty || invalidate) {
sync_proctex_value_lut(proctex.alpha_map_table, proctex_alpha_map_data,
uniform_block_data.data.proctex_alpha_map_offset);
uniform_block_data.proctex_alpha_map_dirty = false;
fs_uniform_block_data.data.proctex_alpha_map_offset);
fs_uniform_block_data.proctex_alpha_map_dirty = false;
}
// Sync the proctex lut
if (uniform_block_data.proctex_lut_dirty || invalidate) {
if (fs_uniform_block_data.proctex_lut_dirty || invalidate) {
std::array<Common::Vec4f, 256> new_data;
std::transform(proctex.color_table.begin(), proctex.color_table.end(), new_data.begin(),
@@ -1075,16 +1064,16 @@ void RasterizerVulkan::SyncAndUploadLUTs() {
proctex_lut_data = new_data;
std::memcpy(buffer + bytes_used, new_data.data(),
new_data.size() * sizeof(Common::Vec4f));
uniform_block_data.data.proctex_lut_offset =
fs_uniform_block_data.data.proctex_lut_offset =
static_cast<int>((offset + bytes_used) / sizeof(Common::Vec4f));
uniform_block_data.dirty = true;
fs_uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(Common::Vec4f);
}
uniform_block_data.proctex_lut_dirty = false;
fs_uniform_block_data.proctex_lut_dirty = false;
}
// Sync the proctex difference lut
if (uniform_block_data.proctex_diff_lut_dirty || invalidate) {
if (fs_uniform_block_data.proctex_diff_lut_dirty || invalidate) {
std::array<Common::Vec4f, 256> new_data;
std::transform(proctex.color_diff_table.begin(), proctex.color_diff_table.end(),
@@ -1097,48 +1086,59 @@ void RasterizerVulkan::SyncAndUploadLUTs() {
proctex_diff_lut_data = new_data;
std::memcpy(buffer + bytes_used, new_data.data(),
new_data.size() * sizeof(Common::Vec4f));
uniform_block_data.data.proctex_diff_lut_offset =
fs_uniform_block_data.data.proctex_diff_lut_offset =
static_cast<int>((offset + bytes_used) / sizeof(Common::Vec4f));
uniform_block_data.dirty = true;
fs_uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(Common::Vec4f);
}
uniform_block_data.proctex_diff_lut_dirty = false;
fs_uniform_block_data.proctex_diff_lut_dirty = false;
}
texture_buffer.Commit(static_cast<u32>(bytes_used));
}
void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
const bool sync_vs = accelerate_draw;
const bool sync_fs = uniform_block_data.dirty;
if (!sync_vs && !sync_fs) {
const bool sync_vs_pica = accelerate_draw;
const bool sync_vs = vs_uniform_block_data.dirty;
const bool sync_fs = fs_uniform_block_data.dirty;
if (!sync_vs_pica && !sync_vs && !sync_fs) {
return;
}
const u64 uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs;
const u64 uniform_size =
uniform_size_aligned_vs_pica + uniform_size_aligned_vs + uniform_size_aligned_fs;
auto [uniforms, offset, invalidate] =
uniform_buffer.Map(uniform_size, uniform_buffer_alignment);
u32 used_bytes = 0;
if (sync_vs) {
Pica::Shader::VSUniformData vs_uniforms;
vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs);
std::memcpy(uniforms, &vs_uniforms, sizeof(vs_uniforms));
pipeline_cache.SetBufferOffset(0, offset);
if (sync_vs || invalidate) {
std::memcpy(uniforms + used_bytes, &vs_uniform_block_data.data,
sizeof(vs_uniform_block_data.data));
pipeline_cache.SetBufferOffset(1, offset + used_bytes);
vs_uniform_block_data.dirty = false;
used_bytes += static_cast<u32>(uniform_size_aligned_vs);
}
if (sync_fs || invalidate) {
std::memcpy(uniforms + used_bytes, &uniform_block_data.data,
sizeof(Pica::Shader::UniformData));
std::memcpy(uniforms + used_bytes, &fs_uniform_block_data.data,
sizeof(fs_uniform_block_data.data));
pipeline_cache.SetBufferOffset(1, offset + used_bytes);
uniform_block_data.dirty = false;
pipeline_cache.SetBufferOffset(2, offset + used_bytes);
fs_uniform_block_data.dirty = false;
used_bytes += static_cast<u32>(uniform_size_aligned_fs);
}
if (sync_vs_pica) {
VSPicaUniformData vs_uniforms;
vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs);
std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));
pipeline_cache.SetBufferOffset(0, offset + used_bytes);
used_bytes += static_cast<u32>(uniform_size_aligned_vs_pica);
}
uniform_buffer.Commit(used_bytes);
}

View File

@@ -60,9 +60,6 @@ public:
private:
void NotifyFixedFunctionPicaRegisterChanged(u32 id) override;
/// Syncs the clip enabled status to match the PICA register
void SyncClipEnabled();
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
@@ -163,6 +160,7 @@ private:
vk::UniqueBufferView texture_rg_view;
vk::UniqueBufferView texture_rgba_view;
u64 uniform_buffer_alignment;
u64 uniform_size_aligned_vs_pica;
u64 uniform_size_aligned_vs;
u64 uniform_size_aligned_fs;
bool async_shaders{false};

File diff suppressed because it is too large Load Diff

View File

@@ -1,280 +0,0 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/shader/shader.h"
namespace Vulkan {
class Instance;
/// Vertex attribute slots. The numeric values are spelled out so that the mapping stays obvious
/// (and stable) if entries are ever inserted or reordered.
enum Attributes {
    ATTRIBUTE_POSITION = 0,
    ATTRIBUTE_COLOR = 1,
    ATTRIBUTE_TEXCOORD0 = 2,
    ATTRIBUTE_TEXCOORD1 = 3,
    ATTRIBUTE_TEXCOORD2 = 4,
    ATTRIBUTE_TEXCOORD0_W = 5,
    ATTRIBUTE_NORMQUAT = 6,
    ATTRIBUTE_VIEW = 7,
};
// Raw words of a single TEV stage. const_color is intentionally left out because it is not
// synced; see the comment in BuildFromRegs().
struct TevStageConfigRaw {
    u32 sources_raw;
    u32 modifiers_raw;
    u32 ops_raw;
    u32 scales_raw;

    /// Expands the stored raw words into a full TevStageConfig whose const_color is zero.
    explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
        Pica::TexturingRegs::TevStageConfig expanded;
        expanded.sources_raw = this->sources_raw;
        expanded.modifiers_raw = this->modifiers_raw;
        expanded.ops_raw = this->ops_raw;
        // Not tracked by this struct, so always reset to zero.
        expanded.const_color = 0;
        expanded.scales_raw = this->scales_raw;
        return expanded;
    }
};
/// Bit-packed fragment pipeline state. PicaFSConfig wraps this struct through
/// Common::HashableStruct, so the member layout is part of the shader cache key.
struct PicaFSConfigState {
// Global fragment state; bit positions 0-29 are assigned.
union {
BitField<0, 3, Pica::FramebufferRegs::CompareFunc> alpha_test_func;
BitField<3, 2, Pica::RasterizerRegs::ScissorMode> scissor_test_mode;
BitField<5, 3, Pica::TexturingRegs::TextureConfig::TextureType> texture0_type;
BitField<8, 1, u32> texture2_use_coord1;
// Low nibble: per-stage color update bits, high nibble: per-stage alpha update bits
// (read by PicaFSConfig::TevStageUpdatesCombinerBuffer{Color,Alpha}).
BitField<9, 8, u32> combiner_buffer_input;
BitField<17, 1, Pica::RasterizerRegs::DepthBuffering> depthmap_enable;
BitField<18, 3, Pica::TexturingRegs::FogMode> fog_mode;
BitField<21, 1, u32> fog_flip;
BitField<22, 1, u32> emulate_logic_op;
BitField<23, 4, Pica::FramebufferRegs::LogicOp> logic_op;
BitField<27, 1, u32> shadow_rendering;
BitField<28, 1, u32> shadow_texture_orthographic;
BitField<29, 1, u32> use_fragment_shader_interlock;
};
// One entry per texture slot (three entries).
// NOTE(review): enable_s / enable_t presumably select border-color handling per axis — confirm.
union {
BitField<0, 1, u32> enable_s;
BitField<1, 1, u32> enable_t;
} texture_border_color[3];
// Six TEV stages stored without const_color (see TevStageConfigRaw).
std::array<TevStageConfigRaw, 6> tev_stages;
// Fragment lighting configuration.
struct {
// Per-light flags, eight entries.
union {
BitField<0, 3, u16> num;
BitField<3, 1, u16> directional;
BitField<4, 1, u16> two_sided_diffuse;
BitField<5, 1, u16> dist_atten_enable;
BitField<6, 1, u16> spot_atten_enable;
BitField<7, 1, u16> geometric_factor_0;
BitField<8, 1, u16> geometric_factor_1;
BitField<9, 1, u16> shadow_enable;
} light[8];
// Lighting state shared by all lights; bit positions 0-23 are assigned.
union {
BitField<0, 1, u32> enable;
BitField<1, 4, u32> src_num;
BitField<5, 2, Pica::LightingRegs::LightingBumpMode> bump_mode;
BitField<7, 2, u32> bump_selector;
BitField<9, 1, u32> bump_renorm;
BitField<10, 1, u32> clamp_highlights;
BitField<11, 4, Pica::LightingRegs::LightingConfig> config;
BitField<15, 1, u32> enable_primary_alpha;
BitField<16, 1, u32> enable_secondary_alpha;
BitField<17, 1, u32> enable_shadow;
BitField<18, 1, u32> shadow_primary;
BitField<19, 1, u32> shadow_secondary;
BitField<20, 1, u32> shadow_invert;
BitField<21, 1, u32> shadow_alpha;
BitField<22, 2, u32> shadow_selector;
};
// Per-LUT settings: packed enable / abs_input / input type flags plus a float scale.
struct {
union {
BitField<0, 1, u32> enable;
BitField<1, 1, u32> abs_input;
BitField<2, 3, Pica::LightingRegs::LightingLutInput> type;
};
float scale;
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
} lighting;
// Procedural texture configuration.
struct {
// Packed proctex flags; bit positions 0-25 are assigned.
union {
BitField<0, 1, u32> enable;
BitField<1, 2, u32> coord;
BitField<3, 3, Pica::TexturingRegs::ProcTexClamp> u_clamp;
BitField<6, 3, Pica::TexturingRegs::ProcTexClamp> v_clamp;
BitField<9, 4, Pica::TexturingRegs::ProcTexCombiner> color_combiner;
BitField<13, 4, Pica::TexturingRegs::ProcTexCombiner> alpha_combiner;
BitField<17, 3, Pica::TexturingRegs::ProcTexFilter> lut_filter;
BitField<20, 1, u32> separate_alpha;
BitField<21, 1, u32> noise_enable;
BitField<22, 2, Pica::TexturingRegs::ProcTexShift> u_shift;
BitField<24, 2, Pica::TexturingRegs::ProcTexShift> v_shift;
};
s32 lut_width;
s32 lut_offset0;
s32 lut_offset1;
s32 lut_offset2;
s32 lut_offset3;
u8 lod_min;
u8 lod_max;
} proctex;
};
/**
 * Cache key holding all state used to generate the fragment shader that emulates the current
 * Pica register configuration. Shader generation code should read state from this struct only
 * and never from the Pica registers directly: register state that affects the generated code
 * but is missing from the key would let what should be two separate shaders share one key.
 */
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
    PicaFSConfig(const Pica::Regs& regs, const Instance& instance);

    /// Returns true when TEV stage `stage_index` (only stages 0-3 qualify) has its bit set in
    /// the low nibble of combiner_buffer_input.
    bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
        if (stage_index >= 4) {
            return false;
        }
        return (state.combiner_buffer_input & (1 << stage_index)) != 0;
    }

    /// Returns true when TEV stage `stage_index` (only stages 0-3 qualify) has its bit set in
    /// the high nibble of combiner_buffer_input.
    bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
        if (stage_index >= 4) {
            return false;
        }
        return ((state.combiner_buffer_input >> 4) & (1 << stage_index)) != 0;
    }
};
/// Load operations that can be applied to input vertex data. Every flag occupies its own bit,
/// so several of them can be combined in one value.
enum class AttribLoadFlags {
    Float = 0x1,
    Sint = 0x2,
    Uint = 0x4,
    ZeroW = 0x8,
};
DECLARE_ENUM_FLAG_OPERATORS(AttribLoadFlags)
/**
 * Common information identifying a vertex/geometry shader generated from a PICA
 * vertex/geometry shader. PicaVSConfig wraps this struct through Common::HashableStruct.
 * NOTE(review): the bool members sit between wider fields and therefore introduce padding; if
 * the struct is hashed or compared bytewise, the padding must be zero-initialized — confirm.
 */
struct PicaShaderConfigCommon {
// Fills the fields below from the given rasterizer/shader registers and shader setup.
void Init(const Pica::RasterizerRegs& rasterizer, const Pica::ShaderRegs& regs,
Pica::Shader::ShaderSetup& setup);
u64 program_hash;
u64 swizzle_hash;
u32 main_offset;
bool sanitize_mul;
u32 num_outputs;
// Load operations to apply to the input vertex data
std::array<AttribLoadFlags, 16> load_flags;
// output_map[output register index] -> output attribute index
std::array<u32, 16> output_map;
bool use_geometry_shader;
u32 vs_output_attributes;
u32 gs_output_attributes;
// Location of one semantic inside the GS outputs.
struct SemanticMap {
u32 attribute_index;
u32 component_index;
};
// semantic_maps[semantic name] -> GS output attribute index + component index
std::array<SemanticMap, 24> semantic_maps;
};
/**
 * Information identifying a vertex shader generated from a PICA vertex shader. Used as the key
 * type of the programmable vertex shader map.
 * NOTE(review): use_clip_planes is declared outside the hashed PicaShaderConfigCommon state —
 * confirm whether it takes part in Hash()/equality.
 */
struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
explicit PicaVSConfig(const Pica::RasterizerRegs& rasterizer, const Pica::ShaderRegs& regs,
Pica::Shader::ShaderSetup& setup, const Instance& instance);
bool use_clip_planes;
};
/// Raw state identifying a fixed geometry shader; PicaFixedGSConfig wraps it through
/// Common::HashableStruct.
struct PicaGSConfigCommonRaw {
// Fills the fields below from the given PICA registers.
void Init(const Pica::Regs& regs);
u32 vs_output_attributes;
u32 gs_output_attributes;
// Location of one semantic inside the GS outputs.
struct SemanticMap {
u32 attribute_index;
u32 component_index;
};
// semantic_maps[semantic name] -> GS output attribute index + component index
std::array<SemanticMap, 24> semantic_maps;
};
/**
 * Information identifying a geometry shader generated for the PICA pipeline that has no
 * geometry shader of its own.
 * NOTE(review): use_clip_planes is declared outside the hashed PicaGSConfigCommonRaw state —
 * confirm whether it takes part in Hash()/equality.
 */
struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigCommonRaw> {
explicit PicaFixedGSConfig(const Pica::Regs& regs, const Instance& instance);
bool use_clip_planes;
};
/**
 * Generates the GLSL vertex shader program source code that accepts vertices from software shader
 * and directly passes them to the fragment shader.
 * @param use_clip_planes presumably enables clip plane handling in the generated shader —
 *                        TODO confirm against the generator implementation
 * @returns String of the shader source code
 */
std::string GenerateTrivialVertexShader(bool use_clip_planes);
/**
 * Generates the GLSL vertex shader program source code for the given VS program
 * @param setup  PICA shader setup holding the VS program to translate
 * @param config configuration identifying the vertex shader to generate
 * @returns String of the shader source code; std::nullopt on failure
 */
std::optional<std::string> GenerateVertexShader(const Pica::Shader::ShaderSetup& setup,
const PicaVSConfig& config);
/**
 * Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline
 * @param config configuration identifying the fixed geometry shader to generate
 * @returns String of the shader source code
 */
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config);
/**
 * Generates the GLSL fragment shader program source code for the current Pica state
 * @param config ShaderCacheKey object generated for the current Pica state, used for the shader
 * configuration (NOTE: Use state in this struct only, not the Pica registers!)
 * @returns String of the shader source code
 */
std::string GenerateFragmentShader(const PicaFSConfig& config);
} // namespace Vulkan
// std::hash support for the shader configuration types so they can serve as keys of unordered
// containers. Every specialization simply forwards to the config's own Hash().
namespace std {

template <>
struct hash<Vulkan::PicaFSConfig> {
    size_t operator()(const Vulkan::PicaFSConfig& config) const noexcept {
        return config.Hash();
    }
};

template <>
struct hash<Vulkan::PicaVSConfig> {
    size_t operator()(const Vulkan::PicaVSConfig& config) const noexcept {
        return config.Hash();
    }
};

template <>
struct hash<Vulkan::PicaFixedGSConfig> {
    size_t operator()(const Vulkan::PicaFixedGSConfig& config) const noexcept {
        return config.Hash();
    }
};

} // namespace std

File diff suppressed because it is too large Load Diff

View File

@@ -1,294 +0,0 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <sirit/sirit.h>
#include "video_core/renderer_vulkan/vk_shader_gen.h"
namespace Core {
class TelemetrySession;
}
namespace Vulkan {
using Sirit::Id;
/// Holds the type ids of the vectors with 2, 3 and 4 components.
struct VectorIds {
    std::array<Id, 3> ids;

    /// Returns the type id of the vector with the provided size.
    /// Slot 0 stores the size-2 vector, so `size` must lie in [2, 4]; no bounds check is done.
    [[nodiscard]] constexpr Id Get(u32 size) const {
        const u32 slot = size - 2;
        return ids[slot];
    }
};
class FragmentModule : public Sirit::Module {
static constexpr u32 NUM_TEV_STAGES = 6;
static constexpr u32 NUM_LIGHTS = 8;
static constexpr u32 NUM_LIGHTING_SAMPLERS = 24;
static constexpr u32 NUM_TEX_UNITS = 4;
static constexpr u32 NUM_NON_PROC_TEX_UNITS = 3;
public:
explicit FragmentModule(Core::TelemetrySession& telemetry, const PicaFSConfig& config);
~FragmentModule();
/// Emits SPIR-V bytecode corresponding to the provided pica fragment configuration
void Generate();
private:
/// Undoes the Vulkan perspective transformation and applies the PICA one
void WriteDepth();
/// Emits code to emulate the scissor rectangle
void WriteScissor();
/// Writes the code to emulate fragment lighting
void WriteLighting();
/// Writes the code to emulate fog
void WriteFog();
/// Writes the code to emulate gas rendering
void WriteGas();
/// Writes the code to emulate the specified TEV stage
void WriteTevStage(s32 index);
/// Defines the basic texture sampling functions for a unit
void DefineTexSampler(u32 texture_unit);
/// Function for sampling the procedurally generated texture unit.
Id ProcTexSampler();
/// Writes the if-statement condition used to evaluate alpha testing.
void WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func);
/// Samples the current fragment texel from shadow plane
[[nodiscard]] Id SampleShadow();
[[nodiscard]] Id AppendProcTexShiftOffset(Id v, Pica::TexturingRegs::ProcTexShift mode,
Pica::TexturingRegs::ProcTexClamp clamp_mode);
[[nodiscard]] Id AppendProcTexClamp(Id var, Pica::TexturingRegs::ProcTexClamp mode);
[[nodiscard]] Id AppendProcTexCombineAndMap(Pica::TexturingRegs::ProcTexCombiner combiner, Id u,
Id v, Id offset);
/// Rounds the provided variable to the nearest 1/255th
[[nodiscard]] Id Byteround(Id variable_id, u32 size = 1);
/// LUT sampling uitlity
/// For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and
/// coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using
/// value entries and difference entries.
[[nodiscard]] Id ProcTexLookupLUT(Id offset, Id coord);
/// Generates random noise with proctex
[[nodiscard]] Id ProcTexNoiseCoef(Id x);
/// Samples a color value from the rgba texture lut
[[nodiscard]] Id SampleProcTexColor(Id lut_coord, Id level);
/// Lookups the lighting LUT at the provided lut_index
[[nodiscard]] Id LookupLightingLUT(Id lut_index, Id index, Id delta);
/// Writes the specified TEV stage source component(s)
[[nodiscard]] Id AppendSource(Pica::TexturingRegs::TevStageConfig::Source source, s32 index);
/// Writes the color components to use for the specified TEV stage color modifier
[[nodiscard]] Id AppendColorModifier(
Pica::TexturingRegs::TevStageConfig::ColorModifier modifier,
Pica::TexturingRegs::TevStageConfig::Source source, s32 index);
/// Writes the alpha component to use for the specified TEV stage alpha modifier
[[nodiscard]] Id AppendAlphaModifier(
Pica::TexturingRegs::TevStageConfig::AlphaModifier modifier,
Pica::TexturingRegs::TevStageConfig::Source source, s32 index);
/// Writes the combiner function for the color components for the specified TEV stage operation
[[nodiscard]] Id AppendColorCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
/// Writes the combiner function for the alpha component for the specified TEV stage operation
[[nodiscard]] Id AppendAlphaCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
private:
/// Creates a constant array of integers
template <typename... T>
void InitTableS32(Id table, T... elems) {
const Id table_const{ConstS32(elems...)};
OpStore(table, table_const);
};
/// Loads the member specified from the shader_data uniform struct
template <typename... Ids>
[[nodiscard]] Id GetShaderDataMember(Id type, Ids... ids) {
const Id uniform_ptr{TypePointer(spv::StorageClass::Uniform, type)};
return OpLoad(type, OpAccessChain(uniform_ptr, shader_data_id, ids...));
}
/// Pads the provided vector by inserting args at the end
template <typename... Args>
[[nodiscard]] Id PadVectorF32(Id vector, Id pad_type_id, Args&&... args) {
return OpCompositeConstruct(pad_type_id, vector, ConstF32(args...));
}
/// Defines a input variable
[[nodiscard]] Id DefineInput(Id type, u32 location) {
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
Decorate(input_id, spv::Decoration::Location, location);
return input_id;
}
/// Defines a input variable
[[nodiscard]] Id DefineOutput(Id type, u32 location) {
const Id output_id{DefineVar(type, spv::StorageClass::Output)};
Decorate(output_id, spv::Decoration::Location, location);
return output_id;
}
/// Defines a uniform constant variable
[[nodiscard]] Id DefineUniformConst(Id type, u32 set, u32 binding, bool readonly = false) {
const Id uniform_id{DefineVar(type, spv::StorageClass::UniformConstant)};
Decorate(uniform_id, spv::Decoration::DescriptorSet, set);
Decorate(uniform_id, spv::Decoration::Binding, binding);
if (readonly) {
Decorate(uniform_id, spv::Decoration::NonWritable);
}
return uniform_id;
}
template <bool global = true>
[[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) {
const Id pointer_type_id{TypePointer(storage_class, type)};
return global ? AddGlobalVariable(pointer_type_id, storage_class)
: AddLocalVariable(pointer_type_id, storage_class);
}
/// Returns the id of a signed integer constant of value
[[nodiscard]] Id ConstU32(u32 value) {
return Constant(u32_id, value);
}
template <typename... Args>
[[nodiscard]] Id ConstU32(Args&&... values) {
constexpr u32 size = static_cast<u32>(sizeof...(values));
static_assert(size >= 2);
const std::array constituents{Constant(u32_id, values)...};
const Id type = size <= 4 ? uvec_ids.Get(size) : TypeArray(u32_id, ConstU32(size));
return ConstantComposite(type, constituents);
}
/// Returns the id of a signed integer constant of value
[[nodiscard]] Id ConstS32(s32 value) {
return Constant(i32_id, value);
}
template <typename... Args>
[[nodiscard]] Id ConstS32(Args&&... values) {
constexpr u32 size = static_cast<u32>(sizeof...(values));
static_assert(size >= 2);
const std::array constituents{Constant(i32_id, values)...};
const Id type = size <= 4 ? ivec_ids.Get(size) : TypeArray(i32_id, ConstU32(size));
return ConstantComposite(type, constituents);
}
/// Returns the id of a float constant of value
[[nodiscard]] Id ConstF32(f32 value) {
return Constant(f32_id, value);
}
template <typename... Args>
[[nodiscard]] Id ConstF32(Args... values) {
constexpr u32 size = static_cast<u32>(sizeof...(values));
static_assert(size >= 2);
const std::array constituents{Constant(f32_id, values)...};
const Id type = size <= 4 ? vec_ids.Get(size) : TypeArray(f32_id, ConstU32(size));
return ConstantComposite(type, constituents);
}
void DefineArithmeticTypes();
void DefineEntryPoint();
void DefineUniformStructs();
void DefineInterface();
Id CompareShadow(Id pixel, Id z);
private:
Core::TelemetrySession& telemetry;
PicaFSConfig config;
Id void_id{};
Id bool_id{};
Id f32_id{};
Id i32_id{};
Id u32_id{};
VectorIds vec_ids{};
VectorIds ivec_ids{};
VectorIds uvec_ids{};
VectorIds bvec_ids{};
Id image2d_id{};
Id image_cube_id{};
Id image_buffer_id{};
Id image_r32_id{};
Id sampler_id{};
Id shader_data_id{};
Id primary_color_id{};
Id texcoord_id[NUM_NON_PROC_TEX_UNITS]{};
Id texcoord0_w_id{};
Id normquat_id{};
Id view_id{};
Id color_id{};
Id gl_frag_coord_id{};
Id gl_frag_depth_id{};
Id depth{};
Id tex0_id{};
Id tex1_id{};
Id tex2_id{};
Id tex_cube_id{};
Id texture_buffer_lut_lf_id{};
Id texture_buffer_lut_rg_id{};
Id texture_buffer_lut_rgba_id{};
Id shadow_texture_px_id{};
Id texture_buffer_lut_lf{};
Id texture_buffer_lut_rg{};
Id texture_buffer_lut_rgba{};
Id rounded_primary_color{};
Id primary_fragment_color{};
Id secondary_fragment_color{};
Id combiner_buffer{};
Id next_combiner_buffer{};
Id last_tex_env_out{};
Id color_results_1{};
Id color_results_2{};
Id color_results_3{};
Id alpha_results_1{};
Id alpha_results_2{};
Id alpha_results_3{};
Id sample_tex_unit_func[NUM_TEX_UNITS]{};
Id noise1d_table{};
Id noise2d_table{};
Id lut_offsets{};
};
/**
 * Generates the SPIR-V fragment shader program for the provided Pica fragment configuration
 * @param config PicaFSConfig object describing the Pica state the shader is generated for
 * (NOTE: Use state in this struct only, not the Pica registers!)
 * @returns The generated shader as a vector of 32-bit words (presumably the assembled SPIR-V
 * binary; confirm against the implementation)
 */
std::vector<u32> GenerateFragmentShaderSPV(const PicaFSConfig& config);
} // namespace Vulkan