Moar shader decompiler (#559)
* Renderer: Add prepareForDraw callback * Add fmt submodule and port shader decompiler instructions to it * Add shader acceleration setting * Hook up vertex shaders to shader cache * Shader decompiler: Fix redundant compilations * Shader Decompiler: Fix vertex attribute upload * Shader compiler: Simplify generated code for reading and faster compilation * Further simplify shader decompiler output * Shader decompiler: More smallen-ing * Shader decompiler: Get PICA uniforms uploaded to the GPU * Shader decompiler: Re-add clipping * Shader decompiler: Actually `break` on control flow instructions * Shader decompiler: More control flow handling * Shader decompiler: Fix destination mask * Shader Decomp: Remove pair member capture in lambda (unsupported on NDK) * Disgusting changes to handle the fact that hw shader shaders are 2x as big * Shader decompiler: Implement proper output semantic mapping * Moar instructions * Shader decompiler: Add FLR/SLT/SLTI/SGE/SGEI * Shader decompiler: Add register indexing * Shader decompiler: Optimize mova with both x and y masked * Shader decompiler: Add DPH/DPHI * Fix shader caching being broken * PICA decompiler: Cache VS uniforms * Simply vertex cache code * Simplify vertex cache code * Shader decompiler: Add loops * Shader decompiler: Implement safe multiplication * Shader decompiler: Implement LG2/EX2 * Shader decompiler: More control flow * Shader decompiler: Fix JMPU condition * Shader decompiler: Convert main function to void * PICA: Start implementing GPU vertex fetch * More hw VAO work * More hw VAO work * More GPU vertex fetch code * Add GL Stream Buffer from Duckstation * GL: Actually upload data to stream buffers * GPU: Cleanup immediate mode handling * Get first renders working with accelerated draws * Shader decompiler: Fix control flow analysis bugs * HW shaders: Accelerate indexed draws * Shader decompiler: Add support for compilation errors * GLSL decompiler: Fall back for LITP * Add Renderdoc scope classes * Fix 
control flow analysis bug * HW shaders: Fix attribute fetch * Rewriting hw vertex fetch * Stream buffer: Fix copy-paste mistake * HW shaders: Fix indexed rendering * HW shaders: Add padding attributes * HW shaders: Avoid redundant glVertexAttrib4f calls * HW shaders: Fix loops * HW shaders: Make generated shaders slightly smaller * Fix libretro build * HW shaders: Fix android * Remove redundant ubershader checks * Set accelerate shader default to true * Shader decompiler: Don't declare VS input attributes as an array * Change ubuntu-latest to Ubuntu 24.04 because Microsoft screwed up their CI again * fix merge conflict bug
This commit is contained in:
@@ -1,3 +1,7 @@
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "PICA/pica_frag_config.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader_gen.hpp"
|
||||
@@ -702,6 +706,113 @@ void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConf
|
||||
shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);";
|
||||
}
|
||||
|
||||
// Builds the GLSL vertex shader used for hardware-accelerated PICA vertex shaders.
//
// The returned source is picaSource followed by:
//   - "#define USING_GLES" when the generator's API is GLES,
//   - uniformDefinition,
//   - the declarations of the outputs consumed by the fragment stage,
//   - a main() that calls pica_shader_main() and then copies the fixed-function semantics
//     (position, quaternion, colour, texcoords, view) out of out_regs.
//
// picaSource: GLSL source that is used as the prefix of the result. The generated main() calls
//             pica_shader_main() and reads out_regs, so picaSource is presumably expected to
//             define both (not visible from here).
// vertConfig: Supplies outputMask, outputCount and outmaps, which decide which out_regs lane feeds each semantic.
// usingUbershader: Not supported; panics and returns picaSource unchanged.
std::string FragmentGenerator::getVertexShaderAccelerated(const std::string& picaSource, const PICA::VertConfig& vertConfig, bool usingUbershader) {
	// The ubershader path is unimplemented. Bail out up-front instead of building the semantic
	// mapping and formatting a string that would be thrown away.
	if (usingUbershader) {
		Helpers::panic("Unimplemented: GetVertexShaderAccelerated for ubershader");
		return picaSource;
	}

	// Mapping table for the 32 fixed function semantics (8 variables, with 4 lanes each).
	// Each entry holds the output register feeding that semantic (first) and the lane of that register (second).
	// Value-initialized, so semantics that are never mapped below resolve to out_regs[0][0].
	std::array<std::pair<int, int>, 32> outputMappings{};
	// Output registers adjusted according to the output mask, which handles enabling and disabling output attributes
	std::array<u8, 16> vsOutputRegisters;

	{
		const u16 outputMask = vertConfig.outputMask;
		u32 enabledCount = 0;

		// Pack the indices of the enabled registers at the front of the array, skipping the disabled ones
		for (int reg = 0; reg < 16; reg++) {
			if ((outputMask >> reg) & 1) {
				vsOutputRegisters[enabledCount++] = reg;
			}
		}

		// Fill the remaining slots with an identity mapping (TODO: What does hw actually do?)
		for (u32 slot = enabledCount; slot < 16; slot++) {
			vsOutputRegisters[slot] = static_cast<u8>(slot);
		}

		// Each outmap word packs 4 semantic indices, one per byte (only the low 5 bits are used), one for each lane
		for (int i = 0; i < vertConfig.outputCount; i++) {
			const u32 outmap = vertConfig.outmaps[i];

			for (int lane = 0; lane < 4; lane++) {
				const u32 semantic = (outmap >> (lane * 8)) & 0x1F;
				outputMappings[semantic] = std::make_pair(vsOutputRegisters[i], lane);
			}
		}
	}

	// Returns the GLSL expression that reads the register lane mapped to the given semantic
	auto getSemanticName = [&](u32 semanticIndex) {
		auto [reg, lane] = outputMappings[semanticIndex];
		return fmt::format("out_regs[{}][{}]", reg, lane);
	};

	// Semantic indices are hardcoded: 0-3 position, 4-7 quaternion, 8-11 colour, 12-13 texcoord0,
	// 16 texcoord0.w, 14-15 texcoord1, 22-23 texcoord2, 18-20 view
	std::string semantics = fmt::format(
		R"(
	vec4 a_coords = vec4({}, {}, {}, {});
	vec4 a_quaternion = vec4({}, {}, {}, {});
	vec4 a_vertexColour = vec4({}, {}, {}, {});
	vec2 a_texcoord0 = vec2({}, {});
	float a_texcoord0_w = {};
	vec2 a_texcoord1 = vec2({}, {});
	vec2 a_texcoord2 = vec2({}, {});
	vec3 a_view = vec3({}, {}, {});
)",
		getSemanticName(0), getSemanticName(1), getSemanticName(2), getSemanticName(3), getSemanticName(4), getSemanticName(5), getSemanticName(6),
		getSemanticName(7), getSemanticName(8), getSemanticName(9), getSemanticName(10), getSemanticName(11), getSemanticName(12),
		getSemanticName(13), getSemanticName(16), getSemanticName(14), getSemanticName(15), getSemanticName(22), getSemanticName(23),
		getSemanticName(18), getSemanticName(19), getSemanticName(20)
	);

	// TODO: Uniforms and don't hardcode fixed-function semantic indices...
	std::string ret = picaSource;
	if (api == API::GLES) {
		ret += "\n#define USING_GLES\n";
	}

	ret += uniformDefinition;

	ret += R"(
out vec4 v_quaternion;
out vec4 v_colour;
out vec3 v_texcoord0;
out vec2 v_texcoord1;
out vec3 v_view;
out vec2 v_texcoord2;

#ifndef USING_GLES
	out float gl_ClipDistance[2];
#endif

void main() {
	pica_shader_main();
)";
	// Transfer fixed function fragment registers from vertex shader output to the fragment shader
	ret += semantics;

	ret += R"(
	gl_Position = a_coords;
	vec4 colourAbs = abs(a_vertexColour);
	v_colour = min(colourAbs, vec4(1.f));

	v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
	v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
	v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
	v_view = a_view;
	v_quaternion = a_quaternion;

#ifndef USING_GLES
	gl_ClipDistance[0] = -a_coords.z;
	gl_ClipDistance[1] = dot(clipCoords, a_coords);
#endif
})";
	return ret;
}
|
||||
|
||||
void FragmentGenerator::compileLogicOps(std::string& shader, const PICA::FragmentConfig& config) {
|
||||
if (api != API::GLES) [[unlikely]] {
|
||||
Helpers::warn("Shadergen: Unsupported API for compileLogicOps");
|
||||
|
||||
Reference in New Issue
Block a user