video_core: Refactor GPU interface (#7272)

* video_core: Refactor GPU interface

* citra_qt: Better debug widget lifetime
This commit is contained in:
GPUCode
2023-12-28 12:46:57 +02:00
committed by GitHub
parent 602f4f60d8
commit 2bb7f89c30
167 changed files with 4172 additions and 4866 deletions

View File

@@ -0,0 +1,402 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <boost/serialization/base_object.hpp>
#include <boost/serialization/export.hpp>
#include <boost/serialization/unique_ptr.hpp>
#include "common/archives.h"
#include "core/core.h"
#include "video_core/gpu.h"
#include "video_core/pica/geometry_pipeline.h"
#include "video_core/pica/pica_core.h"
#include "video_core/pica/shader_setup.h"
#include "video_core/pica/shader_unit.h"
#include "video_core/shader/shader.h"
namespace Pica {
/**
 * Interface shared by the geometry shader buffering modes. Each implementation collects vertex
 * shader output and reports when a geometry shader invocation is due.
 */
class GeometryPipelineBackend {
public:
    virtual ~GeometryPipelineBackend() = default;

    /// Returns true when no partially transferred data is pending.
    virtual bool IsEmpty() const = 0;

    /// Returns true when the backend expects a value taken directly from the index buffer.
    virtual bool NeedIndexInput() const = 0;

    /// Feeds one value from the index buffer to the backend.
    virtual void SubmitIndex(unsigned int val) = 0;

    /**
     * Feeds the attributes of a single vertex.
     * @param input attributes of a vertex output from vertex shader
     * @return true once the buffer is full and the geometry shader should be invoked
     */
    virtual bool SubmitVertex(const AttributeBuffer& input) = 0;

private:
    friend class boost::serialization::access;

    /// The interface itself carries no state, so nothing is archived here.
    template <typename Archive>
    void serialize(Archive&, const unsigned int) {}
};
// In the Point mode, vertex attributes are sent to the input registers in the geometry shader unit.
// The size of vertex shader outputs and geometry shader inputs are constants. Geometry shader is
// invoked upon inputs buffer filled up by vertex shader outputs. For example, if we have a geometry
// shader that takes 6 inputs, and the vertex shader outputs 2 attributes, it would take 3 vertices
// for one geometry shader invocation.
// TODO: what happens when the input size is not divisible by the output size?
class GeometryPipeline_Point : public GeometryPipelineBackend {
public:
GeometryPipeline_Point(const RegsInternal& regs, GeometryShaderUnit& unit)
: regs(regs), unit(unit) {
ASSERT(regs.pipeline.variable_primitive == 0);
ASSERT(regs.gs.input_to_uniform == 0);
vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
std::size_t gs_input_num = regs.gs.max_input_attribute_index + 1;
ASSERT(gs_input_num % vs_output_num == 0);
buffer_cur = attribute_buffer.data();
buffer_end = attribute_buffer.data() + gs_input_num;
}
bool IsEmpty() const override {
return buffer_cur == attribute_buffer.data();
}
bool NeedIndexInput() const override {
return false;
}
void SubmitIndex(u32 val) override {
UNREACHABLE();
}
bool SubmitVertex(const AttributeBuffer& input) override {
buffer_cur = std::copy(input.data(), input.data() + vs_output_num, buffer_cur);
if (buffer_cur == buffer_end) {
buffer_cur = attribute_buffer.data();
unit.LoadInput(regs.gs, attribute_buffer);
return true;
}
return false;
}
private:
const RegsInternal& regs;
GeometryShaderUnit& unit;
AttributeBuffer attribute_buffer;
Common::Vec4<f24>* buffer_cur;
Common::Vec4<f24>* buffer_end;
u32 vs_output_num;
// TODO: REMOVE THIS
GeometryPipeline_Point()
: regs(Core::System::GetInstance().GPU().PicaCore().regs.internal),
unit(Core::System::GetInstance().GPU().PicaCore().gs_unit) {}
template <typename Class, class Archive>
static void serialize_common(Class* self, Archive& ar, const unsigned int version) {
ar& boost::serialization::base_object<GeometryPipelineBackend>(*self);
ar & self->attribute_buffer;
ar & self->vs_output_num;
}
template <class Archive>
void save(Archive& ar, const unsigned int version) const {
serialize_common(this, ar, version);
auto buffer_idx = static_cast<u32>(buffer_cur - attribute_buffer.data());
auto buffer_size = static_cast<u32>(buffer_end - attribute_buffer.data());
ar << buffer_idx;
ar << buffer_size;
}
template <class Archive>
void load(Archive& ar, const unsigned int version) {
serialize_common(this, ar, version);
u32 buffer_idx, buffer_size;
ar >> buffer_idx;
ar >> buffer_size;
buffer_cur = attribute_buffer.data() + buffer_idx;
buffer_end = attribute_buffer.data() + buffer_size;
}
BOOST_SERIALIZATION_SPLIT_MEMBER()
friend class boost::serialization::access;
};
// In VariablePrimitive mode, vertex attributes are buffered into the uniform registers in the
// geometry shader unit. The number of vertex is variable, which is specified by the first index
// value in the batch. This mode is usually used for subdivision.
class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend {
public:
    /// Binds the backend to the register file and to the geometry shader setup it writes into.
    GeometryPipeline_VariablePrimitive(const RegsInternal& regs, ShaderSetup& setup)
        : regs(regs), setup(setup) {
        ASSERT(regs.pipeline.variable_primitive == 1);
        ASSERT(regs.gs.input_to_uniform == 1);
        vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
    }

    /// No transfer is in flight while the backend is waiting for the next vertex count.
    bool IsEmpty() const override {
        return need_index;
    }

    /// The first value of every batch is the vertex count, taken from the index buffer.
    bool NeedIndexInput() const override {
        return need_index;
    }

    /**
     * Starts a new batch.
     * @param val number of vertices that will follow in this batch
     */
    void SubmitIndex(u32 val) override {
        DEBUG_ASSERT(need_index);
        // The number of vertex input is put to the uniform register
        f24 vertex_num = f24::FromFloat32(static_cast<float>(val));
        setup.uniforms.f[0] = Common::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num);
        // The second uniform register and so on are used for receiving input vertices
        buffer_cur = setup.uniforms.f.data() + 1;
        main_vertex_num = regs.pipeline.variable_vertex_main_num_minus_1 + 1;
        total_vertex_num = val;
        need_index = false;
    }

    /**
     * Stores one vertex of the current batch in the uniform registers.
     * @return true when the last vertex of the batch has been received
     */
    bool SubmitVertex(const AttributeBuffer& input) override {
        DEBUG_ASSERT(!need_index);
        if (main_vertex_num != 0) {
            // For main vertices, receive all attributes
            buffer_cur = std::copy(input.data(), input.data() + vs_output_num, buffer_cur);
            --main_vertex_num;
        } else {
            // For other vertices, only receive the first attribute (usually the position)
            *(buffer_cur++) = input[0];
        }
        // NOTE(review): a batch announced with a count of 0 makes this counter wrap around;
        // the intended behaviour for that case is not visible here - confirm.
        --total_vertex_num;
        if (total_vertex_num == 0) {
            need_index = true;
            return true;
        }
        return false;
    }

private:
    bool need_index = true;
    const RegsInternal& regs;
    ShaderSetup& setup;
    // The batch state below is only assigned by SubmitIndex, but save() archives it
    // unconditionally. Default initializers keep a savestate taken before the first batch from
    // reading indeterminate values. buffer_cur relies on `setup` being declared above it.
    u32 main_vertex_num = 0;
    u32 total_vertex_num = 0;
    Common::Vec4<f24>* buffer_cur = setup.uniforms.f.data() + 1;
    u32 vs_output_num = 0;

    // TODO: REMOVE THIS
    // Default construction is only reachable through boost serialization (friend below).
    GeometryPipeline_VariablePrimitive()
        : regs(Core::System::GetInstance().GPU().PicaCore().regs.internal),
          setup(Core::System::GetInstance().GPU().PicaCore().gs_setup) {}

    /// Archives the members whose representation is identical for saving and loading.
    template <typename Class, class Archive>
    static void serialize_common(Class* self, Archive& ar, const unsigned int version) {
        ar& boost::serialization::base_object<GeometryPipelineBackend>(*self);
        ar & self->need_index;
        ar & self->main_vertex_num;
        ar & self->total_vertex_num;
        ar & self->vs_output_num;
    }

    /// The write cursor is stored as an element offset into the float uniform array.
    template <class Archive>
    void save(Archive& ar, const unsigned int version) const {
        serialize_common(this, ar, version);
        auto buffer_idx = static_cast<u32>(buffer_cur - setup.uniforms.f.data());
        ar << buffer_idx;
    }

    /// Restores the write cursor from the archived element offset.
    template <class Archive>
    void load(Archive& ar, const unsigned int version) {
        serialize_common(this, ar, version);
        u32 buffer_idx;
        ar >> buffer_idx;
        buffer_cur = setup.uniforms.f.data() + buffer_idx;
    }

    BOOST_SERIALIZATION_SPLIT_MEMBER()
    friend class boost::serialization::access;
};
// In FixedPrimitive mode, vertex attributes are buffered into the uniform registers in the geometry
// shader unit. The number of vertex per shader invocation is constant. This is usually used for
// particle system.
class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend {
public:
    /// Computes the window of float uniforms that receives the vertices of one invocation.
    GeometryPipeline_FixedPrimitive(const RegsInternal& regs, ShaderSetup& setup) : setup(setup) {
        ASSERT(regs.pipeline.variable_primitive == 0);
        ASSERT(regs.gs.input_to_uniform == 1);
        vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
        ASSERT(vs_output_num == regs.pipeline.gs_config.stride_minus_1 + 1);

        const std::size_t vertices_per_invocation =
            regs.pipeline.gs_config.fixed_vertex_num_minus_1 + 1;
        buffer_begin = setup.uniforms.f.data() + regs.pipeline.gs_config.start_index;
        buffer_cur = buffer_begin;
        buffer_end = buffer_begin + vs_output_num * vertices_per_invocation;
    }

    /// The window is empty when the write cursor sits at its start.
    bool IsEmpty() const override {
        return buffer_begin == buffer_cur;
    }

    /// This mode never consumes raw index values.
    bool NeedIndexInput() const override {
        return false;
    }

    /// Must not be called, see NeedIndexInput.
    void SubmitIndex([[maybe_unused]] u32 val) override {
        UNREACHABLE();
    }

    /**
     * Copies the outputs of one vertex into the uniform window.
     * @return true when the window is full; the cursor is rewound for the next invocation
     */
    bool SubmitVertex(const AttributeBuffer& input) override {
        buffer_cur = std::copy_n(input.data(), vs_output_num, buffer_cur);
        if (buffer_cur != buffer_end) {
            return false;
        }
        buffer_cur = buffer_begin;
        return true;
    }

private:
    ShaderSetup& setup;
    Common::Vec4<f24>* buffer_begin;
    Common::Vec4<f24>* buffer_cur;
    Common::Vec4<f24>* buffer_end;
    u32 vs_output_num;

    // TODO: REMOVE THIS
    // Default construction is only reachable through boost serialization (friend below); the
    // remaining members are filled in by load().
    GeometryPipeline_FixedPrimitive()
        : setup(Core::System::GetInstance().GPU().PicaCore().gs_setup) {}

    /// Archives the members whose representation is identical for saving and loading.
    template <typename Class, class Archive>
    static void serialize_common(Class* self, Archive& ar, const unsigned int version) {
        ar& boost::serialization::base_object<GeometryPipelineBackend>(*self);
        ar & self->vs_output_num;
    }

    /// All three pointers are written as element offsets into the float uniform array.
    template <class Archive>
    void save(Archive& ar, const unsigned int version) const {
        serialize_common(this, ar, version);
        const Common::Vec4<f24>* const uniforms_base = setup.uniforms.f.data();
        auto begin_offset = static_cast<u32>(buffer_begin - uniforms_base);
        auto cursor_offset = static_cast<u32>(buffer_cur - uniforms_base);
        auto end_offset = static_cast<u32>(buffer_end - uniforms_base);
        ar << begin_offset;
        ar << cursor_offset;
        ar << end_offset;
    }

    /// Rebuilds the three pointers from the archived element offsets.
    template <class Archive>
    void load(Archive& ar, const unsigned int version) {
        serialize_common(this, ar, version);
        u32 begin_offset;
        u32 cursor_offset;
        u32 end_offset;
        ar >> begin_offset;
        ar >> cursor_offset;
        ar >> end_offset;
        Common::Vec4<f24>* const uniforms_base = setup.uniforms.f.data();
        buffer_begin = uniforms_base + begin_offset;
        buffer_cur = uniforms_base + cursor_offset;
        buffer_end = uniforms_base + end_offset;
    }

    BOOST_SERIALIZATION_SPLIT_MEMBER()
    friend class boost::serialization::access;
};
GeometryPipeline::GeometryPipeline(RegsInternal& regs_, GeometryShaderUnit& gs_unit_,
ShaderSetup& gs_)
: regs(regs_), gs_unit(gs_unit_), gs(gs_) {}
// Defaulted in this translation unit, where the backend classes held by `backend` are defined.
GeometryPipeline::~GeometryPipeline() = default;
/// Stores the callback that SubmitVertex invokes when no geometry shader backend is active.
void GeometryPipeline::SetVertexHandler(VertexHandler vertex_handler) {
// The parameter shadows the member of the same name, hence the explicit this->.
this->vertex_handler = std::move(vertex_handler);
}
/// Remembers the shader engine and prepares it for the geometry shader program. Does nothing
/// when no backend is configured.
void GeometryPipeline::Setup(ShaderEngine* shader_engine) {
    if (backend) {
        this->shader_engine = shader_engine;
        shader_engine->SetupBatch(gs, regs.gs.main_offset);
    }
}
/// Replaces the backend so that it matches the current register state. The previous backend, if
/// any, must not hold a partially transferred batch.
void GeometryPipeline::Reconfigure() {
    ASSERT(!backend || backend->IsEmpty());

    // With the geometry shader disabled there is nothing to buffer.
    if (regs.pipeline.use_gs == PipelineRegs::UseGS::No) {
        backend.reset();
        return;
    }

    // From here on shader unit 3 is assumed to be configured as a geometry shader unit whenever
    // the geometry shader is in use.
    // TODO: what happens if this is not true?
    ASSERT(regs.pipeline.gs_unit_exclusive_configuration == 1);
    ASSERT(regs.gs.shader_mode == ShaderRegs::ShaderMode::GS);
    ASSERT(regs.pipeline.use_gs == PipelineRegs::UseGS::Yes);

    gs_unit.ConfigOutput(regs.gs);

    ASSERT(regs.pipeline.vs_outmap_total_minus_1_a == regs.pipeline.vs_outmap_total_minus_1_b);

    // Pick the buffering strategy requested by the GS configuration register.
    switch (regs.pipeline.gs_config.mode) {
    case PipelineRegs::GSMode::Point: {
        backend = std::make_unique<GeometryPipeline_Point>(regs, gs_unit);
        break;
    }
    case PipelineRegs::GSMode::VariablePrimitive: {
        backend = std::make_unique<GeometryPipeline_VariablePrimitive>(regs, gs);
        break;
    }
    case PipelineRegs::GSMode::FixedPrimitive: {
        backend = std::make_unique<GeometryPipeline_FixedPrimitive>(regs, gs);
        break;
    }
    default:
        UNREACHABLE();
    }
}
/// Reports whether the active backend wants a value from the index buffer; always false when no
/// backend is configured.
bool GeometryPipeline::NeedIndexInput() const {
    return backend ? backend->NeedIndexInput() : false;
}
/// Forwards an index buffer value to the active backend.
/// `backend` is dereferenced without a null check: only call this after NeedIndexInput()
/// returned true, which it never does without a backend.
void GeometryPipeline::SubmitIndex(unsigned int val) {
backend->SubmitIndex(val);
}
/// Routes one vertex shader output either straight to the vertex handler or through the
/// geometry shader backend.
void GeometryPipeline::SubmitVertex(const AttributeBuffer& input) {
    if (!backend) {
        // Geometry shader disabled: the vertex shader output goes directly to the primitive
        // assembler via the registered handler.
        vertex_handler(input);
        return;
    }

    // The backend reports true only once a complete geometry shader input has been gathered.
    if (!backend->SubmitVertex(input)) {
        return;
    }

    shader_engine->Run(gs, gs_unit);

    // Uniform b15 is raised after every geometry shader invocation. A program that cleared it
    // beforehand can use it to detect the first invocation of a batch.
    gs.uniforms.b[15] = true;
}
/// Archives the pipeline state. Only the polymorphic backend is stored; the references held by
/// the pipeline are not part of the archive.
template <class Archive>
void GeometryPipeline::serialize(Archive& ar, const unsigned int version) {
// vertex_handler and shader_engine are always set to the same value
ar& backend;
}
} // namespace Pica
// Serialization registration for the three backend classes and for GeometryPipeline itself.
// NOTE(review): the macros presumably come from common/archives.h and pair with the
// BOOST_CLASS_EXPORT_KEY lines of the header - confirm.
SERIALIZE_EXPORT_IMPL(Pica::GeometryPipeline_Point)
SERIALIZE_EXPORT_IMPL(Pica::GeometryPipeline_VariablePrimitive)
SERIALIZE_EXPORT_IMPL(Pica::GeometryPipeline_FixedPrimitive)
SERIALIZE_IMPL(Pica::GeometryPipeline)

View File

@@ -0,0 +1,64 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <boost/serialization/export.hpp>
#include "video_core/pica/shader_unit.h"
namespace Pica {
struct RegsInternal;
struct GeometryShaderUnit;
struct ShaderSetup;
class ShaderEngine;
class GeometryPipelineBackend;
class GeometryPipeline_Point;
class GeometryPipeline_VariablePrimitive;
class GeometryPipeline_FixedPrimitive;
/// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler
class GeometryPipeline {
public:
    /**
     * @param regs internal register file the pipeline reads its configuration from
     * @param gs_unit shader unit that executes the geometry shader
     * @param gs geometry shader setup (program and uniforms)
     */
    explicit GeometryPipeline(RegsInternal& regs, GeometryShaderUnit& gs_unit, ShaderSetup& gs);
    ~GeometryPipeline();

    /// Sets the handler for receiving vertex outputs from vertex shader
    void SetVertexHandler(VertexHandler vertex_handler);

    /// Setup the geometry shader unit if it is in use
    void Setup(ShaderEngine* shader_engine);

    /// Reconfigures the pipeline according to current register settings
    void Reconfigure();

    /// Checks if the pipeline needs a direct input from index buffer
    bool NeedIndexInput() const;

    /// Submits an index from index buffer. Call this only when NeedIndexInput returns true
    void SubmitIndex(unsigned int val);

    /// Submits vertex attributes output from vertex shader
    void SubmitVertex(const AttributeBuffer& input);

private:
    VertexHandler vertex_handler;
    /// Engine used to run the geometry shader; assigned by Setup(). Starts out null so that a
    /// missing Setup() call is a deterministic null dereference, not a read of garbage.
    ShaderEngine* shader_engine = nullptr;
    /// Active buffering mode, or null while the geometry shader is disabled.
    std::unique_ptr<GeometryPipelineBackend> backend;
    RegsInternal& regs;
    GeometryShaderUnit& gs_unit;
    ShaderSetup& gs;

    template <class Archive>
    void serialize(Archive& ar, const unsigned int version);
    friend class boost::serialization::access;
};
} // namespace Pica
// Export keys for the backend classes, which are archived through a pointer to their base class
// (see GeometryPipeline::backend).
BOOST_CLASS_EXPORT_KEY(Pica::GeometryPipeline_Point)
BOOST_CLASS_EXPORT_KEY(Pica::GeometryPipeline_VariablePrimitive)
BOOST_CLASS_EXPORT_KEY(Pica::GeometryPipeline_FixedPrimitive)

View File

@@ -0,0 +1,50 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/pica/output_vertex.h"
#include "video_core/pica/regs_rasterizer.h"
namespace Pica {
/// Builds a rasterizer vertex by scattering shader output components to the semantic slots
/// selected by the output mapping registers.
OutputVertex::OutputVertex(const RasterizerRegs& regs, const AttributeBuffer& output) {
    // Attributes can be used without being set in GPUREG_SH_OUTMAP_Oi.
    // Hardware tests have shown that they are initialized to 1 in this case.
    // The scratch array is larger than OutputVertex; only its leading part is copied out below.
    std::array<f24, 32> slots;
    slots.fill(f24::One());

    const u32 mapped_count = regs.vs_output_total & 7;
    for (std::size_t reg = 0; reg < mapped_count; ++reg) {
        const auto semantics = regs.vs_output_attributes[reg];
        const auto& components = output[reg];
        slots[semantics.map_x] = components[0];
        slots[semantics.map_y] = components[1];
        slots[semantics.map_z] = components[2];
        slots[semantics.map_w] = components[3];
    }

    // Copy to result
    std::memcpy(this, slots.data(), sizeof(OutputVertex));

    // The hardware takes the absolute and saturates vertex colors, *before* doing interpolation
    for (u32 channel = 0; channel < 4; ++channel) {
        const f32 magnitude = std::fabs(color[channel].ToFloat32());
        if (magnitude < 1.0f) {
            color[channel] = f24::FromFloat32(magnitude);
        } else {
            color[channel] = f24::FromFloat32(1.0f);
        }
    }
}
// Compile-time check that each OutputVertex member sits at the word offset given by its semantic
// id, which the memcpy in the constructor relies on.
#define ASSERT_POS(var, pos) \
static_assert(offsetof(OutputVertex, var) == pos * sizeof(f24), "Semantic at wrong " \
"offset.")
ASSERT_POS(pos, RasterizerRegs::VSOutputAttributes::POSITION_X);
ASSERT_POS(quat, RasterizerRegs::VSOutputAttributes::QUATERNION_X);
ASSERT_POS(color, RasterizerRegs::VSOutputAttributes::COLOR_R);
ASSERT_POS(tc0, RasterizerRegs::VSOutputAttributes::TEXCOORD0_U);
ASSERT_POS(tc1, RasterizerRegs::VSOutputAttributes::TEXCOORD1_U);
ASSERT_POS(tc0_w, RasterizerRegs::VSOutputAttributes::TEXCOORD0_W);
ASSERT_POS(view, RasterizerRegs::VSOutputAttributes::VIEW_X);
ASSERT_POS(tc2, RasterizerRegs::VSOutputAttributes::TEXCOORD2_U);
// The helper macro is only needed for the checks above.
#undef ASSERT_POS
} // namespace Pica

View File

@@ -0,0 +1,48 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/vector_math.h"
#include "video_core/pica_types.h"
namespace Pica {
struct RasterizerRegs;
using AttributeBuffer = std::array<Common::Vec4<f24>, 16>;
/// Vertex handed to the primitive assembler and rasterizer. The member order and padding are
/// part of the contract: the converting constructor fills the struct with a raw memcpy and the
/// static_asserts below pin its size.
struct OutputVertex {
OutputVertex() = default;
/// Builds the vertex from shader output using the output mapping in `regs`.
explicit OutputVertex(const RasterizerRegs& regs, const AttributeBuffer& output);
Common::Vec4<f24> pos;
Common::Vec4<f24> quat;
Common::Vec4<f24> color;
Common::Vec2<f24> tc0;
Common::Vec2<f24> tc1;
f24 tc0_w;
INSERT_PADDING_WORDS(1);
Common::Vec3<f24> view;
INSERT_PADDING_WORDS(1);
Common::Vec2<f24> tc2;
private:
/// Archives every named member in declaration order; the padding words are not stored.
template <class Archive>
void serialize(Archive& ar, const u32) {
ar& pos;
ar& quat;
ar& color;
ar& tc0;
ar& tc1;
ar& tc0_w;
ar& view;
ar& tc2;
}
friend class boost::serialization::access;
};
// The struct must stay trivial and exactly 24 words large.
static_assert(std::is_trivial_v<OutputVertex>, "Structure is not POD");
static_assert(sizeof(OutputVertex) == 24 * sizeof(f32), "OutputVertex has invalid size");
} // namespace Pica

View File

@@ -0,0 +1,74 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <boost/serialization/binary_object.hpp>
#include "common/vector_math.h"
#include "video_core/pica_types.h"
namespace Pica {
/**
* Uniforms and fixed attributes are written in a packed format such that four float24 values are
* encoded in three 32-bit numbers. Uniforms can also encode four float32 values in four 32-bit
* numbers. We write to internal memory once a full vector is written.
*/
struct PackedAttribute {
std::array<u32, 4> buffer{};
u32 index{};
/// Places a word to the queue and returns true if the queue becomes full.
constexpr bool Push(u32 word, bool is_float32 = false) {
buffer[index++] = word;
return (index >= 4 && is_float32) || (index >= 3 && !is_float32);
}
/// Resets the queue discarding previous entries.
constexpr void Reset() {
index = 0;
}
/// Returns the queue contents with either float24 or float32 interpretation.
constexpr Common::Vec4<f24> Get(bool is_float32 = false) {
Reset();
if (is_float32) {
return AsFloat32();
} else {
return AsFloat24();
}
}
private:
/// Decodes the queue contents with float24 transfer mode.
constexpr Common::Vec4<f24> AsFloat24() const {
const u32 x = buffer[2] & 0xFFFFFF;
const u32 y = ((buffer[1] & 0xFFFF) << 8) | ((buffer[2] >> 24) & 0xFF);
const u32 z = ((buffer[0] & 0xFF) << 16) | ((buffer[1] >> 16) & 0xFFFF);
const u32 w = buffer[0] >> 8;
return Common::Vec4<f24>{f24::FromRaw(x), f24::FromRaw(y), f24::FromRaw(z),
f24::FromRaw(w)};
}
/// Decodes the queue contents with float32 transfer mode.
constexpr Common::Vec4<f24> AsFloat32() const {
Common::Vec4<f24> uniform;
for (u32 i = 0; i < 4; i++) {
const f32 buffer_value = std::bit_cast<f32>(buffer[i]);
uniform[3 - i] = f24::FromFloat32(buffer_value);
}
return uniform;
}
private:
template <class Archive>
void serialize(Archive& ar, const u32) {
ar& buffer;
ar& index;
}
friend class boost::serialization::access;
};
} // namespace Pica

View File

@@ -0,0 +1,592 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/arch.h"
#include "common/archives.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/pica/pica_core.h"
#include "video_core/pica/vertex_loader.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/shader/shader.h"
namespace Pica {
MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));
using namespace DebugUtils;
/// Header word of a command list entry, as consumed by ProcessCmdList.
union CommandHeader {
// Raw 32-bit view of the header.
u32 hex;
// Id of the internal register to write.
BitField<0, 16, u32> cmd_id;
// 4-bit mask passed to WriteInternalReg, where each bit enables one byte of the written value.
BitField<16, 4, u32> parameter_mask;
// Number of additional value words that follow the header.
BitField<20, 8, u32> extra_data_length;
// When set, each additional word targets the next consecutive register; otherwise all of them
// are written to cmd_id.
BitField<31, 1, u32> group_commands;
};
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
/// Wires the GPU core together: creates the shader engine, applies the framebuffer defaults and
/// connects the shader output to the primitive assembler and rasterizer.
PicaCore::PicaCore(Memory::MemorySystem& memory_, DebugContext& debug_context_)
    : memory{memory_}, debug_context{debug_context_},
      geometry_pipeline{regs.internal, gs_unit, gs_setup},
      shader_engine{CreateEngine(Settings::values.use_shader_jit.GetValue())} {
    SetFramebufferDefaults();

    // Converts a shader output buffer into a rasterizer vertex and feeds it to the primitive
    // assembler; completed triangles go to whichever rasterizer is bound at that time.
    const auto on_vertex = [this](const AttributeBuffer& attributes) {
        const auto emit_triangle = [this](const OutputVertex& a, const OutputVertex& b,
                                          const OutputVertex& c) {
            rasterizer->AddTriangle(a, b, c);
        };
        const auto converted = OutputVertex(regs.internal.rasterizer, attributes);
        primitive_assembler.SubmitVertex(converted, emit_triangle);
    };
    const auto on_winding = [this]() { primitive_assembler.SetWinding(); };

    // The same vertex sink serves both the geometry shader unit and the geometry pipeline.
    gs_unit.SetVertexHandlers(on_vertex, on_winding);
    geometry_pipeline.SetVertexHandler(on_vertex);

    primitive_assembler.Reconfigure(PipelineRegs::TriangleTopology::List);
}
// Destructor is defaulted out of line, in this translation unit.
PicaCore::~PicaCore() = default;
void PicaCore::SetFramebufferDefaults() {
auto& framebuffer_top = regs.framebuffer_config[0];
auto& framebuffer_sub = regs.framebuffer_config[1];
// Set framebuffer defaults from nn::gx::Initialize
framebuffer_top.address_left1 = 0x181E6000;
framebuffer_top.address_left2 = 0x1822C800;
framebuffer_top.address_right1 = 0x18273000;
framebuffer_top.address_right2 = 0x182B9800;
framebuffer_sub.address_left1 = 0x1848F000;
framebuffer_sub.address_left2 = 0x184C7800;
framebuffer_top.width.Assign(240);
framebuffer_top.height.Assign(400);
framebuffer_top.stride = 3 * 240;
framebuffer_top.color_format.Assign(PixelFormat::RGB8);
framebuffer_top.active_fb = 0;
framebuffer_sub.width.Assign(240);
framebuffer_sub.height.Assign(320);
framebuffer_sub.stride = 3 * 240;
framebuffer_sub.color_format.Assign(PixelFormat::RGB8);
framebuffer_sub.active_fb = 0;
}
/// Stores the rasterizer that receives triangles and register change notifications.
/// The pointer is kept as is; no ownership is taken here.
void PicaCore::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
this->rasterizer = rasterizer;
}
/// Stores the callback used to signal GSP interrupts; it is invoked with InterruptId::P3D when
/// the trigger_irq register is written.
void PicaCore::SetInterruptHandler(Service::GSP::InterruptHandler& signal_interrupt) {
this->signal_interrupt = signal_interrupt;
}
/**
 * Executes a command list located in physical memory.
 * @param list physical address of the command list
 * @param size length passed on to the command list tracker
 */
void PicaCore::ProcessCmdList(PAddr list, u32 size) {
    // Initialize command list tracking.
    const u8* list_start = memory.GetPhysicalPointer(list);
    cmd_list.Reset(list, list_start, size);

    // Every read goes through cmd_list itself: a register write may retarget the list while
    // it is being processed (see the command_buffer.trigger registers).
    const auto next_word = [this]() -> u32 { return cmd_list.head[cmd_list.current_index++]; };

    while (cmd_list.current_index < cmd_list.length) {
        // Entries start on an even word, i.e. they are aligned to 8 bytes.
        if ((cmd_list.current_index % 2) != 0) {
            ++cmd_list.current_index;
        }

        // The value comes first, followed by the header describing where it goes.
        const u32 first_value = next_word();
        const CommandHeader header{next_word()};

        // Write to the requested PICA register.
        WriteInternalReg(header.cmd_id, first_value, header.parameter_mask);

        // Write any extra parameters as well.
        for (u32 n = 0; n < header.extra_data_length; ++n) {
            u32 target = header.cmd_id;
            if (header.group_commands) {
                // Grouped commands address consecutive registers.
                target += n + 1;
            }
            const u32 extra_value = next_word();
            WriteInternalReg(target, extra_value, header.parameter_mask);
        }
    }
}
/**
 * Writes a value to an internal PICA register and performs the side effects tied to it.
 * @param id index of the register; writes to ids outside the register file are logged and dropped
 * @param value raw value to write
 * @param mask 4-bit byte-enable mask; only the enabled bytes of the register are replaced
 */
void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask) {
if (id >= RegsInternal::NUM_REGS) {
LOG_ERROR(
HW_GPU,
"Commandlist tried to write to invalid register 0x{:03X} (value: {:08X}, mask: {:X})",
id, value, mask);
return;
}
// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
constexpr std::array<u32, 16> ExpandBitsToBytes = {
0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff,
0x00ffff00, 0x00ffffff, 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff,
0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,
};
// TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value
// NOTE(review): mask indexes the 16-entry table without a range check; the caller visible here
// passes a 4-bit field - confirm for other callers.
const u32 old_value = regs.internal.reg_array[id];
const u32 write_mask = ExpandBitsToBytes[mask];
regs.internal.reg_array[id] = (old_value & ~write_mask) | (value & write_mask);
// Track register write.
DebugUtils::OnPicaRegWrite(id, mask, regs.internal.reg_array[id]);
// Track events.
debug_context.OnEvent(DebugContext::Event::PicaCommandLoaded, &id);
// The "processed" event fires when this function is left, whichever path is taken below.
SCOPE_EXIT({ debug_context.OnEvent(DebugContext::Event::PicaCommandProcessed, &id); });
// Most handlers below receive the unmasked `value`, not the merged register content.
switch (id) {
// Trigger IRQ
case PICA_REG_INDEX(trigger_irq):
signal_interrupt(Service::GSP::InterruptId::P3D);
break;
case PICA_REG_INDEX(pipeline.triangle_topology):
primitive_assembler.Reconfigure(regs.internal.pipeline.triangle_topology);
break;
case PICA_REG_INDEX(pipeline.restart_primitive):
primitive_assembler.Reset();
break;
case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index):
immediate.Reset();
break;
// Load default vertex input attributes
case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.set_value[0]):
case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.set_value[1]):
case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.set_value[2]):
SubmitImmediate(value);
break;
case PICA_REG_INDEX(pipeline.gpu_mode):
// This register likely just enables vertex processing and doesn't need any special handling
break;
// Retarget command list tracking to the buffer selected by the written trigger register.
case PICA_REG_INDEX(pipeline.command_buffer.trigger[0]):
case PICA_REG_INDEX(pipeline.command_buffer.trigger[1]): {
const u32 index = static_cast<u32>(id - PICA_REG_INDEX(pipeline.command_buffer.trigger[0]));
const PAddr addr = regs.internal.pipeline.command_buffer.GetPhysicalAddress(index);
const u32 size = regs.internal.pipeline.command_buffer.GetSize(index);
const u8* head = memory.GetPhysicalPointer(addr);
cmd_list.Reset(addr, head, size);
break;
}
// It seems like these trigger vertex rendering
case PICA_REG_INDEX(pipeline.trigger_draw):
case PICA_REG_INDEX(pipeline.trigger_draw_indexed): {
const bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
DrawArrays(is_indexed);
break;
}
// Geometry shader uniforms, program and swizzle uploads.
case PICA_REG_INDEX(gs.bool_uniforms):
gs_setup.WriteUniformBoolReg(regs.internal.gs.bool_uniforms.Value());
break;
case PICA_REG_INDEX(gs.int_uniforms[0]):
case PICA_REG_INDEX(gs.int_uniforms[1]):
case PICA_REG_INDEX(gs.int_uniforms[2]):
case PICA_REG_INDEX(gs.int_uniforms[3]): {
const u32 index = (id - PICA_REG_INDEX(gs.int_uniforms[0]));
gs_setup.WriteUniformIntReg(index, regs.internal.gs.GetIntUniform(index));
break;
}
case PICA_REG_INDEX(gs.uniform_setup.set_value[0]):
case PICA_REG_INDEX(gs.uniform_setup.set_value[1]):
case PICA_REG_INDEX(gs.uniform_setup.set_value[2]):
case PICA_REG_INDEX(gs.uniform_setup.set_value[3]):
case PICA_REG_INDEX(gs.uniform_setup.set_value[4]):
case PICA_REG_INDEX(gs.uniform_setup.set_value[5]):
case PICA_REG_INDEX(gs.uniform_setup.set_value[6]):
case PICA_REG_INDEX(gs.uniform_setup.set_value[7]): {
gs_setup.WriteUniformFloatReg(regs.internal.gs, value);
break;
}
case PICA_REG_INDEX(gs.program.set_word[0]):
case PICA_REG_INDEX(gs.program.set_word[1]):
case PICA_REG_INDEX(gs.program.set_word[2]):
case PICA_REG_INDEX(gs.program.set_word[3]):
case PICA_REG_INDEX(gs.program.set_word[4]):
case PICA_REG_INDEX(gs.program.set_word[5]):
case PICA_REG_INDEX(gs.program.set_word[6]):
case PICA_REG_INDEX(gs.program.set_word[7]): {
// The offset register itself advances with every accepted word.
u32& offset = regs.internal.gs.program.offset;
if (offset >= 4096) {
LOG_ERROR(HW_GPU, "Invalid GS program offset {}", offset);
} else {
gs_setup.program_code[offset] = value;
gs_setup.MarkProgramCodeDirty();
offset++;
}
break;
}
case PICA_REG_INDEX(gs.swizzle_patterns.set_word[0]):
case PICA_REG_INDEX(gs.swizzle_patterns.set_word[1]):
case PICA_REG_INDEX(gs.swizzle_patterns.set_word[2]):
case PICA_REG_INDEX(gs.swizzle_patterns.set_word[3]):
case PICA_REG_INDEX(gs.swizzle_patterns.set_word[4]):
case PICA_REG_INDEX(gs.swizzle_patterns.set_word[5]):
case PICA_REG_INDEX(gs.swizzle_patterns.set_word[6]):
case PICA_REG_INDEX(gs.swizzle_patterns.set_word[7]): {
u32& offset = regs.internal.gs.swizzle_patterns.offset;
if (offset >= gs_setup.swizzle_data.size()) {
LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset {}", offset);
} else {
gs_setup.swizzle_data[offset] = value;
gs_setup.MarkSwizzleDataDirty();
offset++;
}
break;
}
// Vertex shader uniforms, program and swizzle uploads.
case PICA_REG_INDEX(vs.bool_uniforms):
vs_setup.WriteUniformBoolReg(regs.internal.vs.bool_uniforms.Value());
break;
case PICA_REG_INDEX(vs.int_uniforms[0]):
case PICA_REG_INDEX(vs.int_uniforms[1]):
case PICA_REG_INDEX(vs.int_uniforms[2]):
case PICA_REG_INDEX(vs.int_uniforms[3]): {
const u32 index = (id - PICA_REG_INDEX(vs.int_uniforms[0]));
vs_setup.WriteUniformIntReg(index, regs.internal.vs.GetIntUniform(index));
break;
}
case PICA_REG_INDEX(vs.uniform_setup.set_value[0]):
case PICA_REG_INDEX(vs.uniform_setup.set_value[1]):
case PICA_REG_INDEX(vs.uniform_setup.set_value[2]):
case PICA_REG_INDEX(vs.uniform_setup.set_value[3]):
case PICA_REG_INDEX(vs.uniform_setup.set_value[4]):
case PICA_REG_INDEX(vs.uniform_setup.set_value[5]):
case PICA_REG_INDEX(vs.uniform_setup.set_value[6]):
case PICA_REG_INDEX(vs.uniform_setup.set_value[7]): {
vs_setup.WriteUniformFloatReg(regs.internal.vs, value);
break;
}
case PICA_REG_INDEX(vs.program.set_word[0]):
case PICA_REG_INDEX(vs.program.set_word[1]):
case PICA_REG_INDEX(vs.program.set_word[2]):
case PICA_REG_INDEX(vs.program.set_word[3]):
case PICA_REG_INDEX(vs.program.set_word[4]):
case PICA_REG_INDEX(vs.program.set_word[5]):
case PICA_REG_INDEX(vs.program.set_word[6]):
case PICA_REG_INDEX(vs.program.set_word[7]): {
u32& offset = regs.internal.vs.program.offset;
if (offset >= 512) {
LOG_ERROR(HW_GPU, "Invalid VS program offset {}", offset);
} else {
vs_setup.program_code[offset] = value;
vs_setup.MarkProgramCodeDirty();
// Unless the GS unit is configured exclusively, the word is mirrored into the GS program.
if (!regs.internal.pipeline.gs_unit_exclusive_configuration) {
gs_setup.program_code[offset] = value;
gs_setup.MarkProgramCodeDirty();
}
offset++;
}
break;
}
case PICA_REG_INDEX(vs.swizzle_patterns.set_word[0]):
case PICA_REG_INDEX(vs.swizzle_patterns.set_word[1]):
case PICA_REG_INDEX(vs.swizzle_patterns.set_word[2]):
case PICA_REG_INDEX(vs.swizzle_patterns.set_word[3]):
case PICA_REG_INDEX(vs.swizzle_patterns.set_word[4]):
case PICA_REG_INDEX(vs.swizzle_patterns.set_word[5]):
case PICA_REG_INDEX(vs.swizzle_patterns.set_word[6]):
case PICA_REG_INDEX(vs.swizzle_patterns.set_word[7]): {
u32& offset = regs.internal.vs.swizzle_patterns.offset;
if (offset >= vs_setup.swizzle_data.size()) {
LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset {}", offset);
} else {
vs_setup.swizzle_data[offset] = value;
vs_setup.MarkSwizzleDataDirty();
// Same mirroring as for the program words above.
if (!regs.internal.pipeline.gs_unit_exclusive_configuration) {
gs_setup.swizzle_data[offset] = value;
gs_setup.MarkSwizzleDataDirty();
}
offset++;
}
break;
}
// Lighting LUT upload: store the entry selected by lut_config, then advance the index.
case PICA_REG_INDEX(lighting.lut_data[0]):
case PICA_REG_INDEX(lighting.lut_data[1]):
case PICA_REG_INDEX(lighting.lut_data[2]):
case PICA_REG_INDEX(lighting.lut_data[3]):
case PICA_REG_INDEX(lighting.lut_data[4]):
case PICA_REG_INDEX(lighting.lut_data[5]):
case PICA_REG_INDEX(lighting.lut_data[6]):
case PICA_REG_INDEX(lighting.lut_data[7]): {
auto& lut_config = regs.internal.lighting.lut_config;
ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!");
// NOTE(review): lut_config.type is used as an index without a range check - confirm it cannot
// exceed the number of LUTs.
lighting.luts[lut_config.type][lut_config.index].raw = value;
lut_config.index.Assign(lut_config.index + 1);
break;
}
// Fog LUT upload: the offset wraps within the 128 entries and advances after each write.
case PICA_REG_INDEX(texturing.fog_lut_data[0]):
case PICA_REG_INDEX(texturing.fog_lut_data[1]):
case PICA_REG_INDEX(texturing.fog_lut_data[2]):
case PICA_REG_INDEX(texturing.fog_lut_data[3]):
case PICA_REG_INDEX(texturing.fog_lut_data[4]):
case PICA_REG_INDEX(texturing.fog_lut_data[5]):
case PICA_REG_INDEX(texturing.fog_lut_data[6]):
case PICA_REG_INDEX(texturing.fog_lut_data[7]): {
fog.lut[regs.internal.texturing.fog_lut_offset % 128].raw = value;
regs.internal.texturing.fog_lut_offset.Assign(regs.internal.texturing.fog_lut_offset + 1);
break;
}
// Procedural texture LUT upload: ref_table selects the table, the index wraps at its size.
case PICA_REG_INDEX(texturing.proctex_lut_data[0]):
case PICA_REG_INDEX(texturing.proctex_lut_data[1]):
case PICA_REG_INDEX(texturing.proctex_lut_data[2]):
case PICA_REG_INDEX(texturing.proctex_lut_data[3]):
case PICA_REG_INDEX(texturing.proctex_lut_data[4]):
case PICA_REG_INDEX(texturing.proctex_lut_data[5]):
case PICA_REG_INDEX(texturing.proctex_lut_data[6]):
case PICA_REG_INDEX(texturing.proctex_lut_data[7]): {
auto& index = regs.internal.texturing.proctex_lut_config.index;
switch (regs.internal.texturing.proctex_lut_config.ref_table.Value()) {
case TexturingRegs::ProcTexLutTable::Noise:
proctex.noise_table[index % proctex.noise_table.size()].raw = value;
break;
case TexturingRegs::ProcTexLutTable::ColorMap:
proctex.color_map_table[index % proctex.color_map_table.size()].raw = value;
break;
case TexturingRegs::ProcTexLutTable::AlphaMap:
proctex.alpha_map_table[index % proctex.alpha_map_table.size()].raw = value;
break;
case TexturingRegs::ProcTexLutTable::Color:
proctex.color_table[index % proctex.color_table.size()].raw = value;
break;
case TexturingRegs::ProcTexLutTable::ColorDiff:
proctex.color_diff_table[index % proctex.color_diff_table.size()].raw = value;
break;
}
// The index advances even when ref_table matched none of the tables above.
index.Assign(index + 1);
break;
}
default:
break;
}
// Notify the rasterizer an internal register was updated.
rasterizer->NotifyPicaRegisterChanged(id);
}
/**
 * Consumes one word written through the immediate-mode attribute port.
 *
 * Words are accumulated in the packed attribute queue. Once a whole attribute is available it is
 * stored either as a default vertex attribute (setup index below 0xF) or as the next attribute of
 * the immediate-mode vertex (setup index equal to 0xF). The vertex is drawn as soon as its last
 * attribute has been stored.
 *
 * @param value Raw word to push to the attribute queue.
 */
void PicaCore::SubmitImmediate(u32 value) {
    // Nothing more to do until the queue reports that a full attribute has been assembled.
    const bool attribute_complete = immediate.queue.Push(value);
    if (!attribute_complete) {
        return;
    }

    constexpr size_t IMMEDIATE_MODE_INDEX = 0xF;
    auto& setup = regs.internal.pipeline.vs_default_attributes_setup;

    // Indices above 0xF are rejected; the queued attribute is not fetched in that case.
    if (setup.index > IMMEDIATE_MODE_INDEX) {
        LOG_ERROR(HW_GPU, "Invalid VS default attribute index {}", setup.index);
        return;
    }

    const auto attribute = immediate.queue.Get();

    if (setup.index < IMMEDIATE_MODE_INDEX) {
        // Default attribute write: store it and advance to the next default attribute slot.
        input_default_attributes[setup.index] = attribute;
        setup.index++;
    } else {
        // Index 0xF selects immediate-mode drawing: append the attribute to the pending vertex.
        immediate.input_vertex[immediate.current_attribute] = attribute;
        if (immediate.current_attribute < regs.internal.pipeline.max_input_attrib_index) {
            immediate.current_attribute++;
        } else {
            // The last attribute of the vertex has arrived, so the vertex can be drawn.
            DrawImmediate();
        }
    }
}
void PicaCore::DrawImmediate() {
// Compile the vertex shader.
shader_engine->SetupBatch(vs_setup, regs.internal.vs.main_offset);
// Track vertex in the debug recorder.
debug_context.OnEvent(DebugContext::Event::VertexShaderInvocation,
std::addressof(immediate.input_vertex));
SCOPE_EXIT({ debug_context.OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); });
ShaderUnit shader_unit;
AttributeBuffer output{};
// Invoke the vertex shader for the vertex.
shader_unit.LoadInput(regs.internal.vs, immediate.input_vertex);
shader_engine->Run(vs_setup, shader_unit);
shader_unit.WriteOutput(regs.internal.vs, output);
// Reconfigure geometry pipeline if needed.
if (immediate.reset_geometry_pipeline) {
geometry_pipeline.Reconfigure();
immediate.reset_geometry_pipeline = false;
}
// Send to geometry pipeline.
ASSERT(!geometry_pipeline.NeedIndexInput());
geometry_pipeline.Setup(shader_engine.get());
geometry_pipeline.SubmitVertex(output);
// Flush the immediate triangle.
rasterizer->DrawTriangles();
immediate.current_attribute = 0;
}
/**
 * Draws the currently configured vertex batch.
 *
 * The rasterizer is first given the chance to accelerate the whole batch. If acceleration is not
 * applicable, or the rasterizer declines, every vertex is loaded and shaded in software before
 * the resulting triangles are drawn.
 *
 * @param is_indexed Whether the batch fetches its vertices through the index buffer.
 */
void PicaCore::DrawArrays(bool is_indexed) {
    MICROPROFILE_SCOPE(GPU_Drawing);

    // Bracket the batch with debugger events; the "finished" event fires on every exit path.
    debug_context.OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
    SCOPE_EXIT({ debug_context.OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); });

    // Geometry shaders cannot be accelerated due to register preservation.
    const bool uses_geometry_shader =
        regs.internal.pipeline.use_gs == PipelineRegs::UseGS::Yes;

    bool can_accelerate = false;
    if (!uses_geometry_shader) {
        // TODO (wwylele): for Strip/Fan topology, if the primitive assembler is not restarted
        // after this draw call, the buffered vertex from this draw should "leak" to the next
        // draw, in which case we should buffer the vertex into the software primitive
        // assembler, or disable accelerated draws completely. However, no game has been found
        // yet that does this, so this is left unimplemented for now. Revisit this when an
        // issue is found in games.
        can_accelerate = Settings::values.use_hw_shader && primitive_assembler.IsEmpty();

        // List-like topologies additionally require a whole number of triangles.
        const auto topology = primitive_assembler.GetTopology();
        const bool list_like = topology == PipelineRegs::TriangleTopology::Shader ||
                               topology == PipelineRegs::TriangleTopology::List;
        if (list_like) {
            can_accelerate = can_accelerate && (regs.internal.pipeline.num_vertices % 3) == 0;
        }
    }

    // Prefer the accelerated path; it has done all the work when it reports success.
    if (can_accelerate && rasterizer->AccelerateDrawBatch(is_indexed)) {
        return;
    }

    // Software fallback: shade each vertex, then draw the emitted triangles.
    LoadVertices(is_indexed);
    rasterizer->DrawTriangles();
}
void PicaCore::LoadVertices(bool is_indexed) {
// Read and validate vertex information from the loaders
const auto& pipeline = regs.internal.pipeline;
const PAddr base_address = pipeline.vertex_attributes.GetPhysicalBaseAddress();
const auto loader = VertexLoader(memory, pipeline);
regs.internal.rasterizer.ValidateSemantics();
// Locate index buffer.
const auto& index_info = pipeline.index_array;
const u8* index_address_8 = memory.GetPhysicalPointer(base_address + index_info.offset);
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
const bool index_u16 = index_info.format != 0;
// Simple circular-replacement vertex cache
const std::size_t VERTEX_CACHE_SIZE = 64;
std::array<bool, VERTEX_CACHE_SIZE> vertex_cache_valid{};
std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
std::array<AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
u32 vertex_cache_pos = 0;
// Compile the vertex shader for this batch.
ShaderUnit shader_unit;
AttributeBuffer vs_output;
shader_engine->SetupBatch(vs_setup, regs.internal.vs.main_offset);
// Setup geometry pipeline in case we are using a geometry shader.
geometry_pipeline.Reconfigure();
geometry_pipeline.Setup(shader_engine.get());
ASSERT(!geometry_pipeline.NeedIndexInput() || is_indexed);
for (u32 index = 0; index < pipeline.num_vertices; ++index) {
// Indexed rendering doesn't use the start offset
const u32 vertex = is_indexed
? (index_u16 ? index_address_16[index] : index_address_8[index])
: (index + pipeline.vertex_offset);
bool vertex_cache_hit = false;
if (is_indexed) {
if (geometry_pipeline.NeedIndexInput()) {
geometry_pipeline.SubmitIndex(vertex);
continue;
}
for (u32 i = 0; i < VERTEX_CACHE_SIZE; ++i) {
if (vertex_cache_valid[i] && vertex == vertex_cache_ids[i]) {
vs_output = vertex_cache[i];
vertex_cache_hit = true;
break;
}
}
}
if (!vertex_cache_hit) {
// Initialize data for the current vertex
AttributeBuffer input;
loader.LoadVertex(base_address, index, vertex, input, input_default_attributes);
// Record vertex processing to the debugger.
debug_context.OnEvent(DebugContext::Event::VertexShaderInvocation,
std::addressof(input));
// Invoke the vertex shader for this vertex.
shader_unit.LoadInput(regs.internal.vs, input);
shader_engine->Run(vs_setup, shader_unit);
shader_unit.WriteOutput(regs.internal.vs, vs_output);
// Cache the vertex when doing indexed rendering.
if (is_indexed) {
vertex_cache[vertex_cache_pos] = vs_output;
vertex_cache_valid[vertex_cache_pos] = true;
vertex_cache_ids[vertex_cache_pos] = vertex;
vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
}
}
// Send to geometry pipeline
geometry_pipeline.SubmitVertex(vs_output);
}
}
template <class Archive>
void PicaCore::CommandList::serialize(Archive& ar, const u32 file_version) {
ar& addr;
ar& length;
ar& current_index;
if (Archive::is_loading::value) {
const u8* ptr = Core::System::GetInstance().Memory().GetPhysicalPointer(addr);
head = reinterpret_cast<const u32*>(ptr);
}
}
SERIALIZE_IMPL(PicaCore::CommandList)
} // namespace Pica

View File

@@ -0,0 +1,287 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "core/hle/service/gsp/gsp_interrupt.h"
#include "video_core/pica/geometry_pipeline.h"
#include "video_core/pica/packed_attribute.h"
#include "video_core/pica/primitive_assembly.h"
#include "video_core/pica/regs_external.h"
#include "video_core/pica/regs_internal.h"
#include "video_core/pica/regs_lcd.h"
#include "video_core/pica/shader_setup.h"
#include "video_core/pica/shader_unit.h"
namespace Memory {
class MemorySystem;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Pica {
class DebugContext;
class ShaderEngine;
/**
 * Holds the register file and the auxiliary state (lookup tables, shader setups, default
 * attributes, immediate-mode state, command list cursor) of the emulated PICA GPU, and exposes
 * command list processing.
 *
 * The whole state is serializable through boost::serialization.
 */
class PicaCore {
public:
    explicit PicaCore(Memory::MemorySystem& memory, DebugContext& debug_context_);
    ~PicaCore();

    /// Sets the rasterizer that receives draw calls and register change notifications.
    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);

    /// Sets the handler used to signal GSP interrupts.
    void SetInterruptHandler(Service::GSP::InterruptHandler& signal_interrupt);

    /// Processes the command list of `size` bytes located at physical address `list`.
    void ProcessCmdList(PAddr list, u32 size);

private:
    void SetFramebufferDefaults();

    void WriteInternalReg(u32 id, u32 value, u32 mask);

    void SubmitImmediate(u32 data);

    void DrawImmediate();

    void DrawArrays(bool is_indexed);

    void LoadVertices(bool is_indexed);

public:
    /// External GPU register block followed by the internal PICA registers. The same storage
    /// can be accessed by field name or as a flat array of 32-bit words.
    union Regs {
        static constexpr size_t NUM_REGS = 0x732;

        struct {
            u32 hardware_id;
            INSERT_PADDING_WORDS(0x3);
            MemoryFillConfig memory_fill_config[2];
            u32 vram_bank_control;
            u32 gpu_busy;
            INSERT_PADDING_WORDS(0x22);
            u32 backlight_control;
            INSERT_PADDING_WORDS(0xCF);
            FramebufferConfig framebuffer_config[2];
            INSERT_PADDING_WORDS(0x180);
            DisplayTransferConfig display_transfer_config;
            INSERT_PADDING_WORDS(0xF5);
            RegsInternal internal;
        };
        std::array<u32, NUM_REGS> reg_array;
    };
    static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32));

    /// Cursor over the command list that is currently being processed.
    struct CommandList {
        // All members are value-initialized so that a command list which has never been Reset()
        // holds well-defined values, e.g. when it is serialized.
        PAddr addr{};
        const u32* head{};
        u32 current_index{};
        u32 length{};

        /// Points the cursor at a new list of `size` bytes and rewinds it to the first word.
        void Reset(PAddr addr, const u8* head, u32 size) {
            this->addr = addr;
            this->head = reinterpret_cast<const u32*>(head);
            this->length = size / sizeof(u32);
            current_index = 0;
        }

    private:
        friend class boost::serialization::access;
        template <class Archive>
        void serialize(Archive& ar, const u32 file_version);
    };

    /// State of a vertex being assembled through immediate-mode attribute submission.
    struct ImmediateModeState {
        AttributeBuffer input_vertex{};
        u32 current_attribute{};
        bool reset_geometry_pipeline{true};
        PackedAttribute queue;

        /// Restarts vertex assembly and requests a geometry pipeline reconfiguration.
        void Reset() {
            current_attribute = 0;
            reset_geometry_pipeline = true;
            queue.Reset();
        }

    private:
        friend class boost::serialization::access;
        template <class Archive>
        void serialize(Archive& ar, const u32 file_version) {
            ar& input_vertex;
            ar& current_attribute;
            ar& reset_geometry_pipeline;
            ar& queue;
        }
    };

    /// Lookup tables used by procedural texturing.
    struct ProcTex {
        union ValueEntry {
            u32 raw;

            // LUT value, encoded as 12-bit fixed point, with 12 fraction bits
            BitField<0, 12, u32> value; // 0.0.12 fixed point

            // Difference between two entry values. Used for efficient interpolation.
            // 0.0.12 fixed point with two's complement. The range is [-0.5, 0.5).
            // Note: the type of this is different from the one of lighting LUT
            BitField<12, 12, s32> difference;

            f32 ToFloat() const {
                return static_cast<f32>(value) / 4095.f;
            }

            f32 DiffToFloat() const {
                return static_cast<f32>(difference) / 4095.f;
            }
        };

        union ColorEntry {
            u32 raw;
            BitField<0, 8, u32> r;
            BitField<8, 8, u32> g;
            BitField<16, 8, u32> b;
            BitField<24, 8, u32> a;

            Common::Vec4<u8> ToVector() const {
                return {static_cast<u8>(r), static_cast<u8>(g), static_cast<u8>(b),
                        static_cast<u8>(a)};
            }
        };

        union ColorDifferenceEntry {
            u32 raw;
            BitField<0, 8, s32> r; // half of the difference between two ColorEntry
            BitField<8, 8, s32> g;
            BitField<16, 8, s32> b;
            BitField<24, 8, s32> a;

            // The stored components are halved differences, hence the multiplication by two.
            Common::Vec4<s32> ToVector() const {
                return Common::Vec4<s32>{r, g, b, a} * 2;
            }
        };

        std::array<ValueEntry, 128> noise_table;
        std::array<ValueEntry, 128> color_map_table;
        std::array<ValueEntry, 128> alpha_map_table;
        std::array<ColorEntry, 256> color_table;
        std::array<ColorDifferenceEntry, 256> color_diff_table;

    private:
        friend class boost::serialization::access;
        template <class Archive>
        void serialize(Archive& ar, const u32 file_version) {
            ar& boost::serialization::make_binary_object(this, sizeof(ProcTex));
        }
    };

    /// Lookup tables used by fragment lighting.
    struct Lighting {
        union LutEntry {
            // Used for raw access
            u32 raw;

            // LUT value, encoded as 12-bit fixed point, with 12 fraction bits
            BitField<0, 12, u32> value; // 0.0.12 fixed point

            // Used for efficient interpolation.
            BitField<12, 11, u32> difference; // 0.0.11 fixed point
            BitField<23, 1, u32> neg_difference;

            f32 ToFloat() const {
                return static_cast<f32>(value) / 4095.f;
            }

            // The difference is stored as magnitude plus a separate sign bit.
            f32 DiffToFloat() const {
                const f32 diff = static_cast<f32>(difference) / 2047.f;
                return neg_difference ? -diff : diff;
            }

            template <class Archive>
            void serialize(Archive& ar, const u32 file_version) {
                ar& raw;
            }
        };

        std::array<std::array<LutEntry, 256>, 24> luts;

    private:
        friend class boost::serialization::access;
        template <class Archive>
        void serialize(Archive& ar, const u32 file_version) {
            ar& boost::serialization::make_binary_object(this, sizeof(Lighting));
        }
    };

    /// Lookup table used by fog.
    struct Fog {
        union LutEntry {
            // Used for raw access
            u32 raw;

            BitField<0, 13, s32> difference; // 1.1.11 fixed point
            BitField<13, 11, u32> value;     // 0.0.11 fixed point

            f32 ToFloat() const {
                return static_cast<f32>(value) / 2047.0f;
            }

            f32 DiffToFloat() const {
                return static_cast<f32>(difference) / 2047.0f;
            }
        };

        std::array<LutEntry, 128> lut;

    private:
        friend class boost::serialization::access;
        template <class Archive>
        void serialize(Archive& ar, const u32 file_version) {
            ar& boost::serialization::make_binary_object(this, sizeof(Fog));
        }
    };

    RegsLcd regs_lcd{};
    Regs regs{};
    // TODO: Move these to a separate shader scheduler class
    GeometryShaderUnit gs_unit;
    ShaderSetup vs_setup;
    ShaderSetup gs_setup;
    ProcTex proctex{};
    Lighting lighting{};
    Fog fog{};
    AttributeBuffer input_default_attributes{};
    ImmediateModeState immediate{};

private:
    friend class boost::serialization::access;
    template <class Archive>
    void serialize(Archive& ar, const u32 file_version) {
        ar& regs_lcd;
        ar& regs.reg_array;
        ar& gs_unit;
        ar& vs_setup;
        ar& gs_setup;
        ar& proctex;
        ar& lighting;
        ar& fog;
        ar& input_default_attributes;
        ar& immediate;
        ar& geometry_pipeline;
        ar& primitive_assembler;
        ar& cmd_list;
    }

private:
    Memory::MemorySystem& memory;
    // Null until a rasterizer has been bound.
    VideoCore::RasterizerInterface* rasterizer{};
    DebugContext& debug_context;
    Service::GSP::InterruptHandler signal_interrupt;
    GeometryPipeline geometry_pipeline;
    PrimitiveAssembler primitive_assembler;
    CommandList cmd_list;
    std::unique_ptr<ShaderEngine> shader_engine;
};
/// Yields the index, in 32-bit words, of `field_name` inside Pica::PicaCore::Regs.
#define GPU_REG_INDEX(field_name) (offsetof(Pica::PicaCore::Regs, field_name) / sizeof(u32))
} // namespace Pica

View File

@@ -0,0 +1,53 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "video_core/pica/primitive_assembly.h"
namespace Pica {
PrimitiveAssembler::PrimitiveAssembler(PipelineRegs::TriangleTopology topology)
: topology(topology) {}
void PrimitiveAssembler::SubmitVertex(const OutputVertex& vtx,
const TriangleHandler& triangle_handler) {
switch (topology) {
case PipelineRegs::TriangleTopology::List:
case PipelineRegs::TriangleTopology::Shader:
if (buffer_index < 2) {
buffer[buffer_index++] = vtx;
} else {
buffer_index = 0;
if (topology == PipelineRegs::TriangleTopology::Shader && winding) {
triangle_handler(buffer[1], buffer[0], vtx);
winding = false;
} else {
triangle_handler(buffer[0], buffer[1], vtx);
}
}
break;
case PipelineRegs::TriangleTopology::Strip:
case PipelineRegs::TriangleTopology::Fan:
if (strip_ready) {
triangle_handler(buffer[0], buffer[1], vtx);
}
buffer[buffer_index] = vtx;
strip_ready |= (buffer_index == 1);
if (topology == PipelineRegs::TriangleTopology::Strip) {
buffer_index = !buffer_index;
} else if (topology == PipelineRegs::TriangleTopology::Fan) {
buffer_index = 1;
}
break;
default:
LOG_ERROR(HW_GPU, "Unknown triangle topology {:x}:", (int)topology);
break;
}
}
} // namespace Pica

View File

@@ -0,0 +1,92 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <functional>
#include <boost/serialization/access.hpp>
#include <boost/serialization/array.hpp>
#include "video_core/pica/output_vertex.h"
#include "video_core/pica/regs_pipeline.h"
namespace Pica {
/**
* Utility class to build triangles from a series of vertices,
* according to a given triangle topology.
*/
/**
 * Builds triangles out of a stream of vertices, following a configurable triangle topology.
 */
struct PrimitiveAssembler {
    /// Callback receiving the three vertices of an assembled triangle.
    using TriangleHandler =
        std::function<void(const OutputVertex&, const OutputVertex&, const OutputVertex&)>;

    explicit PrimitiveAssembler(
        PipelineRegs::TriangleTopology topology = PipelineRegs::TriangleTopology::List);

    /**
     * Queues a vertex and calls triangle_handler for each primitive that the queued vertices
     * form under the current topology.
     * NOTE: The handler is passed per call rather than at construction so that the event and
     * the code handling it can stay next to each other.
     */
    void SubmitVertex(const OutputVertex& vtx, const TriangleHandler& triangle_handler);

    /**
     * Requests that the vertex order of the next triangle be inverted. Called by the geometry
     * shader emitter; only has an effect for TriangleTopology::Shader.
     */
    void SetWinding() noexcept {
        winding = true;
    }

    /**
     * Discards any buffered vertices and pending winding request.
     */
    void Reset() {
        winding = false;
        strip_ready = false;
        buffer_index = 0;
    }

    /**
     * Switches to a different triangle topology, dropping all buffered state.
     */
    void Reconfigure(PipelineRegs::TriangleTopology topology) {
        this->topology = topology;
        Reset();
    }

    /**
     * Tells whether no vertex is currently buffered.
     */
    bool IsEmpty() const {
        if (strip_ready) {
            return false;
        }
        return buffer_index == 0;
    }

    /**
     * Gives the topology triangles are currently assembled with.
     */
    PipelineRegs::TriangleTopology GetTopology() const {
        return topology;
    }

private:
    PipelineRegs::TriangleTopology topology;

    int buffer_index = 0;
    std::array<OutputVertex, 2> buffer;
    bool strip_ready = false;
    bool winding = false;

    template <class Archive>
    void serialize(Archive& ar, const unsigned int version) {
        ar & topology;
        ar & buffer_index;
        ar & buffer;
        ar & strip_ready;
        ar & winding;
    }
    friend class boost::serialization::access;
};
} // namespace Pica

View File

@@ -0,0 +1,217 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/bit_field.h"
namespace Pica {
/**
* Most physical addresses which GPU registers refer to are 8-byte aligned.
* This function should be used to get the address from a raw register value.
*/
constexpr u32 DecodeAddressRegister(u32 register_value) {
    // Address registers count in units of 8 bytes.
    constexpr u32 address_unit = 8;
    return register_value * address_unit;
}
/// Components are laid out in reverse byte order, most significant bits first.
// Pixel formats selectable in the external GPU registers. The numeric values are the raw
// encodings stored in the 3-bit format fields of FramebufferConfig and DisplayTransferConfig.
enum class PixelFormat : u32 {
    RGBA8 = 0,
    RGB8 = 1,
    RGB565 = 2,
    RGB5A1 = 3,
    RGBA4 = 4,
};
/// Returns the size in bytes of one pixel of the given format. Formats outside of the
/// PixelFormat enumeration hit UNREACHABLE().
constexpr u32 BytesPerPixel(Pica::PixelFormat format) {
    using Format = Pica::PixelFormat;

    if (format == Format::RGBA8) {
        return 4;
    }
    if (format == Format::RGB8) {
        return 3;
    }
    if (format == Format::RGB565 || format == Format::RGB5A1 || format == Format::RGBA4) {
        return 2;
    }

    UNREACHABLE();
    return 0;
}
/// Register block describing one memory fill operation: address range, fill value and control.
struct MemoryFillConfig {
    u32 address_start;
    u32 address_end;

    // Fill value; which view applies depends on the fill width selected in `control`.
    union {
        u32 value_32bit;

        BitField<0, 16, u32> value_16bit;

        // TODO: Verify component order
        BitField<0, 8, u32> value_24bit_r;
        BitField<8, 8, u32> value_24bit_g;
        BitField<16, 8, u32> value_24bit_b;
    };

    union {
        u32 control;

        // Setting this field to 1 triggers the memory fill.
        // This field also acts as a status flag, and gets reset to 0 upon completion.
        BitField<0, 1, u32> trigger;

        // Set to 1 upon completion.
        BitField<1, 1, u32> finished;

        // If both of these bits are unset, then it will fill the memory with a 16 bit value
        // 1: fill with 24-bit wide values
        BitField<8, 1, u32> fill_24bit;
        // 1: fill with 32-bit wide values
        BitField<9, 1, u32> fill_32bit;
    };

    /// Decoded physical address of the start of the fill range.
    u32 GetStartAddress() const {
        return DecodeAddressRegister(address_start);
    }

    /// Decoded physical address of the end of the fill range.
    u32 GetEndAddress() const {
        return DecodeAddressRegister(address_end);
    }

    /// Human-readable summary of the fill: address range, fill width and raw fill value.
    std::string DebugName() const {
        // The 32-bit flag takes precedence over the 24-bit one; 16-bit is the fallback.
        const char* fill_width = "16";
        if (fill_32bit) {
            fill_width = "32";
        } else if (fill_24bit) {
            fill_width = "24";
        }
        return fmt::format("from {:#X} to {:#X} with {}-bit value {:#X}", GetStartAddress(),
                           GetEndAddress(), fill_width, value_32bit);
    }
};
static_assert(sizeof(MemoryFillConfig) == 0x10);
// Register block describing one display framebuffer. The padding entries keep every named
// register at its word offset; the total size is checked by the static_assert below.
struct FramebufferConfig {
    INSERT_PADDING_WORDS(0x17);

    // Framebuffer dimensions, packed as two 16-bit fields.
    union {
        u32 size;

        BitField<0, 16, u32> width;
        BitField<16, 16, u32> height;
    };

    INSERT_PADDING_WORDS(0x2);

    // Two addresses for the left image; `second_fb_active` selects between the "1" and "2" set.
    u32 address_left1;
    u32 address_left2;

    union {
        u32 format;

        BitField<0, 3, PixelFormat> color_format;
    };

    INSERT_PADDING_WORDS(0x1);

    union {
        u32 active_fb;

        // 0: Use parameters ending with "1"
        // 1: Use parameters ending with "2"
        BitField<0, 1, u32> second_fb_active;
    };

    INSERT_PADDING_WORDS(0x5);

    // Distance between two pixel rows, in bytes
    u32 stride;

    // Two addresses for the right image, selected the same way as the left ones.
    u32 address_right1;
    u32 address_right2;

    INSERT_PADDING_WORDS(0x19);
};
static_assert(sizeof(FramebufferConfig) == 0x100);
// Register block describing a display transfer or texture copy: source and destination
// addresses, dimensions, conversion flags and the trigger register.
struct DisplayTransferConfig {
    u32 input_address;
    u32 output_address;

    // Decoded physical address the transfer reads from.
    inline u32 GetPhysicalInputAddress() const {
        return DecodeAddressRegister(input_address);
    }

    // Decoded physical address the transfer writes to.
    inline u32 GetPhysicalOutputAddress() const {
        return DecodeAddressRegister(output_address);
    }

    // Human-readable summary of the transfer. The values printed after "stride" and "width"
    // are `input_width` and `output_width` respectively.
    // NOTE(review): declared noexcept although it builds a std::string through fmt::format;
    // confirm that terminating on a formatting/allocation failure is acceptable here.
    inline std::string DebugName() const noexcept {
        return fmt::format("from {:#x} to {:#x} with {} scaling and stride {}, width {}",
                           GetPhysicalInputAddress(), GetPhysicalOutputAddress(),
                           scaling == NoScale ? "no" : (scaling == ScaleX ? "X" : "XY"),
                           input_width.Value(), output_width.Value());
    }

    // Output dimensions, packed as two 16-bit fields.
    union {
        u32 output_size;

        BitField<0, 16, u32> output_width;
        BitField<16, 16, u32> output_height;
    };

    // Input dimensions, packed as two 16-bit fields.
    union {
        u32 input_size;

        BitField<0, 16, u32> input_width;
        BitField<16, 16, u32> input_height;
    };

    enum ScalingMode : u32 {
        NoScale = 0, // Doesn't scale the image
        ScaleX = 1,  // Downscales the image in half in the X axis and applies a box filter
        ScaleXY =
            2, // Downscales the image in half in both the X and Y axes and applies a box filter
    };

    union {
        u32 flags;

        BitField<0, 1, u32> flip_vertically; // flips input data vertically
        BitField<1, 1, u32> input_linear;    // Converts from linear to tiled format
        BitField<2, 1, u32> crop_input_lines;
        BitField<3, 1, u32> is_texture_copy; // Copies the data without performing any
                                             // processing and respecting texture copy fields
        BitField<5, 1, u32> dont_swizzle;
        BitField<8, 3, PixelFormat> input_format;
        BitField<12, 3, PixelFormat> output_format;
        /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
        BitField<16, 1, u32> block_32;        // TODO(yuriks): unimplemented
        BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
    };

    INSERT_PADDING_WORDS(0x1);

    // it seems that writing to this field triggers the display transfer
    BitField<0, 1, u32> trigger;

    INSERT_PADDING_WORDS(0x1);

    // Parameters that apply when `is_texture_copy` is set.
    struct {
        u32 size; // The lower 4 bits are ignored

        union {
            u32 input_size;

            BitField<0, 16, u32> input_width;
            BitField<16, 16, u32> input_gap;
        };

        union {
            u32 output_size;

            BitField<0, 16, u32> output_width;
            BitField<16, 16, u32> output_gap;
        };
    } texture_copy;
};
static_assert(sizeof(DisplayTransferConfig) == 0x2c);
} // namespace Pica

View File

@@ -0,0 +1,306 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/logging/log.h"
namespace Pica {
/**
 * Internal PICA registers that control the output merger (blending, logic ops, alpha, stencil
 * and depth tests) and the render target configuration.
 *
 * The member order and padding mirror the register layout; the total size is verified by the
 * static_assert that follows the struct.
 */
struct FramebufferRegs {
    enum class FragmentOperationMode : u32 {
        Default = 0,
        Gas = 1,
        Shadow = 3,
    };

    enum class LogicOp : u32 {
        Clear = 0,
        And = 1,
        AndReverse = 2,
        Copy = 3,
        Set = 4,
        CopyInverted = 5,
        NoOp = 6,
        Invert = 7,
        Nand = 8,
        Or = 9,
        Nor = 10,
        Xor = 11,
        Equiv = 12,
        AndInverted = 13,
        OrReverse = 14,
        OrInverted = 15,
    };

    enum class BlendEquation : u32 {
        Add = 0,
        Subtract = 1,
        ReverseSubtract = 2,
        Min = 3,
        Max = 4,
    };

    enum class BlendFactor : u32 {
        Zero = 0,
        One = 1,
        SourceColor = 2,
        OneMinusSourceColor = 3,
        DestColor = 4,
        OneMinusDestColor = 5,
        SourceAlpha = 6,
        OneMinusSourceAlpha = 7,
        DestAlpha = 8,
        OneMinusDestAlpha = 9,
        ConstantColor = 10,
        OneMinusConstantColor = 11,
        ConstantAlpha = 12,
        OneMinusConstantAlpha = 13,
        SourceAlphaSaturate = 14,
    };

    enum class CompareFunc : u32 {
        Never = 0,
        Always = 1,
        Equal = 2,
        NotEqual = 3,
        LessThan = 4,
        LessThanOrEqual = 5,
        GreaterThan = 6,
        GreaterThanOrEqual = 7,
    };

    enum class StencilAction : u32 {
        Keep = 0,
        Zero = 1,
        Replace = 2,
        Increment = 3,
        Decrement = 4,
        Invert = 5,
        IncrementWrap = 6,
        DecrementWrap = 7,
    };

    struct {
        union {
            BitField<0, 2, FragmentOperationMode> fragment_operation_mode;
            // If false, logic blending is used
            BitField<8, 1, u32> alphablend_enable;
        };

        union {
            BitField<0, 3, BlendEquation> blend_equation_rgb;
            BitField<8, 3, BlendEquation> blend_equation_a;

            BitField<16, 4, BlendFactor> factor_source_rgb;
            BitField<20, 4, BlendFactor> factor_dest_rgb;

            BitField<24, 4, BlendFactor> factor_source_a;
            BitField<28, 4, BlendFactor> factor_dest_a;
        } alpha_blending;

        union {
            BitField<0, 4, LogicOp> logic_op;
        };

        union {
            u32 raw;
            BitField<0, 8, u32> r;
            BitField<8, 8, u32> g;
            BitField<16, 8, u32> b;
            BitField<24, 8, u32> a;
        } blend_const;

        union {
            BitField<0, 1, u32> enable;
            BitField<4, 3, CompareFunc> func;
            BitField<8, 8, u32> ref;
        } alpha_test;

        struct {
            union {
                // Raw value of this register
                u32 raw_func;

                // If true, enable stencil testing
                BitField<0, 1, u32> enable;

                // Comparison operation for stencil testing
                BitField<4, 3, CompareFunc> func;

                // Mask used to control writing to the stencil buffer
                BitField<8, 8, u32> write_mask;

                // Value to compare against for stencil testing
                BitField<16, 8, u32> reference_value;

                // Mask to apply on stencil test inputs
                BitField<24, 8, u32> input_mask;
            };

            union {
                // Raw value of this register
                u32 raw_op;

                // Action to perform when the stencil test fails
                BitField<0, 3, StencilAction> action_stencil_fail;

                // Action to perform when stencil testing passed but depth testing fails
                BitField<4, 3, StencilAction> action_depth_fail;

                // Action to perform when both stencil and depth testing pass
                BitField<8, 3, StencilAction> action_depth_pass;
            };
        } stencil_test;

        union {
            u32 depth_color_mask;

            BitField<0, 1, u32> depth_test_enable;
            BitField<4, 3, CompareFunc> depth_test_func;
            BitField<8, 1, u32> red_enable;
            BitField<9, 1, u32> green_enable;
            BitField<10, 1, u32> blue_enable;
            BitField<11, 1, u32> alpha_enable;
            BitField<12, 1, u32> depth_write_enable;
        };

        INSERT_PADDING_WORDS(0x8);
    } output_merger;

    // Components are laid out in reverse byte order, most significant bits first.
    enum class ColorFormat : u32 {
        RGBA8 = 0,
        RGB8 = 1,
        RGB5A1 = 2,
        RGB565 = 3,
        RGBA4 = 4,
    };

    enum class DepthFormat : u32 {
        D16 = 0,
        D24 = 2,
        D24S8 = 3,
    };

    // Returns the number of bytes in the specified color format
    static unsigned BytesPerColorPixel(ColorFormat format) {
        switch (format) {
        case ColorFormat::RGBA8:
            return 4;
        case ColorFormat::RGB8:
            return 3;
        case ColorFormat::RGB5A1:
        case ColorFormat::RGB565:
        case ColorFormat::RGBA4:
            return 2;
        default:
            LOG_CRITICAL(HW_GPU, "Unknown color format {}", format);
            UNIMPLEMENTED();
        }
        // Defined fallback in case the assertion above returns, as in DepthBitsPerPixel.
        return 0;
    }

    struct FramebufferConfig {
        INSERT_PADDING_WORDS(0x3);

        union {
            BitField<0, 4, u32> allow_color_write; // 0 = disable, else enable
        };

        INSERT_PADDING_WORDS(0x1);

        union {
            BitField<0, 2, u32> allow_depth_stencil_write; // 0 = disable, else enable
        };

        BitField<0, 2, DepthFormat> depth_format;

        BitField<16, 3, ColorFormat> color_format;

        INSERT_PADDING_WORDS(0x4);

        BitField<0, 28, u32> depth_buffer_address;
        BitField<0, 28, u32> color_buffer_address;

        union {
            // Apparently, the framebuffer width is stored as expected,
            // while the height is stored as the actual height minus one.
            // Hence, don't access these fields directly but use the accessors
            // GetWidth() and GetHeight() instead.
            BitField<0, 11, u32> width;
            BitField<12, 10, u32> height;
        };

        INSERT_PADDING_WORDS(0x1);

        // The address fields count in units of 8 bytes.
        inline PAddr GetColorBufferPhysicalAddress() const {
            return color_buffer_address * 8;
        }
        inline PAddr GetDepthBufferPhysicalAddress() const {
            return depth_buffer_address * 8;
        }

        inline u32 GetWidth() const {
            return width;
        }

        // Compensates for the register holding the height minus one.
        inline u32 GetHeight() const {
            return height + 1;
        }
    } framebuffer;

    // Returns the number of bytes in the specified depth format
    static u32 BytesPerDepthPixel(DepthFormat format) {
        switch (format) {
        case DepthFormat::D16:
            return 2;
        case DepthFormat::D24:
            return 3;
        case DepthFormat::D24S8:
            return 4;
        }

        ASSERT_MSG(false, "Unknown depth format {}", format);
        // Defined fallback in case the assertion above returns, as in DepthBitsPerPixel.
        return 0;
    }

    // Returns the number of bits per depth component of the specified depth format
    static u32 DepthBitsPerPixel(DepthFormat format) {
        switch (format) {
        case DepthFormat::D16:
            return 16;
        case DepthFormat::D24:
        case DepthFormat::D24S8:
            return 24;
        default:
            UNREACHABLE_MSG("Unknown depth format {}", format);
        }
        return 0;
    }

    // True when the fragment operation mode is set to shadow rendering.
    [[nodiscard]] bool IsShadowRendering() const {
        return output_merger.fragment_operation_mode == FragmentOperationMode::Shadow;
    }

    // True when the configured depth format carries a stencil component (D24S8).
    [[nodiscard]] bool HasStencil() const {
        return framebuffer.depth_format == DepthFormat::D24S8;
    }

    INSERT_PADDING_WORDS(0x10); // Gas related registers

    union {
        BitField<0, 16, u32> constant; // float1.5.10
        BitField<16, 16, u32> linear;  // float1.5.10
    } shadow;

    INSERT_PADDING_WORDS(0xF);
};

static_assert(sizeof(FramebufferRegs) == 0x40 * sizeof(u32),
              "FramebufferRegs struct has incorrect size");
} // namespace Pica

View File

@@ -0,0 +1,488 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <iterator>
#include <utility>
#include "common/common_types.h"
#include "video_core/pica/regs_internal.h"
namespace Pica {
// Register index -> human-readable name table used by RegsInternal::GetRegisterName.
// Entries MUST stay sorted by ascending register index: the lookup uses std::lower_bound,
// which requires a sorted range. Indices without an entry resolve to an empty string.
static constexpr std::pair<u16, const char*> register_names[] = {
{0x010, "GPUREG_FINALIZE"},
{0x040, "GPUREG_FACECULLING_CONFIG"},
{0x041, "GPUREG_VIEWPORT_WIDTH"},
{0x042, "GPUREG_VIEWPORT_INVW"},
{0x043, "GPUREG_VIEWPORT_HEIGHT"},
{0x044, "GPUREG_VIEWPORT_INVH"},
{0x047, "GPUREG_FRAGOP_CLIP"},
{0x048, "GPUREG_FRAGOP_CLIP_DATA0"},
{0x049, "GPUREG_FRAGOP_CLIP_DATA1"},
{0x04A, "GPUREG_FRAGOP_CLIP_DATA2"},
{0x04B, "GPUREG_FRAGOP_CLIP_DATA3"},
{0x04D, "GPUREG_DEPTHMAP_SCALE"},
{0x04E, "GPUREG_DEPTHMAP_OFFSET"},
{0x04F, "GPUREG_SH_OUTMAP_TOTAL"},
{0x050, "GPUREG_SH_OUTMAP_O0"},
{0x051, "GPUREG_SH_OUTMAP_O1"},
{0x052, "GPUREG_SH_OUTMAP_O2"},
{0x053, "GPUREG_SH_OUTMAP_O3"},
{0x054, "GPUREG_SH_OUTMAP_O4"},
{0x055, "GPUREG_SH_OUTMAP_O5"},
{0x056, "GPUREG_SH_OUTMAP_O6"},
{0x061, "GPUREG_EARLYDEPTH_FUNC"},
{0x062, "GPUREG_EARLYDEPTH_TEST1"},
{0x063, "GPUREG_EARLYDEPTH_CLEAR"},
{0x064, "GPUREG_SH_OUTATTR_MODE"},
{0x065, "GPUREG_SCISSORTEST_MODE"},
{0x066, "GPUREG_SCISSORTEST_POS"},
{0x067, "GPUREG_SCISSORTEST_DIM"},
{0x068, "GPUREG_VIEWPORT_XY"},
{0x06A, "GPUREG_EARLYDEPTH_DATA"},
{0x06D, "GPUREG_DEPTHMAP_ENABLE"},
{0x06E, "GPUREG_RENDERBUF_DIM"},
{0x06F, "GPUREG_SH_OUTATTR_CLOCK"},
{0x080, "GPUREG_TEXUNIT_CONFIG"},
{0x081, "GPUREG_TEXUNIT0_BORDER_COLOR"},
{0x082, "GPUREG_TEXUNIT0_DIM"},
{0x083, "GPUREG_TEXUNIT0_PARAM"},
{0x084, "GPUREG_TEXUNIT0_LOD"},
{0x085, "GPUREG_TEXUNIT0_ADDR1"},
{0x086, "GPUREG_TEXUNIT0_ADDR2"},
{0x087, "GPUREG_TEXUNIT0_ADDR3"},
{0x088, "GPUREG_TEXUNIT0_ADDR4"},
{0x089, "GPUREG_TEXUNIT0_ADDR5"},
{0x08A, "GPUREG_TEXUNIT0_ADDR6"},
{0x08B, "GPUREG_TEXUNIT0_SHADOW"},
{0x08E, "GPUREG_TEXUNIT0_TYPE"},
{0x08F, "GPUREG_LIGHTING_ENABLE0"},
{0x091, "GPUREG_TEXUNIT1_BORDER_COLOR"},
{0x092, "GPUREG_TEXUNIT1_DIM"},
{0x093, "GPUREG_TEXUNIT1_PARAM"},
{0x094, "GPUREG_TEXUNIT1_LOD"},
{0x095, "GPUREG_TEXUNIT1_ADDR"},
{0x096, "GPUREG_TEXUNIT1_TYPE"},
{0x099, "GPUREG_TEXUNIT2_BORDER_COLOR"},
{0x09A, "GPUREG_TEXUNIT2_DIM"},
{0x09B, "GPUREG_TEXUNIT2_PARAM"},
{0x09C, "GPUREG_TEXUNIT2_LOD"},
{0x09D, "GPUREG_TEXUNIT2_ADDR"},
{0x09E, "GPUREG_TEXUNIT2_TYPE"},
{0x0A8, "GPUREG_TEXUNIT3_PROCTEX0"},
{0x0A9, "GPUREG_TEXUNIT3_PROCTEX1"},
{0x0AA, "GPUREG_TEXUNIT3_PROCTEX2"},
{0x0AB, "GPUREG_TEXUNIT3_PROCTEX3"},
{0x0AC, "GPUREG_TEXUNIT3_PROCTEX4"},
{0x0AD, "GPUREG_TEXUNIT3_PROCTEX5"},
{0x0AF, "GPUREG_PROCTEX_LUT"},
{0x0B0, "GPUREG_PROCTEX_LUT_DATA0"},
{0x0B1, "GPUREG_PROCTEX_LUT_DATA1"},
{0x0B2, "GPUREG_PROCTEX_LUT_DATA2"},
{0x0B3, "GPUREG_PROCTEX_LUT_DATA3"},
{0x0B4, "GPUREG_PROCTEX_LUT_DATA4"},
{0x0B5, "GPUREG_PROCTEX_LUT_DATA5"},
{0x0B6, "GPUREG_PROCTEX_LUT_DATA6"},
{0x0B7, "GPUREG_PROCTEX_LUT_DATA7"},
{0x0C0, "GPUREG_TEXENV0_SOURCE"},
{0x0C1, "GPUREG_TEXENV0_OPERAND"},
{0x0C2, "GPUREG_TEXENV0_COMBINER"},
{0x0C3, "GPUREG_TEXENV0_COLOR"},
{0x0C4, "GPUREG_TEXENV0_SCALE"},
{0x0C8, "GPUREG_TEXENV1_SOURCE"},
{0x0C9, "GPUREG_TEXENV1_OPERAND"},
{0x0CA, "GPUREG_TEXENV1_COMBINER"},
{0x0CB, "GPUREG_TEXENV1_COLOR"},
{0x0CC, "GPUREG_TEXENV1_SCALE"},
{0x0D0, "GPUREG_TEXENV2_SOURCE"},
{0x0D1, "GPUREG_TEXENV2_OPERAND"},
{0x0D2, "GPUREG_TEXENV2_COMBINER"},
{0x0D3, "GPUREG_TEXENV2_COLOR"},
{0x0D4, "GPUREG_TEXENV2_SCALE"},
{0x0D8, "GPUREG_TEXENV3_SOURCE"},
{0x0D9, "GPUREG_TEXENV3_OPERAND"},
{0x0DA, "GPUREG_TEXENV3_COMBINER"},
{0x0DB, "GPUREG_TEXENV3_COLOR"},
{0x0DC, "GPUREG_TEXENV3_SCALE"},
{0x0E0, "GPUREG_TEXENV_UPDATE_BUFFER"},
{0x0E1, "GPUREG_FOG_COLOR"},
{0x0E4, "GPUREG_GAS_ATTENUATION"},
{0x0E5, "GPUREG_GAS_ACCMAX"},
{0x0E6, "GPUREG_FOG_LUT_INDEX"},
{0x0E8, "GPUREG_FOG_LUT_DATA0"},
{0x0E9, "GPUREG_FOG_LUT_DATA1"},
{0x0EA, "GPUREG_FOG_LUT_DATA2"},
{0x0EB, "GPUREG_FOG_LUT_DATA3"},
{0x0EC, "GPUREG_FOG_LUT_DATA4"},
{0x0ED, "GPUREG_FOG_LUT_DATA5"},
{0x0EE, "GPUREG_FOG_LUT_DATA6"},
{0x0EF, "GPUREG_FOG_LUT_DATA7"},
{0x0F0, "GPUREG_TEXENV4_SOURCE"},
{0x0F1, "GPUREG_TEXENV4_OPERAND"},
{0x0F2, "GPUREG_TEXENV4_COMBINER"},
{0x0F3, "GPUREG_TEXENV4_COLOR"},
{0x0F4, "GPUREG_TEXENV4_SCALE"},
{0x0F8, "GPUREG_TEXENV5_SOURCE"},
{0x0F9, "GPUREG_TEXENV5_OPERAND"},
{0x0FA, "GPUREG_TEXENV5_COMBINER"},
{0x0FB, "GPUREG_TEXENV5_COLOR"},
{0x0FC, "GPUREG_TEXENV5_SCALE"},
{0x0FD, "GPUREG_TEXENV_BUFFER_COLOR"},
{0x100, "GPUREG_COLOR_OPERATION"},
{0x101, "GPUREG_BLEND_FUNC"},
{0x102, "GPUREG_LOGIC_OP"},
{0x103, "GPUREG_BLEND_COLOR"},
{0x104, "GPUREG_FRAGOP_ALPHA_TEST"},
{0x105, "GPUREG_STENCIL_TEST"},
{0x106, "GPUREG_STENCIL_OP"},
{0x107, "GPUREG_DEPTH_COLOR_MASK"},
{0x110, "GPUREG_FRAMEBUFFER_INVALIDATE"},
{0x111, "GPUREG_FRAMEBUFFER_FLUSH"},
{0x112, "GPUREG_COLORBUFFER_READ"},
{0x113, "GPUREG_COLORBUFFER_WRITE"},
{0x114, "GPUREG_DEPTHBUFFER_READ"},
{0x115, "GPUREG_DEPTHBUFFER_WRITE"},
{0x116, "GPUREG_DEPTHBUFFER_FORMAT"},
{0x117, "GPUREG_COLORBUFFER_FORMAT"},
{0x118, "GPUREG_EARLYDEPTH_TEST2"},
{0x11B, "GPUREG_FRAMEBUFFER_BLOCK32"},
{0x11C, "GPUREG_DEPTHBUFFER_LOC"},
{0x11D, "GPUREG_COLORBUFFER_LOC"},
{0x11E, "GPUREG_FRAMEBUFFER_DIM"},
{0x120, "GPUREG_GAS_LIGHT_XY"},
{0x121, "GPUREG_GAS_LIGHT_Z"},
{0x122, "GPUREG_GAS_LIGHT_Z_COLOR"},
{0x123, "GPUREG_GAS_LUT_INDEX"},
{0x124, "GPUREG_GAS_LUT_DATA"},
{0x126, "GPUREG_GAS_DELTAZ_DEPTH"},
{0x130, "GPUREG_FRAGOP_SHADOW"},
{0x140, "GPUREG_LIGHT0_SPECULAR0"},
{0x141, "GPUREG_LIGHT0_SPECULAR1"},
{0x142, "GPUREG_LIGHT0_DIFFUSE"},
{0x143, "GPUREG_LIGHT0_AMBIENT"},
{0x144, "GPUREG_LIGHT0_XY"},
{0x145, "GPUREG_LIGHT0_Z"},
{0x146, "GPUREG_LIGHT0_SPOTDIR_XY"},
{0x147, "GPUREG_LIGHT0_SPOTDIR_Z"},
{0x149, "GPUREG_LIGHT0_CONFIG"},
{0x14A, "GPUREG_LIGHT0_ATTENUATION_BIAS"},
{0x14B, "GPUREG_LIGHT0_ATTENUATION_SCALE"},
{0x150, "GPUREG_LIGHT1_SPECULAR0"},
{0x151, "GPUREG_LIGHT1_SPECULAR1"},
{0x152, "GPUREG_LIGHT1_DIFFUSE"},
{0x153, "GPUREG_LIGHT1_AMBIENT"},
{0x154, "GPUREG_LIGHT1_XY"},
{0x155, "GPUREG_LIGHT1_Z"},
{0x156, "GPUREG_LIGHT1_SPOTDIR_XY"},
{0x157, "GPUREG_LIGHT1_SPOTDIR_Z"},
{0x159, "GPUREG_LIGHT1_CONFIG"},
{0x15A, "GPUREG_LIGHT1_ATTENUATION_BIAS"},
{0x15B, "GPUREG_LIGHT1_ATTENUATION_SCALE"},
{0x160, "GPUREG_LIGHT2_SPECULAR0"},
{0x161, "GPUREG_LIGHT2_SPECULAR1"},
{0x162, "GPUREG_LIGHT2_DIFFUSE"},
{0x163, "GPUREG_LIGHT2_AMBIENT"},
{0x164, "GPUREG_LIGHT2_XY"},
{0x165, "GPUREG_LIGHT2_Z"},
{0x166, "GPUREG_LIGHT2_SPOTDIR_XY"},
{0x167, "GPUREG_LIGHT2_SPOTDIR_Z"},
{0x169, "GPUREG_LIGHT2_CONFIG"},
{0x16A, "GPUREG_LIGHT2_ATTENUATION_BIAS"},
{0x16B, "GPUREG_LIGHT2_ATTENUATION_SCALE"},
{0x170, "GPUREG_LIGHT3_SPECULAR0"},
{0x171, "GPUREG_LIGHT3_SPECULAR1"},
{0x172, "GPUREG_LIGHT3_DIFFUSE"},
{0x173, "GPUREG_LIGHT3_AMBIENT"},
{0x174, "GPUREG_LIGHT3_XY"},
{0x175, "GPUREG_LIGHT3_Z"},
{0x176, "GPUREG_LIGHT3_SPOTDIR_XY"},
{0x177, "GPUREG_LIGHT3_SPOTDIR_Z"},
{0x179, "GPUREG_LIGHT3_CONFIG"},
{0x17A, "GPUREG_LIGHT3_ATTENUATION_BIAS"},
{0x17B, "GPUREG_LIGHT3_ATTENUATION_SCALE"},
{0x180, "GPUREG_LIGHT4_SPECULAR0"},
{0x181, "GPUREG_LIGHT4_SPECULAR1"},
{0x182, "GPUREG_LIGHT4_DIFFUSE"},
{0x183, "GPUREG_LIGHT4_AMBIENT"},
{0x184, "GPUREG_LIGHT4_XY"},
{0x185, "GPUREG_LIGHT4_Z"},
{0x186, "GPUREG_LIGHT4_SPOTDIR_XY"},
{0x187, "GPUREG_LIGHT4_SPOTDIR_Z"},
{0x189, "GPUREG_LIGHT4_CONFIG"},
{0x18A, "GPUREG_LIGHT4_ATTENUATION_BIAS"},
{0x18B, "GPUREG_LIGHT4_ATTENUATION_SCALE"},
{0x190, "GPUREG_LIGHT5_SPECULAR0"},
{0x191, "GPUREG_LIGHT5_SPECULAR1"},
{0x192, "GPUREG_LIGHT5_DIFFUSE"},
{0x193, "GPUREG_LIGHT5_AMBIENT"},
{0x194, "GPUREG_LIGHT5_XY"},
{0x195, "GPUREG_LIGHT5_Z"},
{0x196, "GPUREG_LIGHT5_SPOTDIR_XY"},
{0x197, "GPUREG_LIGHT5_SPOTDIR_Z"},
{0x199, "GPUREG_LIGHT5_CONFIG"},
{0x19A, "GPUREG_LIGHT5_ATTENUATION_BIAS"},
{0x19B, "GPUREG_LIGHT5_ATTENUATION_SCALE"},
{0x1A0, "GPUREG_LIGHT6_SPECULAR0"},
{0x1A1, "GPUREG_LIGHT6_SPECULAR1"},
{0x1A2, "GPUREG_LIGHT6_DIFFUSE"},
{0x1A3, "GPUREG_LIGHT6_AMBIENT"},
{0x1A4, "GPUREG_LIGHT6_XY"},
{0x1A5, "GPUREG_LIGHT6_Z"},
{0x1A6, "GPUREG_LIGHT6_SPOTDIR_XY"},
{0x1A7, "GPUREG_LIGHT6_SPOTDIR_Z"},
{0x1A9, "GPUREG_LIGHT6_CONFIG"},
{0x1AA, "GPUREG_LIGHT6_ATTENUATION_BIAS"},
{0x1AB, "GPUREG_LIGHT6_ATTENUATION_SCALE"},
{0x1B0, "GPUREG_LIGHT7_SPECULAR0"},
{0x1B1, "GPUREG_LIGHT7_SPECULAR1"},
{0x1B2, "GPUREG_LIGHT7_DIFFUSE"},
{0x1B3, "GPUREG_LIGHT7_AMBIENT"},
{0x1B4, "GPUREG_LIGHT7_XY"},
{0x1B5, "GPUREG_LIGHT7_Z"},
{0x1B6, "GPUREG_LIGHT7_SPOTDIR_XY"},
{0x1B7, "GPUREG_LIGHT7_SPOTDIR_Z"},
{0x1B9, "GPUREG_LIGHT7_CONFIG"},
{0x1BA, "GPUREG_LIGHT7_ATTENUATION_BIAS"},
{0x1BB, "GPUREG_LIGHT7_ATTENUATION_SCALE"},
{0x1C0, "GPUREG_LIGHTING_AMBIENT"},
{0x1C2, "GPUREG_LIGHTING_NUM_LIGHTS"},
{0x1C3, "GPUREG_LIGHTING_CONFIG0"},
{0x1C4, "GPUREG_LIGHTING_CONFIG1"},
{0x1C5, "GPUREG_LIGHTING_LUT_INDEX"},
{0x1C6, "GPUREG_LIGHTING_ENABLE1"},
{0x1C8, "GPUREG_LIGHTING_LUT_DATA0"},
{0x1C9, "GPUREG_LIGHTING_LUT_DATA1"},
{0x1CA, "GPUREG_LIGHTING_LUT_DATA2"},
{0x1CB, "GPUREG_LIGHTING_LUT_DATA3"},
{0x1CC, "GPUREG_LIGHTING_LUT_DATA4"},
{0x1CD, "GPUREG_LIGHTING_LUT_DATA5"},
{0x1CE, "GPUREG_LIGHTING_LUT_DATA6"},
{0x1CF, "GPUREG_LIGHTING_LUT_DATA7"},
{0x1D0, "GPUREG_LIGHTING_LUTINPUT_ABS"},
{0x1D1, "GPUREG_LIGHTING_LUTINPUT_SELECT"},
{0x1D2, "GPUREG_LIGHTING_LUTINPUT_SCALE"},
{0x1D9, "GPUREG_LIGHTING_LIGHT_PERMUTATION"},
{0x200, "GPUREG_ATTRIBBUFFERS_LOC"},
{0x201, "GPUREG_ATTRIBBUFFERS_FORMAT_LOW"},
{0x202, "GPUREG_ATTRIBBUFFERS_FORMAT_HIGH"},
{0x203, "GPUREG_ATTRIBBUFFER0_OFFSET"},
{0x204, "GPUREG_ATTRIBBUFFER0_CONFIG1"},
{0x205, "GPUREG_ATTRIBBUFFER0_CONFIG2"},
{0x206, "GPUREG_ATTRIBBUFFER1_OFFSET"},
{0x207, "GPUREG_ATTRIBBUFFER1_CONFIG1"},
{0x208, "GPUREG_ATTRIBBUFFER1_CONFIG2"},
{0x209, "GPUREG_ATTRIBBUFFER2_OFFSET"},
{0x20A, "GPUREG_ATTRIBBUFFER2_CONFIG1"},
{0x20B, "GPUREG_ATTRIBBUFFER2_CONFIG2"},
{0x20C, "GPUREG_ATTRIBBUFFER3_OFFSET"},
{0x20D, "GPUREG_ATTRIBBUFFER3_CONFIG1"},
{0x20E, "GPUREG_ATTRIBBUFFER3_CONFIG2"},
{0x20F, "GPUREG_ATTRIBBUFFER4_OFFSET"},
{0x210, "GPUREG_ATTRIBBUFFER4_CONFIG1"},
{0x211, "GPUREG_ATTRIBBUFFER4_CONFIG2"},
{0x212, "GPUREG_ATTRIBBUFFER5_OFFSET"},
{0x213, "GPUREG_ATTRIBBUFFER5_CONFIG1"},
{0x214, "GPUREG_ATTRIBBUFFER5_CONFIG2"},
{0x215, "GPUREG_ATTRIBBUFFER6_OFFSET"},
{0x216, "GPUREG_ATTRIBBUFFER6_CONFIG1"},
{0x217, "GPUREG_ATTRIBBUFFER6_CONFIG2"},
{0x218, "GPUREG_ATTRIBBUFFER7_OFFSET"},
{0x219, "GPUREG_ATTRIBBUFFER7_CONFIG1"},
{0x21A, "GPUREG_ATTRIBBUFFER7_CONFIG2"},
{0x21B, "GPUREG_ATTRIBBUFFER8_OFFSET"},
{0x21C, "GPUREG_ATTRIBBUFFER8_CONFIG1"},
{0x21D, "GPUREG_ATTRIBBUFFER8_CONFIG2"},
{0x21E, "GPUREG_ATTRIBBUFFER9_OFFSET"},
{0x21F, "GPUREG_ATTRIBBUFFER9_CONFIG1"},
{0x220, "GPUREG_ATTRIBBUFFER9_CONFIG2"},
{0x221, "GPUREG_ATTRIBBUFFER10_OFFSET"},
{0x222, "GPUREG_ATTRIBBUFFER10_CONFIG1"},
{0x223, "GPUREG_ATTRIBBUFFER10_CONFIG2"},
{0x224, "GPUREG_ATTRIBBUFFER11_OFFSET"},
{0x225, "GPUREG_ATTRIBBUFFER11_CONFIG1"},
{0x226, "GPUREG_ATTRIBBUFFER11_CONFIG2"},
{0x227, "GPUREG_INDEXBUFFER_CONFIG"},
{0x228, "GPUREG_NUMVERTICES"},
{0x229, "GPUREG_GEOSTAGE_CONFIG"},
{0x22A, "GPUREG_VERTEX_OFFSET"},
{0x22D, "GPUREG_POST_VERTEX_CACHE_NUM"},
{0x22E, "GPUREG_DRAWARRAYS"},
{0x22F, "GPUREG_DRAWELEMENTS"},
{0x231, "GPUREG_VTX_FUNC"},
{0x232, "GPUREG_FIXEDATTRIB_INDEX"},
{0x233, "GPUREG_FIXEDATTRIB_DATA0"},
{0x234, "GPUREG_FIXEDATTRIB_DATA1"},
{0x235, "GPUREG_FIXEDATTRIB_DATA2"},
{0x238, "GPUREG_CMDBUF_SIZE0"},
{0x239, "GPUREG_CMDBUF_SIZE1"},
{0x23A, "GPUREG_CMDBUF_ADDR0"},
{0x23B, "GPUREG_CMDBUF_ADDR1"},
{0x23C, "GPUREG_CMDBUF_JUMP0"},
{0x23D, "GPUREG_CMDBUF_JUMP1"},
{0x242, "GPUREG_VSH_NUM_ATTR"},
{0x244, "GPUREG_VSH_COM_MODE"},
{0x245, "GPUREG_START_DRAW_FUNC0"},
{0x24A, "GPUREG_VSH_OUTMAP_TOTAL1"},
{0x251, "GPUREG_VSH_OUTMAP_TOTAL2"},
{0x252, "GPUREG_GSH_MISC0"},
{0x253, "GPUREG_GEOSTAGE_CONFIG2"},
{0x254, "GPUREG_GSH_MISC1"},
{0x25E, "GPUREG_PRIMITIVE_CONFIG"},
{0x25F, "GPUREG_RESTART_PRIMITIVE"},
{0x280, "GPUREG_GSH_BOOLUNIFORM"},
{0x281, "GPUREG_GSH_INTUNIFORM_I0"},
{0x282, "GPUREG_GSH_INTUNIFORM_I1"},
{0x283, "GPUREG_GSH_INTUNIFORM_I2"},
{0x284, "GPUREG_GSH_INTUNIFORM_I3"},
{0x289, "GPUREG_GSH_INPUTBUFFER_CONFIG"},
{0x28A, "GPUREG_GSH_ENTRYPOINT"},
{0x28B, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW"},
{0x28C, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH"},
{0x28D, "GPUREG_GSH_OUTMAP_MASK"},
{0x28F, "GPUREG_GSH_CODETRANSFER_END"},
{0x290, "GPUREG_GSH_FLOATUNIFORM_INDEX"},
{0x291, "GPUREG_GSH_FLOATUNIFORM_DATA0"},
{0x292, "GPUREG_GSH_FLOATUNIFORM_DATA1"},
{0x293, "GPUREG_GSH_FLOATUNIFORM_DATA2"},
{0x294, "GPUREG_GSH_FLOATUNIFORM_DATA3"},
{0x295, "GPUREG_GSH_FLOATUNIFORM_DATA4"},
{0x296, "GPUREG_GSH_FLOATUNIFORM_DATA5"},
{0x297, "GPUREG_GSH_FLOATUNIFORM_DATA6"},
{0x298, "GPUREG_GSH_FLOATUNIFORM_DATA7"},
{0x29B, "GPUREG_GSH_CODETRANSFER_INDEX"},
{0x29C, "GPUREG_GSH_CODETRANSFER_DATA0"},
{0x29D, "GPUREG_GSH_CODETRANSFER_DATA1"},
{0x29E, "GPUREG_GSH_CODETRANSFER_DATA2"},
{0x29F, "GPUREG_GSH_CODETRANSFER_DATA3"},
{0x2A0, "GPUREG_GSH_CODETRANSFER_DATA4"},
{0x2A1, "GPUREG_GSH_CODETRANSFER_DATA5"},
{0x2A2, "GPUREG_GSH_CODETRANSFER_DATA6"},
{0x2A3, "GPUREG_GSH_CODETRANSFER_DATA7"},
{0x2A5, "GPUREG_GSH_OPDESCS_INDEX"},
{0x2A6, "GPUREG_GSH_OPDESCS_DATA0"},
{0x2A7, "GPUREG_GSH_OPDESCS_DATA1"},
{0x2A8, "GPUREG_GSH_OPDESCS_DATA2"},
{0x2A9, "GPUREG_GSH_OPDESCS_DATA3"},
{0x2AA, "GPUREG_GSH_OPDESCS_DATA4"},
{0x2AB, "GPUREG_GSH_OPDESCS_DATA5"},
{0x2AC, "GPUREG_GSH_OPDESCS_DATA6"},
{0x2AD, "GPUREG_GSH_OPDESCS_DATA7"},
{0x2B0, "GPUREG_VSH_BOOLUNIFORM"},
{0x2B1, "GPUREG_VSH_INTUNIFORM_I0"},
{0x2B2, "GPUREG_VSH_INTUNIFORM_I1"},
{0x2B3, "GPUREG_VSH_INTUNIFORM_I2"},
{0x2B4, "GPUREG_VSH_INTUNIFORM_I3"},
{0x2B9, "GPUREG_VSH_INPUTBUFFER_CONFIG"},
{0x2BA, "GPUREG_VSH_ENTRYPOINT"},
{0x2BB, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW"},
{0x2BC, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH"},
{0x2BD, "GPUREG_VSH_OUTMAP_MASK"},
{0x2BF, "GPUREG_VSH_CODETRANSFER_END"},
{0x2C0, "GPUREG_VSH_FLOATUNIFORM_INDEX"},
{0x2C1, "GPUREG_VSH_FLOATUNIFORM_DATA0"},
{0x2C2, "GPUREG_VSH_FLOATUNIFORM_DATA1"},
{0x2C3, "GPUREG_VSH_FLOATUNIFORM_DATA2"},
{0x2C4, "GPUREG_VSH_FLOATUNIFORM_DATA3"},
{0x2C5, "GPUREG_VSH_FLOATUNIFORM_DATA4"},
{0x2C6, "GPUREG_VSH_FLOATUNIFORM_DATA5"},
{0x2C7, "GPUREG_VSH_FLOATUNIFORM_DATA6"},
{0x2C8, "GPUREG_VSH_FLOATUNIFORM_DATA7"},
{0x2CB, "GPUREG_VSH_CODETRANSFER_INDEX"},
{0x2CC, "GPUREG_VSH_CODETRANSFER_DATA0"},
{0x2CD, "GPUREG_VSH_CODETRANSFER_DATA1"},
{0x2CE, "GPUREG_VSH_CODETRANSFER_DATA2"},
{0x2CF, "GPUREG_VSH_CODETRANSFER_DATA3"},
{0x2D0, "GPUREG_VSH_CODETRANSFER_DATA4"},
{0x2D1, "GPUREG_VSH_CODETRANSFER_DATA5"},
{0x2D2, "GPUREG_VSH_CODETRANSFER_DATA6"},
{0x2D3, "GPUREG_VSH_CODETRANSFER_DATA7"},
{0x2D5, "GPUREG_VSH_OPDESCS_INDEX"},
{0x2D6, "GPUREG_VSH_OPDESCS_DATA0"},
{0x2D7, "GPUREG_VSH_OPDESCS_DATA1"},
{0x2D8, "GPUREG_VSH_OPDESCS_DATA2"},
{0x2D9, "GPUREG_VSH_OPDESCS_DATA3"},
{0x2DA, "GPUREG_VSH_OPDESCS_DATA4"},
{0x2DB, "GPUREG_VSH_OPDESCS_DATA5"},
{0x2DC, "GPUREG_VSH_OPDESCS_DATA6"},
{0x2DD, "GPUREG_VSH_OPDESCS_DATA7"},
};
/// Looks up the human-readable name of a PICA register.
/// @param index Register index to look up.
/// @return The name stored in register_names for that index, or an empty string when the
///         index has no entry.
const char* RegsInternal::GetRegisterName(u16 index) {
    // register_names is sorted by index, so a binary search finds the first entry whose
    // index is not less than the requested one.
    const auto it = std::lower_bound(std::begin(register_names), std::end(register_names), index,
                                     [](const auto& entry, u16 key) { return entry.first < key; });
    // lower_bound yields the end iterator when index is larger than every entry in the
    // table; that iterator must not be dereferenced.
    if (it != std::end(register_names) && it->first == index) {
        return it->second;
    }
    // Return empty string if no match is found
    return "";
}
} // namespace Pica

View File

@@ -0,0 +1,116 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/pica/regs_framebuffer.h"
#include "video_core/pica/regs_lighting.h"
#include "video_core/pica/regs_pipeline.h"
#include "video_core/pica/regs_rasterizer.h"
#include "video_core/pica/regs_shader.h"
#include "video_core/pica/regs_texturing.h"
namespace Pica {
// Yields the register index (byte offset divided by sizeof(u32)) of a RegsInternal field.
#define PICA_REG_INDEX(field_name) (offsetof(Pica::RegsInternal, field_name) / sizeof(u32))
// Complete PICA internal register file. The union exposes the same storage either through
// the named register groups or as a flat array of NUM_REGS 32-bit words. Member order and
// padding define the register layout and must not be changed.
struct RegsInternal {
// Total number of 32-bit registers in the file.
static constexpr std::size_t NUM_REGS = 0x300;
union {
// Structured view: register groups placed at their register indices by explicit padding.
struct {
INSERT_PADDING_WORDS(0x10);
u32 trigger_irq;
INSERT_PADDING_WORDS(0x2f);
RasterizerRegs rasterizer;
TexturingRegs texturing;
FramebufferRegs framebuffer;
LightingRegs lighting;
PipelineRegs pipeline;
ShaderRegs gs;
ShaderRegs vs;
INSERT_PADDING_WORDS(0x20);
};
// Flat view of the same storage, one element per register.
std::array<u32, NUM_REGS> reg_array;
};
/// Map register indices to names readable by humans
static const char* GetRegisterName(u16 index);
};
// The whole register file must be exactly NUM_REGS 32-bit words.
static_assert(sizeof(RegsInternal) == RegsInternal::NUM_REGS * sizeof(u32),
"Regs struct has wrong size");
// Compile-time check that a field sits at the given register index; `position` is a
// word index, so the expected byte offset is position * 4.
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(RegsInternal, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(trigger_irq, 0x10);
ASSERT_REG_POSITION(rasterizer, 0x40);
ASSERT_REG_POSITION(rasterizer.cull_mode, 0x40);
ASSERT_REG_POSITION(rasterizer.viewport_size_x, 0x41);
ASSERT_REG_POSITION(rasterizer.viewport_size_y, 0x43);
ASSERT_REG_POSITION(rasterizer.viewport_depth_range, 0x4d);
ASSERT_REG_POSITION(rasterizer.viewport_depth_near_plane, 0x4e);
ASSERT_REG_POSITION(rasterizer.vs_output_attributes[0], 0x50);
ASSERT_REG_POSITION(rasterizer.vs_output_attributes[1], 0x51);
ASSERT_REG_POSITION(rasterizer.scissor_test, 0x65);
ASSERT_REG_POSITION(rasterizer.viewport_corner, 0x68);
ASSERT_REG_POSITION(rasterizer.depthmap_enable, 0x6D);
ASSERT_REG_POSITION(texturing, 0x80);
ASSERT_REG_POSITION(texturing.main_config, 0x80);
ASSERT_REG_POSITION(texturing.texture0, 0x81);
ASSERT_REG_POSITION(texturing.texture0_format, 0x8e);
ASSERT_REG_POSITION(texturing.fragment_lighting_enable, 0x8f);
ASSERT_REG_POSITION(texturing.texture1, 0x91);
ASSERT_REG_POSITION(texturing.texture1_format, 0x96);
ASSERT_REG_POSITION(texturing.texture2, 0x99);
ASSERT_REG_POSITION(texturing.texture2_format, 0x9e);
ASSERT_REG_POSITION(texturing.proctex, 0xa8);
ASSERT_REG_POSITION(texturing.proctex_noise_u, 0xa9);
ASSERT_REG_POSITION(texturing.proctex_noise_v, 0xaa);
ASSERT_REG_POSITION(texturing.proctex_noise_frequency, 0xab);
ASSERT_REG_POSITION(texturing.proctex_lut, 0xac);
ASSERT_REG_POSITION(texturing.proctex_lut_offset, 0xad);
ASSERT_REG_POSITION(texturing.proctex_lut_config, 0xaf);
ASSERT_REG_POSITION(texturing.tev_stage0, 0xc0);
ASSERT_REG_POSITION(texturing.tev_stage1, 0xc8);
ASSERT_REG_POSITION(texturing.tev_stage2, 0xd0);
ASSERT_REG_POSITION(texturing.tev_stage3, 0xd8);
ASSERT_REG_POSITION(texturing.tev_combiner_buffer_input, 0xe0);
ASSERT_REG_POSITION(texturing.fog_mode, 0xe0);
ASSERT_REG_POSITION(texturing.fog_color, 0xe1);
ASSERT_REG_POSITION(texturing.fog_lut_offset, 0xe6);
ASSERT_REG_POSITION(texturing.fog_lut_data, 0xe8);
ASSERT_REG_POSITION(texturing.tev_stage4, 0xf0);
ASSERT_REG_POSITION(texturing.tev_stage5, 0xf8);
ASSERT_REG_POSITION(texturing.tev_combiner_buffer_color, 0xfd);
ASSERT_REG_POSITION(framebuffer, 0x100);
ASSERT_REG_POSITION(framebuffer.output_merger, 0x100);
ASSERT_REG_POSITION(framebuffer.framebuffer, 0x110);
ASSERT_REG_POSITION(lighting, 0x140);
ASSERT_REG_POSITION(pipeline, 0x200);
ASSERT_REG_POSITION(pipeline.vertex_attributes, 0x200);
ASSERT_REG_POSITION(pipeline.index_array, 0x227);
ASSERT_REG_POSITION(pipeline.num_vertices, 0x228);
ASSERT_REG_POSITION(pipeline.vertex_offset, 0x22a);
ASSERT_REG_POSITION(pipeline.trigger_draw, 0x22e);
ASSERT_REG_POSITION(pipeline.trigger_draw_indexed, 0x22f);
ASSERT_REG_POSITION(pipeline.vs_default_attributes_setup, 0x232);
ASSERT_REG_POSITION(pipeline.command_buffer, 0x238);
ASSERT_REG_POSITION(pipeline.gpu_mode, 0x245);
ASSERT_REG_POSITION(pipeline.triangle_topology, 0x25e);
ASSERT_REG_POSITION(pipeline.restart_primitive, 0x25f);
ASSERT_REG_POSITION(gs, 0x280);
ASSERT_REG_POSITION(vs, 0x2b0);
// The helper macro is only meaningful for the checks above.
#undef ASSERT_REG_POSITION
} // namespace Pica

View File

@@ -0,0 +1,78 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <boost/serialization/access.hpp>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/vector_math.h"
// Yields the register index (byte offset divided by sizeof(u32)) of a RegsLcd field.
#define LCD_REG_INDEX(field_name) (offsetof(Pica::RegsLcd, field_name) / sizeof(u32))
namespace Pica {
// One 32-bit color fill register: three 8-bit color channels plus an enable bit,
// all overlaid on the same word as `raw`.
union ColorFill {
u32 raw;
BitField<0, 8, u32> color_r;
BitField<8, 8, u32> color_g;
BitField<16, 8, u32> color_b;
BitField<24, 1, u32> is_enabled;
// Returns the three color channels as an (r, g, b) vector of u8.
Common::Vec3<u8> AsVector() const noexcept {
return Common::MakeVec<u8>(color_r, color_g, color_b);
}
};
// LCD register block. The named fields are placed at fixed word offsets by the explicit
// padding (the offsets are verified by static_asserts after the struct); member order
// must not be changed.
struct RegsLcd {
INSERT_PADDING_WORDS(0x81);
ColorFill color_fill_top;
INSERT_PADDING_WORDS(0xE);
u32 backlight_top;
INSERT_PADDING_WORDS(0x1F0);
ColorFill color_fill_bottom;
INSERT_PADDING_WORDS(0xE);
u32 backlight_bottom;
INSERT_PADDING_WORDS(0x16F);
// Number of 32-bit words covered by this struct.
static constexpr std::size_t NumIds() {
return sizeof(RegsLcd) / sizeof(u32);
}
// Raw word access: views the struct as an array of u32 and returns element `index`.
// No bounds checking is performed.
const u32& operator[](int index) const {
const u32* content = reinterpret_cast<const u32*>(this);
return content[index];
}
// Mutable counterpart of the accessor above; likewise unchecked.
u32& operator[](int index) {
u32* content = reinterpret_cast<u32*>(this);
return content[index];
}
private:
// Serializes only the four named registers; the padding words are not archived.
template <class Archive>
void serialize(Archive& ar, const unsigned int) {
ar& color_fill_top.raw;
ar& backlight_top;
ar& color_fill_bottom.raw;
ar& backlight_bottom;
}
friend class boost::serialization::access;
};
// offsetof is used on RegsLcd (here and in LCD_REG_INDEX), so require standard layout.
static_assert(std::is_standard_layout_v<RegsLcd>, "Structure does not use standard layout");
// Compile-time check that a field sits at the given register index; `position` is a
// word index, so the expected byte offset is position * 4.
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(RegsLcd, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(color_fill_top, 0x81);
ASSERT_REG_POSITION(backlight_top, 0x90);
ASSERT_REG_POSITION(color_fill_bottom, 0x281);
ASSERT_REG_POSITION(backlight_bottom, 0x290);
#undef ASSERT_REG_POSITION
} // namespace Pica

View File

@@ -0,0 +1,327 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/vector_math.h"
namespace Pica {
/**
 * Register block for PICA fragment lighting: eight light sources followed by the global
 * configuration, the lighting LUT upload interface and the LUT input/scale selectors.
 * Member order and padding define the register layout and must not be changed.
 */
struct LightingRegs {
    /// Identifies a lighting look-up table. SpotlightAttenuation and DistanceAttenuation are
    /// base values; the helper functions below add an index to them.
    enum class LightingSampler {
        Distribution0 = 0,
        Distribution1 = 1,
        Fresnel = 3,
        ReflectBlue = 4,
        ReflectGreen = 5,
        ReflectRed = 6,
        SpotlightAttenuation = 8,
        DistanceAttenuation = 16,
    };

    // NOTE(review): presumably the sampler value space (8 base slots plus 8 spotlight and
    // 8 distance attenuation entries) — confirm against the users of this constant.
    static constexpr u32 NumLightingSampler = 24;

    /// Returns the sampler located `index` entries after SpotlightAttenuation.
    static LightingSampler SpotlightAttenuationSampler(u32 index) {
        return static_cast<LightingSampler>(
            static_cast<u32>(LightingSampler::SpotlightAttenuation) + index);
    }

    /// Returns the sampler located `index` entries after DistanceAttenuation.
    static LightingSampler DistanceAttenuationSampler(u32 index) {
        return static_cast<LightingSampler>(static_cast<u32>(LightingSampler::DistanceAttenuation) +
                                            index);
    }

    /**
     * Pica fragment lighting supports using different LUTs for each lighting component: Reflectance
     * R, G, and B channels, distribution function for specular components 0 and 1, fresnel factor,
     * and spotlight attenuation. Furthermore, which LUTs are used for each channel (or whether a
     * channel is enabled at all) is specified by various pre-defined lighting configurations. With
     * configurations that require more LUTs, more cycles are required on HW to perform lighting
     * computations.
     */
    enum class LightingConfig : u32 {
        Config0 = 0, ///< Reflect Red, Distribution 0, Spotlight
        Config1 = 1, ///< Reflect Red, Fresnel, Spotlight
        Config2 = 2, ///< Reflect Red, Distribution 0/1
        Config3 = 3, ///< Distribution 0/1, Fresnel
        Config4 = 4, ///< Reflect Red/Green/Blue, Distribution 0/1, Spotlight
        Config5 = 5, ///< Reflect Red/Green/Blue, Distribution 0, Fresnel, Spotlight
        Config6 = 6, ///< Reflect Red, Distribution 0/1, Fresnel, Spotlight
        Config7 = 8, ///< Reflect Red/Green/Blue, Distribution 0/1, Fresnel, Spotlight
                     ///< NOTE: '8' is intentional, '7' does not appear to be a valid configuration
    };

    /// Factor used to scale the output of a lighting LUT
    enum class LightingScale : u32 {
        Scale1 = 0,   ///< Scale is 1x
        Scale2 = 1,   ///< Scale is 2x
        Scale4 = 2,   ///< Scale is 4x
        Scale8 = 3,   ///< Scale is 8x
        Scale1_4 = 6, ///< Scale is 0.25x
        Scale1_2 = 7, ///< Scale is 0.5x
    };

    /// Selects which dot product is used as the input (index) of a lighting LUT
    enum class LightingLutInput : u32 {
        NH = 0, // Cosine of the angle between the normal and half-angle vectors
        VH = 1, // Cosine of the angle between the view and half-angle vectors
        NV = 2, // Cosine of the angle between the normal and the view vector
        LN = 3, // Cosine of the angle between the light and the normal vectors
        SP = 4, // Cosine of the angle between the light and the inverse spotlight vectors
        CP = 5, // Cosine of the angle between the tangent and projection of half-angle vectors
    };

    /// Bump mapping mode selected through config0.bump_mode
    enum class LightingBumpMode : u32 {
        None = 0,
        NormalMap = 1,
        TangentMap = 2,
    };

    /// Packed RGB color with 10 bits per channel
    union LightColor {
        BitField<0, 10, u32> b;
        BitField<10, 10, u32> g;
        BitField<20, 10, u32> r;

        /// Converts the packed color to a float vector in (r, g, b) order.
        Common::Vec3f ToVec3f() const {
            // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color
            // component
            return Common::MakeVec(static_cast<f32>(r) / 255.f, static_cast<f32>(g) / 255.f,
                                   static_cast<f32>(b) / 255.f);
        }
    };

    /// Returns true if the specified lighting sampler is supported by the current Pica lighting
    /// configuration. Only the base sampler values listed in the switch are accepted; any other
    /// value is reported as unreachable and yields false.
    static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) {
        switch (sampler) {
        case LightingSampler::Distribution0:
            return (config != LightingConfig::Config1);
        case LightingSampler::Distribution1:
            return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) &&
                   (config != LightingConfig::Config5);
        case LightingSampler::SpotlightAttenuation:
            return (config != LightingConfig::Config2) && (config != LightingConfig::Config3);
        case LightingSampler::Fresnel:
            return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) &&
                   (config != LightingConfig::Config4);
        case LightingSampler::ReflectRed:
            return (config != LightingConfig::Config3);
        case LightingSampler::ReflectGreen:
        case LightingSampler::ReflectBlue:
            return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) ||
                   (config == LightingConfig::Config7);
        default:
            // The message describes the offending sampler, so log the sampler (not the config),
            // using the {}-style placeholder used by the other assert messages in this code base.
            UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached unreachable section, "
                            "sampler should be one of Distribution0, Distribution1, "
                            "SpotlightAttenuation, Fresnel, ReflectRed, ReflectGreen or "
                            "ReflectBlue, instead got {}",
                            static_cast<u32>(sampler));
        }
        return false;
    }

    /// Registers of a single light source (0x10 words)
    struct LightSrc {
        LightColor specular_0; // material.specular_0 * light.specular_0
        LightColor specular_1; // material.specular_1 * light.specular_1
        LightColor diffuse;    // material.diffuse * light.diffuse
        LightColor ambient;    // material.ambient * light.ambient

        // Encoded as 16-bit floating point
        union {
            BitField<0, 16, u32> x;
            BitField<16, 16, u32> y;
        };
        union {
            BitField<0, 16, u32> z;
        };

        // inverse spotlight direction vector, encoded as fixed1.1.11
        union {
            BitField<0, 13, s32> spot_x;
            BitField<16, 13, s32> spot_y;
        };
        union {
            BitField<0, 13, s32> spot_z;
        };

        INSERT_PADDING_WORDS(0x1);

        union {
            BitField<0, 1, u32> directional;
            BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0
            BitField<2, 1, u32> geometric_factor_0;
            BitField<3, 1, u32> geometric_factor_1;
        } config;

        BitField<0, 20, u32> dist_atten_bias;
        BitField<0, 20, u32> dist_atten_scale;

        INSERT_PADDING_WORDS(0x4);
    };
    static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words");

    LightSrc light[8];
    LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)
    INSERT_PADDING_WORDS(0x1);
    BitField<0, 3, u32> max_light_index; // Number of enabled lights - 1

    union {
        BitField<0, 1, u32> enable_shadow;
        BitField<2, 1, u32> enable_primary_alpha;
        BitField<3, 1, u32> enable_secondary_alpha;
        BitField<4, 4, LightingConfig> config;
        BitField<16, 1, u32> shadow_primary;
        BitField<17, 1, u32> shadow_secondary;
        BitField<18, 1, u32> shadow_invert;
        BitField<19, 1, u32> shadow_alpha;
        BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2
        BitField<24, 2, u32> shadow_selector;
        BitField<27, 1, u32> clamp_highlights;
        BitField<28, 2, LightingBumpMode> bump_mode;
        BitField<30, 1, u32> disable_bump_renorm;
    } config0;

    union {
        u32 raw;

        // Each bit specifies whether shadow should be applied for the corresponding light.
        BitField<0, 8, u32> disable_shadow;

        // Each bit specifies whether spot light attenuation should be applied for the corresponding
        // light.
        BitField<8, 8, u32> disable_spot_atten;

        BitField<16, 1, u32> disable_lut_d0;
        BitField<17, 1, u32> disable_lut_d1;
        // Note: by intuition, BitField<18, 1, u32> should be disable_lut_sp, but it is actually a
        // dummy bit which is always set as 1.
        BitField<19, 1, u32> disable_lut_fr;
        BitField<20, 1, u32> disable_lut_rr;
        BitField<21, 1, u32> disable_lut_rg;
        BitField<22, 1, u32> disable_lut_rb;

        // Each bit specifies whether distance attenuation should be applied for the corresponding
        // light.
        BitField<24, 8, u32> disable_dist_atten;
    } config1;

    /// True when bit `index` of config1.disable_dist_atten is set.
    bool IsDistAttenDisabled(unsigned index) const {
        return (config1.disable_dist_atten & (1 << index)) != 0;
    }

    /// True when bit `index` of config1.disable_spot_atten is set.
    bool IsSpotAttenDisabled(unsigned index) const {
        return (config1.disable_spot_atten & (1 << index)) != 0;
    }

    /// True when bit `index` of config1.disable_shadow is set.
    bool IsShadowDisabled(unsigned index) const {
        return (config1.disable_shadow & (1 << index)) != 0;
    }

    union {
        BitField<0, 8, u32> index; ///< Index at which to set data in the LUT
        BitField<8, 5, u32> type;  ///< Type of LUT for which to set data
    } lut_config;

    BitField<0, 1, u32> disable;
    INSERT_PADDING_WORDS(0x1);

    // When data is written to any of these registers, it gets written to the lookup table of the
    // selected type at the selected index, specified above in the `lut_config` register. With each
    // write, `lut_config.index` is incremented. It does not matter which of these registers is
    // written to, the behavior will be the same.
    u32 lut_data[8];

    // These are used to specify if absolute (abs) value should be used for each LUT index. When
    // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in
    // the range of (0.0, 1.0).
    union {
        BitField<1, 1, u32> disable_d0;
        BitField<5, 1, u32> disable_d1;
        BitField<9, 1, u32> disable_sp;
        BitField<13, 1, u32> disable_fr;
        BitField<17, 1, u32> disable_rb;
        BitField<21, 1, u32> disable_rg;
        BitField<25, 1, u32> disable_rr;
    } abs_lut_input;

    union {
        BitField<0, 3, LightingLutInput> d0;
        BitField<4, 3, LightingLutInput> d1;
        BitField<8, 3, LightingLutInput> sp;
        BitField<12, 3, LightingLutInput> fr;
        BitField<16, 3, LightingLutInput> rb;
        BitField<20, 3, LightingLutInput> rg;
        BitField<24, 3, LightingLutInput> rr;
    } lut_input;

    union {
        BitField<0, 3, LightingScale> d0;
        BitField<4, 3, LightingScale> d1;
        BitField<8, 3, LightingScale> sp;
        BitField<12, 3, LightingScale> fr;
        BitField<16, 3, LightingScale> rb;
        BitField<20, 3, LightingScale> rg;
        BitField<24, 3, LightingScale> rr;

        /// Converts a LightingScale enumerator to its multiplier; values that are not one of
        /// the enumerators yield 0.0f.
        static float GetScale(LightingScale scale) {
            switch (scale) {
            case LightingScale::Scale1:
                return 1.0f;
            case LightingScale::Scale2:
                return 2.0f;
            case LightingScale::Scale4:
                return 4.0f;
            case LightingScale::Scale8:
                return 8.0f;
            case LightingScale::Scale1_4:
                return 0.25f;
            case LightingScale::Scale1_2:
                return 0.5f;
            }
            return 0.0f;
        }
    } lut_scale;

    INSERT_PADDING_WORDS(0x6);

    union {
        // There are 8 light enable "slots", corresponding to the total number of lights supported
        // by Pica. For N enabled lights (specified by register 0x1c2, i.e. `max_light_index`
        // above, which holds N - 1), the first N slots below will be set to integers within the
        // range of 0-7, corresponding to the actual light that is enabled for each slot.
        BitField<0, 3, u32> slot_0;
        BitField<4, 3, u32> slot_1;
        BitField<8, 3, u32> slot_2;
        BitField<12, 3, u32> slot_3;
        BitField<16, 3, u32> slot_4;
        BitField<20, 3, u32> slot_5;
        BitField<24, 3, u32> slot_6;
        BitField<28, 3, u32> slot_7;

        /// Returns the light number stored in slot `index`; `index` must be below 8 because it
        /// indexes an 8-entry table without a bounds check.
        unsigned GetNum(unsigned index) const {
            const unsigned enable_slots[] = {slot_0, slot_1, slot_2, slot_3,
                                             slot_4, slot_5, slot_6, slot_7};
            return enable_slots[index];
        }
    } light_enable;

    INSERT_PADDING_WORDS(0x26);
};
// LightingRegs must span exactly 0xC0 32-bit register words.
static_assert(sizeof(LightingRegs) == 0xC0 * sizeof(u32), "LightingRegs struct has incorrect size");
} // namespace Pica

View File

@@ -0,0 +1,283 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Pica {
struct PipelineRegs {
// Element data type of a vertex attribute. The enumerator values are the raw 2-bit
// encodings stored in the formatN bit fields of the attribute descriptor.
enum class VertexAttributeFormat : u32 {
BYTE = 0,
UBYTE = 1,
SHORT = 2,
FLOAT = 3,
};
/// Size in bytes of a single element of the given vertex attribute format:
/// 4 for FLOAT, 2 for SHORT and 1 for BYTE/UBYTE. Any other value is flagged as unreachable.
static u32 GetFormatBytes(VertexAttributeFormat format) {
    if (format == VertexAttributeFormat::FLOAT) {
        return 4;
    }
    if (format == VertexAttributeFormat::SHORT) {
        return 2;
    }
    if (format == VertexAttributeFormat::BYTE || format == VertexAttributeFormat::UBYTE) {
        return 1;
    }
    UNREACHABLE();
}
struct {
BitField<1, 28, u32> base_address;
/// Returns the physical base address of the vertex data: the base_address field
/// scaled by 16.
PAddr GetPhysicalBaseAddress() const {
    const u32 units = base_address;
    return units * 16;
}
// Descriptor for internal vertex attributes
union {
BitField<0, 2, VertexAttributeFormat> format0; // size of one element
BitField<2, 2, u32> size0; // number of elements minus 1
BitField<4, 2, VertexAttributeFormat> format1;
BitField<6, 2, u32> size1;
BitField<8, 2, VertexAttributeFormat> format2;
BitField<10, 2, u32> size2;
BitField<12, 2, VertexAttributeFormat> format3;
BitField<14, 2, u32> size3;
BitField<16, 2, VertexAttributeFormat> format4;
BitField<18, 2, u32> size4;
BitField<20, 2, VertexAttributeFormat> format5;
BitField<22, 2, u32> size5;
BitField<24, 2, VertexAttributeFormat> format6;
BitField<26, 2, u32> size6;
BitField<28, 2, VertexAttributeFormat> format7;
BitField<30, 2, u32> size7;
};
union {
BitField<0, 2, VertexAttributeFormat> format8;
BitField<2, 2, u32> size8;
BitField<4, 2, VertexAttributeFormat> format9;
BitField<6, 2, u32> size9;
BitField<8, 2, VertexAttributeFormat> format10;
BitField<10, 2, u32> size10;
BitField<12, 2, VertexAttributeFormat> format11;
BitField<14, 2, u32> size11;
BitField<16, 12, u32> attribute_mask;
// number of total attributes minus 1
BitField<28, 4, u32> max_attribute_index;
};
/// Returns the element format configured for internal vertex attribute `n`.
/// @param n attribute index; only the 12 attributes described by format0-format11 exist,
///          so `n` must be below 12.
VertexAttributeFormat GetFormat(std::size_t n) const {
    // Guard the table lookup below, matching the index checks of the other accessors in
    // this register file.
    ASSERT(n < 12);
    const VertexAttributeFormat formats[] = {format0, format1, format2,  format3,
                                             format4, format5, format6,  format7,
                                             format8, format9, format10, format11};
    return formats[n];
}
/// Returns the number of elements of internal vertex attribute `n`.
/// The size fields store "number of elements minus 1", hence the +1.
/// @param n attribute index; must be below 12 (size0-size11).
u32 GetNumElements(std::size_t n) const {
    // Guard the table lookup below, matching the index checks of the other accessors in
    // this register file.
    ASSERT(n < 12);
    const u32 sizes[] = {size0, size1, size2, size3, size4,  size5,
                         size6, size7, size8, size9, size10, size11};
    return sizes[n] + 1;
}
/// Returns the size in bytes of one element of attribute `n`:
/// 4 for FLOAT, 2 for SHORT and 1 for every other format.
u32 GetElementSizeInBytes(std::size_t n) const {
    switch (GetFormat(n)) {
    case VertexAttributeFormat::FLOAT:
        return 4;
    case VertexAttributeFormat::SHORT:
        return 2;
    default:
        return 1;
    }
}
/// Returns the size in bytes of attribute `n`: element count times element size.
u32 GetStride(std::size_t n) const {
    const u32 element_count = GetNumElements(n);
    const u32 element_bytes = GetElementSizeInBytes(n);
    return element_count * element_bytes;
}
/// Reports whether attribute `id` is flagged as a default attribute. This is the case for
/// every id of 12 or above, and for lower ids whose bit is set in attribute_mask.
bool IsDefaultAttribute(std::size_t id) const {
    if (id >= 12) {
        return true;
    }
    return (attribute_mask & (1ULL << id)) != 0;
}
/// Returns the total number of attributes; the register stores that count minus 1.
u32 GetNumTotalAttributes() const {
    const u32 last_index = max_attribute_index;
    return last_index + 1;
}
// Attribute loaders map the source vertex data to input attributes.
// This allows, for example, loading different attributes from different memory locations.
struct {
// Source attribute data offset from the base address
BitField<0, 28, u32> data_offset;
union {
BitField<0, 4, u32> comp0;
BitField<4, 4, u32> comp1;
BitField<8, 4, u32> comp2;
BitField<12, 4, u32> comp3;
BitField<16, 4, u32> comp4;
BitField<20, 4, u32> comp5;
BitField<24, 4, u32> comp6;
BitField<28, 4, u32> comp7;
};
union {
BitField<0, 4, u32> comp8;
BitField<4, 4, u32> comp9;
BitField<8, 4, u32> comp10;
BitField<12, 4, u32> comp11;
// bytes for a single vertex in this loader
BitField<16, 8, u32> byte_count;
BitField<28, 4, u32> component_count;
};
/// Returns the 4-bit component id stored in slot `n` of this loader.
/// @param n component slot; only comp0-comp11 exist, so `n` must be below 12.
u32 GetComponent(std::size_t n) const {
    // Guard the table lookup below, matching the index checks of the other accessors in
    // this register file.
    ASSERT(n < 12);
    const u32 components[] = {comp0, comp1, comp2, comp3, comp4,  comp5,
                              comp6, comp7, comp8, comp9, comp10, comp11};
    return components[n];
}
} attribute_loaders[12];
} vertex_attributes;
struct {
enum IndexFormat : u32 {
BYTE = 0,
SHORT = 1,
};
union {
BitField<0, 28, u32> offset; // relative to base attribute address
BitField<28, 3, u32> unused;
BitField<31, 1, IndexFormat> format;
};
} index_array;
// Number of vertices to render
u32 num_vertices;
enum class UseGS : u32 {
No = 0,
Yes = 2,
};
union {
BitField<0, 2, UseGS> use_gs;
BitField<31, 1, u32> variable_primitive;
};
// The index of the first vertex to render
u32 vertex_offset;
INSERT_PADDING_WORDS(0x3);
// These two trigger rendering of triangles
u32 trigger_draw;
u32 trigger_draw_indexed;
INSERT_PADDING_WORDS(0x2);
// These registers are used to setup the default "fall-back" vertex shader attributes
struct {
// Index of the current default attribute
u32 index;
// Writing to these registers sets the "current" default attribute.
u32 set_value[3];
} vs_default_attributes_setup;
INSERT_PADDING_WORDS(0x2);
struct {
// There are two channels that can be used to configure the next command buffer, which can
// be then executed by writing to the "trigger" registers. There are two reasons why a game
// might use this feature:
// 1) With this, an arbitrary number of additional command buffers may be executed in
// sequence without requiring any intervention of the CPU after the initial one is
// kicked off.
// 2) Games can configure these registers to provide a command list subroutine mechanism.
// TODO: verify the bit length of these two fields
// According to 3dbrew, the bit length of them are 21 and 29, respectively
BitField<0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer
BitField<0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer
u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to
/// Returns the size in bytes of the command buffer of channel `index` (0 or 1).
/// The register stores the size divided by 8.
u32 GetSize(u32 index) const {
    ASSERT(index < 2);
    const u32 units = size[index];
    return units * 8;
}
/// Returns the physical address of the command buffer of channel `index` (0 or 1).
/// The register stores the address divided by 8.
PAddr GetPhysicalAddress(u32 index) const {
    ASSERT(index < 2);
    const u32 units = addr[index];
    return units * 8;
}
} command_buffer;
INSERT_PADDING_WORDS(4);
/// Number of input attributes to the vertex shader minus 1
BitField<0, 4, u32> max_input_attrib_index;
INSERT_PADDING_WORDS(1);
// The shader unit 3, which can be used for both vertex and geometry shader, gets its
// configuration depending on this register. If this is not set, unit 3 will share some
// configuration with other units. It is known that program code and swizzle pattern uploaded
// via regs.vs will be also uploaded to unit 3 if this is not set. Although very likely, it is
// still unclear whether uniforms and other configuration can be also shared.
BitField<0, 1, u32> gs_unit_exclusive_configuration;
enum class GPUMode : u32 {
Drawing = 0,
Configuring = 1,
};
GPUMode gpu_mode;
INSERT_PADDING_WORDS(0x4);
BitField<0, 4, u32> vs_outmap_total_minus_1_a;
INSERT_PADDING_WORDS(0x6);
BitField<0, 4, u32> vs_outmap_total_minus_1_b;
enum class GSMode : u32 {
Point = 0,
VariablePrimitive = 1,
FixedPrimitive = 2,
};
union {
BitField<0, 8, GSMode> mode;
BitField<8, 4, u32> fixed_vertex_num_minus_1;
BitField<12, 4, u32> stride_minus_1;
BitField<16, 4, u32> start_index;
} gs_config;
INSERT_PADDING_WORDS(0x1);
u32 variable_vertex_main_num_minus_1;
INSERT_PADDING_WORDS(0x9);
enum class TriangleTopology : u32 {
List = 0,
Strip = 1,
Fan = 2,
Shader = 3, // Programmable setup unit implemented in a geometry shader
};
BitField<8, 2, TriangleTopology> triangle_topology;
u32 restart_primitive;
INSERT_PADDING_WORDS(0x20);
};
static_assert(sizeof(PipelineRegs) == 0x80 * sizeof(u32), "PipelineRegs struct has incorrect size");
} // namespace Pica

View File

@@ -0,0 +1,165 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/math_util.h"
#include "common/vector_math.h"
#include "video_core/pica_types.h"
namespace Pica {
struct RasterizerRegs {
enum class CullMode : u32 {
// Select which polygons are considered to be "frontfacing".
KeepAll = 0,
KeepClockWise = 1,
KeepCounterClockWise = 2,
// TODO: What does the third value imply?
};
union {
BitField<0, 2, CullMode> cull_mode;
};
BitField<0, 24, u32> viewport_size_x;
INSERT_PADDING_WORDS(0x1);
BitField<0, 24, u32> viewport_size_y;
INSERT_PADDING_WORDS(0x3);
BitField<0, 1, u32> clip_enable;
BitField<0, 24, u32> clip_coef[4]; // float24
/// Decodes the four raw clip_coef registers into a vector of f24 values.
Common::Vec4<f24> GetClipCoef() const {
    const f24 x = f24::FromRaw(clip_coef[0]);
    const f24 y = f24::FromRaw(clip_coef[1]);
    const f24 z = f24::FromRaw(clip_coef[2]);
    const f24 w = f24::FromRaw(clip_coef[3]);
    return {x, y, z, w};
}
/// Builds the viewport rectangle from viewport_corner and the viewport size registers.
Common::Rectangle<s32> GetViewportRect() const {
    // These registers hold half-width and half-height, so must be multiplied by 2
    const s32 width = static_cast<s32>(f24::FromRaw(viewport_size_x).ToFloat32() * 2);
    const s32 height = static_cast<s32>(f24::FromRaw(viewport_size_y).ToFloat32() * 2);

    const s32 left = viewport_corner.x;
    const s32 bottom = viewport_corner.y;
    const s32 top = viewport_corner.y + height;
    const s32 right = viewport_corner.x + width;
    return {left, top, right, bottom};
}
INSERT_PADDING_WORDS(0x1);
BitField<0, 24, u32> viewport_depth_range; // float24
BitField<0, 24, u32> viewport_depth_near_plane; // float24
BitField<0, 3, u32> vs_output_total;
// One output-map register. Each of the four components (x, y, z, w) of an output
// attribute gets a 5-bit semantic id, stored at bit offsets 0, 8, 16 and 24.
union VSOutputAttributes {
// Maps components of output vertex attributes to semantics
enum Semantic : u32 {
POSITION_X = 0,
POSITION_Y = 1,
POSITION_Z = 2,
POSITION_W = 3,
QUATERNION_X = 4,
QUATERNION_Y = 5,
QUATERNION_Z = 6,
QUATERNION_W = 7,
COLOR_R = 8,
COLOR_G = 9,
COLOR_B = 10,
COLOR_A = 11,
TEXCOORD0_U = 12,
TEXCOORD0_V = 13,
TEXCOORD1_U = 14,
TEXCOORD1_V = 15,
TEXCOORD0_W = 16,
VIEW_X = 18,
VIEW_Y = 19,
VIEW_Z = 20,
TEXCOORD2_U = 22,
TEXCOORD2_V = 23,
INVALID = 31,
};
BitField<0, 5, Semantic> map_x;
BitField<8, 5, Semantic> map_y;
BitField<16, 5, Semantic> map_z;
BitField<24, 5, Semantic> map_w;
// Whole register word; ValidateSemantics decodes the four semantic ids from it.
u32 raw;
} vs_output_attributes[7];
void ValidateSemantics() {
for (std::size_t attrib = 0; attrib < vs_output_total; ++attrib) {
const u32 output_register_map = vs_output_attributes[attrib].raw;
for (std::size_t comp = 0; comp < 4; ++comp) {
const u32 semantic = (output_register_map >> (8 * comp)) & 0x1F;
ASSERT_MSG(semantic < 24 || semantic == VSOutputAttributes::INVALID,
"Invalid/unknown semantic id: {}", semantic);
}
}
}
INSERT_PADDING_WORDS(0xe);
enum class ScissorMode : u32 {
Disabled = 0,
Exclude = 1, // Exclude pixels inside the scissor box
Include = 3 // Exclude pixels outside the scissor box
};
struct {
BitField<0, 2, ScissorMode> mode;
union {
BitField<0, 10, u32> x1;
BitField<16, 10, u32> y1;
};
union {
BitField<0, 10, u32> x2;
BitField<16, 10, u32> y2;
};
} scissor_test;
union {
BitField<0, 10, s32> x;
BitField<16, 10, s32> y;
} viewport_corner;
INSERT_PADDING_WORDS(0x1);
// TODO: early depth
INSERT_PADDING_WORDS(0x1);
INSERT_PADDING_WORDS(0x2);
enum DepthBuffering : u32 {
WBuffering = 0,
ZBuffering = 1,
};
BitField<0, 1, DepthBuffering> depthmap_enable;
INSERT_PADDING_WORDS(0x12);
};
static_assert(sizeof(RasterizerRegs) == 0x40 * sizeof(u32),
"RasterizerRegs struct has incorrect size");
} // namespace Pica

View File

@@ -0,0 +1,114 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/vector_math.h"
namespace Pica {
struct ShaderRegs {
BitField<0, 16, u32> bool_uniforms;
union {
BitField<0, 8, u32> x;
BitField<8, 8, u32> y;
BitField<16, 8, u32> z;
BitField<24, 8, u32> w;
} int_uniforms[4];
/// Returns integer uniform `index` as a vector of its four 8-bit components.
/// `index` must be below 4 (the size of int_uniforms); it is not checked here.
Common::Vec4<u8> GetIntUniform(u32 index) const {
    const auto& reg = int_uniforms[index];
    const u8 x = static_cast<u8>(reg.x);
    const u8 y = static_cast<u8>(reg.y);
    const u8 z = static_cast<u8>(reg.z);
    const u8 w = static_cast<u8>(reg.w);
    return Common::MakeVec<u8>(x, y, z, w);
}
INSERT_PADDING_WORDS(0x4);
enum ShaderMode {
GS = 0x08,
VS = 0xA0,
};
union {
// Number of input attributes to shader unit - 1
BitField<0, 4, u32> max_input_attribute_index;
BitField<8, 8, u32> input_to_uniform;
BitField<24, 8, ShaderMode> shader_mode;
};
// Offset to shader program entry point (in words)
BitField<0, 16, u32> main_offset;
/// Maps input attributes to registers. 4-bits per attribute, specifying a register index
u32 input_attribute_to_register_map_low;
u32 input_attribute_to_register_map_high;
/// Returns the register index that input attribute `attribute_index` is mapped to.
/// The low and high map registers are treated as one 64-bit table of 4-bit entries.
u32 GetRegisterForAttribute(std::size_t attribute_index) const {
    const u64 high = input_attribute_to_register_map_high;
    const u64 low = input_attribute_to_register_map_low;
    const u64 table = (high << 32) | low;
    const std::size_t shift = attribute_index * 4;
    return static_cast<u32>((table >> shift) & 0xF);
}
BitField<0, 16, u32> output_mask;
// 0x28E, CODETRANSFER_END
INSERT_PADDING_WORDS(0x2);
// Register group through which float uniforms are uploaded: one control word
// (index + format) followed by eight data registers.
struct {
enum class Format : u32 {
Float24 = 0,
Float32 = 1,
};
// Returns true when the format field selects Format::Float32.
bool IsFloat32() const {
return format == Format::Float32;
}
union {
// Index of the next uniform to write to
// TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid
// indices
// TODO: Maybe the uppermost index is for the geometry shader? Investigate!
BitField<0, 7, u32> index;
// Bit 31 selects the format of the values written to set_value.
BitField<31, 1, Format> format;
};
// Writing to these registers sets the current uniform.
u32 set_value[8];
} uniform_setup;
INSERT_PADDING_WORDS(0x2);
struct {
// Offset of the next instruction to write code to.
// Incremented with each instruction write.
u32 offset;
// Writing to these registers sets the "current" word in the shader program.
u32 set_word[8];
} program;
INSERT_PADDING_WORDS(0x1);
// This register group is used to load an internal table of swizzling patterns,
// which are indexed by each shader instruction to specify vector component swizzling.
struct {
// Offset of the next swizzle pattern to write code to.
// Incremented with each instruction write.
u32 offset;
// Writing to these registers sets the current swizzle pattern in the table.
u32 set_word[8];
} swizzle_patterns;
INSERT_PADDING_WORDS(0x2);
};
static_assert(sizeof(ShaderRegs) == 0x30 * sizeof(u32), "ShaderRegs struct has incorrect size");
} // namespace Pica

View File

@@ -0,0 +1,465 @@
// Copyright 2017 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Pica {
struct TexturingRegs {
struct TextureConfig {
enum TextureType : u32 {
Texture2D = 0,
TextureCube = 1,
Shadow2D = 2,
Projection2D = 3,
ShadowCube = 4,
Disabled = 5,
};
enum WrapMode : u32 {
ClampToEdge = 0,
ClampToBorder = 1,
Repeat = 2,
MirroredRepeat = 3,
// Mode 4-7 produces some weird result and may be just invalid:
ClampToEdge2 = 4, // Positive coord: clamp to edge; negative coord: repeat
ClampToBorder2 = 5, // Positive coord: clamp to border; negative coord: repeat
Repeat2 = 6, // Same as Repeat
Repeat3 = 7, // Same as Repeat
};
enum TextureFilter : u32 {
Nearest = 0,
Linear = 1,
};
union {
u32 raw;
BitField<0, 8, u32> r;
BitField<8, 8, u32> g;
BitField<16, 8, u32> b;
BitField<24, 8, u32> a;
} border_color;
union {
BitField<0, 11, u32> height;
BitField<16, 11, u32> width;
};
union {
BitField<1, 1, TextureFilter> mag_filter;
BitField<2, 1, TextureFilter> min_filter;
BitField<8, 3, WrapMode> wrap_t;
BitField<12, 3, WrapMode> wrap_s;
BitField<24, 1, TextureFilter> mip_filter;
/// @note Only valid for texture 0 according to 3DBrew.
BitField<28, 3, TextureType> type;
};
union {
BitField<0, 13, s32> bias; // fixed1.4.8
BitField<16, 4, u32> max_level;
BitField<24, 4, u32> min_level;
} lod;
BitField<0, 28, u32> address;
/// Returns the physical address of the texture data: the address field scaled by 8.
PAddr GetPhysicalAddress() const {
    const u32 units = address;
    return units * 8;
}
// texture1 and texture2 store the texture format directly after the address,
// whereas texture0 inserts some additional flags in between.
// Hence, we store the format separately so that all other parameters can be described
// in a single structure.
};
enum class TextureFormat : u32 {
RGBA8 = 0,
RGB8 = 1,
RGB5A1 = 2,
RGB565 = 3,
RGBA4 = 4,
IA8 = 5,
RG8 = 6, ///< @note Also called HILO8 in 3DBrew.
I8 = 7,
A8 = 8,
IA4 = 9,
I4 = 10,
A4 = 11,
ETC1 = 12, // compressed
ETC1A4 = 13, // compressed
};
/// Returns the storage size of one pixel of `format`, counted in nibbles (4-bit units).
/// Formats without an entry below (this includes ETC1 and ETC1A4) are reported through
/// UNIMPLEMENTED() and yield 0.
static u32 NibblesPerPixel(TextureFormat format) {
    switch (format) {
    case TextureFormat::I4:
    case TextureFormat::A4:
        return 1;
    case TextureFormat::IA4:
    case TextureFormat::I8:
    case TextureFormat::A8:
        return 2;
    case TextureFormat::RG8:
    case TextureFormat::IA8:
    case TextureFormat::RGBA4:
    case TextureFormat::RGB565:
    case TextureFormat::RGB5A1:
        return 4;
    case TextureFormat::RGB8:
        return 6;
    case TextureFormat::RGBA8:
        return 8;
    default: // placeholder for yet unknown formats
        UNIMPLEMENTED();
        return 0;
    }
}
union {
BitField<0, 1, u32> texture0_enable;
BitField<1, 1, u32> texture1_enable;
BitField<2, 1, u32> texture2_enable;
BitField<8, 2, u32> texture3_coordinates;
BitField<10, 1, u32> texture3_enable;
BitField<13, 1, u32> texture2_use_coord1;
BitField<16, 1, u32> clear_texture_cache; // TODO: unimplemented
} main_config;
TextureConfig texture0;
enum class CubeFace {
PositiveX = 0,
NegativeX = 1,
PositiveY = 2,
NegativeY = 3,
PositiveZ = 4,
NegativeZ = 5,
};
BitField<0, 22, u32> cube_address[5];
/// Returns the physical address of the given cube map face.
/// PositiveX uses the texture0 address as is. Every other face takes its low bits from
/// cube_address[face - 1] and the remaining bits from the texture0 address.
PAddr GetCubePhysicalAddress(CubeFace face) const {
    const PAddr base = texture0.address;
    if (face == CubeFace::PositiveX) {
        // A multiplier of 8 is needed in the same way as for the main address.
        return base * 8;
    }

    // Bits [22:27] from the main texture address are shared with all additional cubemap
    // addresses, so only the bits covered by the face field are replaced.
    const auto& face_addr = cube_address[static_cast<std::size_t>(face) - 1];
    const PAddr shared_bits = base & ~face_addr.mask;
    const PAddr face_bits = face_addr;
    return (shared_bits | face_bits) * 8;
}
union {
BitField<0, 1, u32> orthographic; // 0: enable perspective divide
BitField<1, 23, u32> bias; // 23-bit fraction
} shadow;
INSERT_PADDING_WORDS(0x2);
BitField<0, 4, TextureFormat> texture0_format;
BitField<0, 1, u32> fragment_lighting_enable;
INSERT_PADDING_WORDS(0x1);
TextureConfig texture1;
BitField<0, 4, TextureFormat> texture1_format;
INSERT_PADDING_WORDS(0x2);
TextureConfig texture2;
BitField<0, 4, TextureFormat> texture2_format;
INSERT_PADDING_WORDS(0x9);
struct FullTextureConfig {
const bool enabled;
const TextureConfig config;
const TextureFormat format;
};
const std::array<FullTextureConfig, 3> GetTextures() const {
return {{
{static_cast<bool>(main_config.texture0_enable), texture0, texture0_format},
{static_cast<bool>(main_config.texture1_enable), texture1, texture1_format},
{static_cast<bool>(main_config.texture2_enable), texture2, texture2_format},
}};
}
// 0xa8-0xad: ProcTex Config
enum class ProcTexClamp : u32 {
ToZero = 0,
ToEdge = 1,
SymmetricalRepeat = 2,
MirroredRepeat = 3,
Pulse = 4,
};
enum class ProcTexCombiner : u32 {
U = 0, // u
U2 = 1, // u * u
V = 2, // v
V2 = 3, // v * v
Add = 4, // (u + v) / 2
Add2 = 5, // (u * u + v * v) / 2
SqrtAdd2 = 6, // sqrt(u * u + v * v)
Min = 7, // min(u, v)
Max = 8, // max(u, v)
RMax = 9, // Average of Max and SqrtAdd2
};
enum class ProcTexShift : u32 {
None = 0,
Odd = 1,
Even = 2,
};
union {
BitField<0, 3, ProcTexClamp> u_clamp;
BitField<3, 3, ProcTexClamp> v_clamp;
BitField<6, 4, ProcTexCombiner> color_combiner;
BitField<10, 4, ProcTexCombiner> alpha_combiner;
BitField<14, 1, u32> separate_alpha;
BitField<15, 1, u32> noise_enable;
BitField<16, 2, ProcTexShift> u_shift;
BitField<18, 2, ProcTexShift> v_shift;
BitField<20, 8, u32> bias_low; // float16 TODO: unimplemented
} proctex;
union ProcTexNoiseConfig {
BitField<0, 16, s32> amplitude; // fixed1.3.12
BitField<16, 16, u32> phase; // float16
};
ProcTexNoiseConfig proctex_noise_u;
ProcTexNoiseConfig proctex_noise_v;
union {
BitField<0, 16, u32> u; // float16
BitField<16, 16, u32> v; // float16
} proctex_noise_frequency;
enum class ProcTexFilter : u32 {
Nearest = 0,
Linear = 1,
NearestMipmapNearest = 2,
LinearMipmapNearest = 3,
NearestMipmapLinear = 4,
LinearMipmapLinear = 5,
};
union {
BitField<0, 3, ProcTexFilter> filter;
BitField<3, 4, u32> lod_min;
BitField<7, 4, u32> lod_max;
BitField<11, 8, u32> width;
BitField<19, 8, u32> bias_high; // TODO: unimplemented
} proctex_lut;
union {
BitField<0, 8, u32> level0;
BitField<8, 8, u32> level1;
BitField<16, 8, u32> level2;
BitField<24, 8, u32> level3;
} proctex_lut_offset;
INSERT_PADDING_WORDS(0x1);
// 0xaf-0xb7: ProcTex LUT
enum class ProcTexLutTable : u32 {
Noise = 0,
ColorMap = 2,
AlphaMap = 3,
Color = 4,
ColorDiff = 5,
};
union {
BitField<0, 8, u32> index;
BitField<8, 4, ProcTexLutTable> ref_table;
} proctex_lut_config;
u32 proctex_lut_data[8];
INSERT_PADDING_WORDS(0x8);
// 0xc0-0xff: Texture Combiner (akin to glTexEnv)
struct TevStageConfig {
enum class Source : u32 {
PrimaryColor = 0x0,
PrimaryFragmentColor = 0x1,
SecondaryFragmentColor = 0x2,
Texture0 = 0x3,
Texture1 = 0x4,
Texture2 = 0x5,
Texture3 = 0x6,
PreviousBuffer = 0xd,
Constant = 0xe,
Previous = 0xf,
};
enum class ColorModifier : u32 {
SourceColor = 0x0,
OneMinusSourceColor = 0x1,
SourceAlpha = 0x2,
OneMinusSourceAlpha = 0x3,
SourceRed = 0x4,
OneMinusSourceRed = 0x5,
SourceGreen = 0x8,
OneMinusSourceGreen = 0x9,
SourceBlue = 0xc,
OneMinusSourceBlue = 0xd,
};
enum class AlphaModifier : u32 {
SourceAlpha = 0x0,
OneMinusSourceAlpha = 0x1,
SourceRed = 0x2,
OneMinusSourceRed = 0x3,
SourceGreen = 0x4,
OneMinusSourceGreen = 0x5,
SourceBlue = 0x6,
OneMinusSourceBlue = 0x7,
};
enum class Operation : u32 {
Replace = 0,
Modulate = 1,
Add = 2,
AddSigned = 3,
Lerp = 4,
Subtract = 5,
Dot3_RGB = 6,
Dot3_RGBA = 7,
MultiplyThenAdd = 8,
AddThenMultiply = 9,
};
union {
u32 sources_raw;
BitField<0, 4, Source> color_source1;
BitField<4, 4, Source> color_source2;
BitField<8, 4, Source> color_source3;
BitField<16, 4, Source> alpha_source1;
BitField<20, 4, Source> alpha_source2;
BitField<24, 4, Source> alpha_source3;
};
union {
u32 modifiers_raw;
BitField<0, 4, ColorModifier> color_modifier1;
BitField<4, 4, ColorModifier> color_modifier2;
BitField<8, 4, ColorModifier> color_modifier3;
BitField<12, 3, AlphaModifier> alpha_modifier1;
BitField<16, 3, AlphaModifier> alpha_modifier2;
BitField<20, 3, AlphaModifier> alpha_modifier3;
};
union {
u32 ops_raw;
BitField<0, 4, Operation> color_op;
BitField<16, 4, Operation> alpha_op;
};
union {
u32 const_color;
BitField<0, 8, u32> const_r;
BitField<8, 8, u32> const_g;
BitField<16, 8, u32> const_b;
BitField<24, 8, u32> const_a;
};
union {
u32 scales_raw;
BitField<0, 2, u32> color_scale;
BitField<16, 2, u32> alpha_scale;
};
inline unsigned GetColorMultiplier() const {
return (color_scale < 3) ? (1 << color_scale) : 1;
}
inline unsigned GetAlphaMultiplier() const {
return (alpha_scale < 3) ? (1 << alpha_scale) : 1;
}
};
TevStageConfig tev_stage0;
INSERT_PADDING_WORDS(0x3);
TevStageConfig tev_stage1;
INSERT_PADDING_WORDS(0x3);
TevStageConfig tev_stage2;
INSERT_PADDING_WORDS(0x3);
TevStageConfig tev_stage3;
INSERT_PADDING_WORDS(0x3);
enum class FogMode : u32 {
None = 0,
Fog = 5,
Gas = 7,
};
union {
BitField<0, 3, FogMode> fog_mode;
BitField<16, 1, u32> fog_flip;
union {
// Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
// these masks is set
BitField<8, 4, u32> update_mask_rgb;
BitField<12, 4, u32> update_mask_a;
/// True when `stage_index` is one of stages 0-3 and its bit is set in update_mask_rgb.
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
    if (stage_index >= 4) {
        return false;
    }
    return (update_mask_rgb & (1 << stage_index)) != 0;
}
/// True when `stage_index` is one of stages 0-3 and its bit is set in update_mask_a.
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
    if (stage_index >= 4) {
        return false;
    }
    return (update_mask_a & (1 << stage_index)) != 0;
}
} tev_combiner_buffer_input;
};
union {
u32 raw;
BitField<0, 8, u32> r;
BitField<8, 8, u32> g;
BitField<16, 8, u32> b;
} fog_color;
INSERT_PADDING_WORDS(0x4);
BitField<0, 16, u32> fog_lut_offset;
INSERT_PADDING_WORDS(0x1);
u32 fog_lut_data[8];
TevStageConfig tev_stage4;
INSERT_PADDING_WORDS(0x3);
TevStageConfig tev_stage5;
union {
u32 raw;
BitField<0, 8, u32> r;
BitField<8, 8, u32> g;
BitField<16, 8, u32> b;
BitField<24, 8, u32> a;
} tev_combiner_buffer_color;
INSERT_PADDING_WORDS(0x2);
const std::array<TevStageConfig, 6> GetTevStages() const {
return {{tev_stage0, tev_stage1, tev_stage2, tev_stage3, tev_stage4, tev_stage5}};
};
};
static_assert(sizeof(TexturingRegs) == 0x80 * sizeof(u32),
"TexturingRegs struct has incorrect size");
} // namespace Pica

View File

@@ -0,0 +1,61 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_set.h"
#include "common/hash.h"
#include "video_core/pica/regs_shader.h"
#include "video_core/pica/shader_setup.h"
namespace Pica {
ShaderSetup::ShaderSetup() = default;
ShaderSetup::~ShaderSetup() = default;
/// Replaces all boolean uniforms: uniform N takes the value of bit N of `value`.
void ShaderSetup::WriteUniformBoolReg(u32 value) {
    const BitSet32 flags(value);
    auto& bools = uniforms.b;
    for (u32 bit = 0; bit < bools.size(); ++bit) {
        bools[bit] = flags[bit];
    }
}
/// Stores `values` as integer uniform `index`; asserts that `index` is within the table.
void ShaderSetup::WriteUniformIntReg(u32 index, const Common::Vec4<u8> values) {
    ASSERT(index < uniforms.i.size());
    auto& slot = uniforms.i[index];
    slot = values;
}
/// Feeds one word written to the float uniform data registers into the uniform queue.
/// Once the queue reports a value as available, that value is stored at the uniform index
/// held in `config.uniform_setup` and the index is advanced by one.
void ShaderSetup::WriteUniformFloatReg(ShaderRegs& config, u32 value) {
    auto& setup = config.uniform_setup;
    const bool is_float32 = setup.IsFloat32();

    // Nothing to store until Push() signals that a value can be fetched.
    if (!uniform_queue.Push(value, is_float32)) {
        return;
    }

    // The value is taken out of the queue even when the target index turns out to be invalid.
    const auto uniform = uniform_queue.Get(is_float32);
    const u32 target = setup.index;
    if (target >= uniforms.f.size()) {
        LOG_ERROR(HW_GPU, "Invalid float uniform index {}", setup.index.Value());
        return;
    }

    uniforms.f[target] = uniform;
    setup.index.Assign(target + 1);
}
/// Returns the hash of program_code, recomputing it only when it was marked dirty.
u64 ShaderSetup::GetProgramCodeHash() {
    if (!program_code_hash_dirty) {
        return program_code_hash;
    }
    program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
    program_code_hash_dirty = false;
    return program_code_hash;
}
/// Returns the hash of swizzle_data, recomputing it only when it was marked dirty.
u64 ShaderSetup::GetSwizzleDataHash() {
    if (!swizzle_data_hash_dirty) {
        return swizzle_data_hash;
    }
    swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data));
    swizzle_data_hash_dirty = false;
    return swizzle_data_hash;
}
} // namespace Pica

View File

@@ -0,0 +1,103 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/vector_math.h"
#include "video_core/pica/packed_attribute.h"
#include "video_core/pica_types.h"
namespace Pica {
constexpr u32 MAX_PROGRAM_CODE_LENGTH = 4096;
constexpr u32 MAX_SWIZZLE_DATA_LENGTH = 4096;
using ProgramCode = std::array<u32, MAX_PROGRAM_CODE_LENGTH>;
using SwizzleData = std::array<u32, MAX_SWIZZLE_DATA_LENGTH>;
/// Uniform storage of a shader setup: 96 float vectors, 16 booleans and 4 integer vectors.
/// The static helpers return the byte offset of a single uniform inside this structure.
struct Uniforms {
    alignas(16) std::array<Common::Vec4<f24>, 96> f;
    std::array<bool, 16> b;
    std::array<Common::Vec4<u8>, 4> i;

    /// Byte offset of float uniform `index`, relative to the start of Uniforms.
    static size_t GetFloatUniformOffset(u32 index) {
        const size_t table_start = offsetof(Uniforms, f);
        return table_start + index * sizeof(Common::Vec4<f24>);
    }

    /// Byte offset of boolean uniform `index`, relative to the start of Uniforms.
    static size_t GetBoolUniformOffset(u32 index) {
        const size_t table_start = offsetof(Uniforms, b);
        return table_start + index * sizeof(bool);
    }

    /// Byte offset of integer uniform `index`, relative to the start of Uniforms.
    static size_t GetIntUniformOffset(u32 index) {
        const size_t table_start = offsetof(Uniforms, i);
        return table_start + index * sizeof(Common::Vec4<u8>);
    }

private:
    friend class boost::serialization::access;

    /// Saves/loads the three uniform tables in the order f, b, i.
    template <class Archive>
    void serialize(Archive& ar, const u32 file_version) {
        ar& f;
        ar& b;
        ar& i;
    }
};
struct ShaderRegs;
/**
 * This structure contains the state information common for all shader units such as uniforms.
 * The geometry shader has a unique configuration, so when enabled it has its own setup.
 */
struct ShaderSetup {
public:
    explicit ShaderSetup();
    ~ShaderSetup();

    /// Replaces all boolean uniforms with the bits of `value`.
    void WriteUniformBoolReg(u32 value);

    /// Stores `values` as integer uniform `index`.
    void WriteUniformIntReg(u32 index, const Common::Vec4<u8> values);

    /// Feeds one word written to the float uniform data registers described by `config`.
    void WriteUniformFloatReg(ShaderRegs& config, u32 value);

    /// Returns the hash of program_code; it is recomputed only after MarkProgramCodeDirty().
    u64 GetProgramCodeHash();

    /// Returns the hash of swizzle_data; it is recomputed only after MarkSwizzleDataDirty().
    u64 GetSwizzleDataHash();

    /// Must be called after program_code was modified so that the cached hash is refreshed.
    void MarkProgramCodeDirty() {
        program_code_hash_dirty = true;
    }

    /// Must be called after swizzle_data was modified so that the cached hash is refreshed.
    void MarkSwizzleDataDirty() {
        swizzle_data_hash_dirty = true;
    }

public:
    Uniforms uniforms;
    PackedAttribute uniform_queue;
    // Zero-initialized so that hashing or reading the arrays before any upload never
    // touches indeterminate memory.
    ProgramCode program_code{};
    SwizzleData swizzle_data{};
    // Not part of the serialized state (see serialize below).
    u32 entry_point{};
    const void* cached_shader{};

private:
    bool program_code_hash_dirty{true};
    bool swizzle_data_hash_dirty{true};
    u64 program_code_hash{0xDEADC0DE};
    u64 swizzle_data_hash{0xDEADC0DE};

    friend class boost::serialization::access;

    /// Saves/loads the uniforms, the uniform queue, both code arrays and the hash cache.
    template <class Archive>
    void serialize(Archive& ar, const u32 file_version) {
        ar& uniforms;
        ar& uniform_queue;
        ar& program_code;
        ar& swizzle_data;
        ar& program_code_hash_dirty;
        ar& swizzle_data_hash_dirty;
        ar& program_code_hash;
        ar& swizzle_data_hash;
    }
};
} // namespace Pica

View File

@@ -0,0 +1,63 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_set.h"
#include "video_core/pica/regs_shader.h"
#include "video_core/pica/shader_unit.h"
namespace Pica {
ShaderUnit::ShaderUnit(GeometryEmitter* emitter) : emitter_ptr{emitter} {}
ShaderUnit::~ShaderUnit() = default;
void ShaderUnit::LoadInput(const ShaderRegs& config, const AttributeBuffer& buffer) {
const u32 max_attribute = config.max_input_attribute_index;
for (u32 attr = 0; attr <= max_attribute; ++attr) {
const u32 reg = config.GetRegisterForAttribute(attr);
input[reg] = buffer[attr];
}
}
void ShaderUnit::WriteOutput(const ShaderRegs& config, AttributeBuffer& buffer) {
u32 output_index{};
for (u32 reg : Common::BitSet<u32>(config.output_mask)) {
buffer[output_index++] = output[reg];
}
}
/// Stores the registers enabled in output_mask as the vertex in slot vertex_id. When
/// prim_emit is set, all three buffered vertices are then passed to the vertex handler,
/// preceded by a call to the winding setter if winding is set.
void GeometryEmitter::Emit(std::span<Common::Vec4<f24>, 16> output_regs) {
    ASSERT(vertex_id < 3);

    auto& vertex = buffer[vertex_id];
    u32 next_slot = 0;
    for (const u32 reg : Common::BitSet<u32>(output_mask)) {
        vertex[next_slot] = output_regs[reg];
        ++next_slot;
    }

    if (!prim_emit) {
        return;
    }
    if (winding) {
        handlers->winding_setter();
    }
    for (const auto& buffered : buffer) {
        handlers->vertex_handler(buffered);
    }
}
GeometryShaderUnit::GeometryShaderUnit() : ShaderUnit{&emitter} {}
GeometryShaderUnit::~GeometryShaderUnit() = default;
/// Installs the callbacks that the emitter invokes when it emits a primitive.
void GeometryShaderUnit::SetVertexHandlers(VertexHandler vertex_handler,
                                           WindingSetter winding_setter) {
    // NOTE(review): every call allocates a fresh Handlers object, and neither the defaulted
    // destructor of this class nor GeometryEmitter deletes it, so each call appears to leak
    // the previous allocation. Releasing it here is not safe as long as emitter.handlers
    // has no initial value - confirm the intended ownership.
    emitter.handlers = new Handlers;
    Handlers& installed = *emitter.handlers;
    installed.vertex_handler = vertex_handler;
    installed.winding_setter = winding_setter;
}
void GeometryShaderUnit::ConfigOutput(const ShaderRegs& config) {
emitter.output_mask = config.output_mask;
}
} // namespace Pica

View File

@@ -0,0 +1,120 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <functional>
#include <span>
#include <boost/serialization/base_object.hpp>
#include "video_core/pica/output_vertex.h"
namespace Pica {
/// Handler type for receiving vertex outputs from vertex shader or geometry shader
using VertexHandler = std::function<void(const AttributeBuffer&)>;
/// Handler type for signaling to invert the vertex order of the next triangle
using WindingSetter = std::function<void()>;
struct ShaderRegs;
struct GeometryEmitter;
/**
* This structure contains the state information that needs to be unique for a shader unit. The 3DS
* has four shader units that process shaders in parallel.
*/
struct ShaderUnit {
// Stores `emitter` in emitter_ptr; the default is no emitter (nullptr).
explicit ShaderUnit(GeometryEmitter* emitter = nullptr);
~ShaderUnit();
// Copies the attributes of `input` into the input registers selected by `config`.
void LoadInput(const ShaderRegs& config, const AttributeBuffer& input);
// Packs the output registers enabled in config.output_mask into `output`.
void WriteOutput(const ShaderRegs& config, AttributeBuffer& output);
// Byte offset of input register `register_index`, relative to the start of ShaderUnit.
static constexpr size_t InputOffset(s32 register_index) {
return offsetof(ShaderUnit, input) + register_index * sizeof(Common::Vec4<f24>);
}
// Byte offset of output register `register_index`, relative to the start of ShaderUnit.
static constexpr size_t OutputOffset(s32 register_index) {
return offsetof(ShaderUnit, output) + register_index * sizeof(Common::Vec4<f24>);
}
// Byte offset of temporary register `register_index`, relative to the start of ShaderUnit.
static constexpr size_t TemporaryOffset(s32 register_index) {
return offsetof(ShaderUnit, temporary) + register_index * sizeof(Common::Vec4<f24>);
}
public:
s32 address_registers[3];
bool conditional_code[2];
// Register files of the unit, 16 four-component vectors each. The offset helpers above
// depend on these members, so keep them in sync when changing the layout.
alignas(16) std::array<Common::Vec4<f24>, 16> input;
alignas(16) std::array<Common::Vec4<f24>, 16> temporary;
alignas(16) std::array<Common::Vec4<f24>, 16> output;
// Emitter passed to the constructor; not part of the serialized state.
GeometryEmitter* emitter_ptr;
private:
friend class boost::serialization::access;
// Saves/loads the register files, the conditional codes and the address registers.
template <class Archive>
void serialize(Archive& ar, const u32 file_version) {
ar& input;
ar& temporary;
ar& output;
ar& conditional_code;
ar& address_registers;
}
};
// Callback pair invoked by GeometryEmitter::Emit when a primitive is emitted.
struct Handlers {
// Called once for each buffered vertex of the emitted primitive.
VertexHandler vertex_handler;
// Called before the vertices are handed over when the emitter's winding flag is set.
WindingSetter winding_setter;
};
/// This structure contains state information for primitive emitting in geometry shader.
struct GeometryEmitter {
    /// Stores the registers enabled in output_mask as vertex `vertex_id` and, when prim_emit
    /// is set, passes the three buffered vertices to the installed handlers.
    void Emit(std::span<Common::Vec4<f24>, 16> output_regs);

public:
    std::array<AttributeBuffer, 3> buffer;
    // The scalar state starts out zeroed instead of indeterminate, so Emit never acts on
    // garbage values when it runs before the state has been configured.
    u8 vertex_id{};
    bool prim_emit{};
    bool winding{};
    u32 output_mask{};
    // Assigned by GeometryShaderUnit::SetVertexHandlers and not serialized. Starts as
    // nullptr so that a missing handler setup is detectable; Emit dereferences it when a
    // primitive is emitted.
    Handlers* handlers{};

private:
    friend class boost::serialization::access;

    /// Saves/loads the vertex buffer and the emit state; the handlers are not included.
    template <class Archive>
    void serialize(Archive& ar, const u32 file_version) {
        ar& buffer;
        ar& vertex_id;
        ar& prim_emit;
        ar& winding;
        ar& output_mask;
    }
};
/**
* This is an extended shader unit state that represents the special unit that can run both vertex
* shader and geometry shader. It contains an additional primitive emitter and utilities for
* geometry shader.
*/
struct GeometryShaderUnit : public ShaderUnit {
// Constructs the base ShaderUnit with a pointer to the `emitter` member below.
GeometryShaderUnit();
~GeometryShaderUnit();
// Installs the callbacks that `emitter` invokes when it emits a primitive.
void SetVertexHandlers(VertexHandler vertex_handler, WindingSetter winding_setter);
// Copies config.output_mask into the emitter.
void ConfigOutput(const ShaderRegs& config);
GeometryEmitter emitter;
private:
friend class boost::serialization::access;
// Saves/loads the ShaderUnit base state followed by the emitter.
template <class Archive>
void serialize(Archive& ar, const u32 file_version) {
ar& boost::serialization::base_object<ShaderUnit>(*this);
ar& emitter;
}
};
} // namespace Pica

View File

@@ -0,0 +1,109 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/alignment.h"
#include "common/logging/log.h"
#include "video_core/pica/vertex_loader.h"
namespace Pica {
/**
 * Precomputes, for every vertex attribute, where its data lives inside the loader arrays
 * (source offset and stride) and how it is encoded (format and element count), so that
 * LoadVertex only has to perform table lookups.
 * @param memory_ Memory system kept by reference for later attribute fetches
 * @param regs Pipeline registers providing the vertex attribute configuration
 */
VertexLoader::VertexLoader(Memory::MemorySystem& memory_, const PipelineRegs& regs)
    : memory{memory_} {
    const auto& attribs = regs.vertex_attributes;
    num_total_attributes = attribs.GetNumTotalAttributes();

    // Poison every source offset; entries referenced by a loader are overwritten below
    vertex_attribute_sources.fill(0xdeadbeef);

    for (u32 attrib = 0; attrib < 16; ++attrib) {
        vertex_attribute_is_default[attrib] = attribs.IsDefaultAttribute(attrib);
    }

    // Walk all attribute loaders and record the layout of the components they provide
    for (u32 loader_id = 0; loader_id < 12; ++loader_id) {
        const auto& loader_regs = attribs.attribute_loaders[loader_id];

        // Running byte position inside one vertex record of this loader
        u32 cursor = 0;

        // TODO: What happens if a loader overwrites a previous one's data?
        for (u32 slot = 0; slot < loader_regs.component_count; ++slot) {
            if (slot >= 12) {
                LOG_ERROR(HW_GPU,
                          "Overflow in the vertex attribute loader {} trying to load component {}",
                          loader_id, slot);
                continue;
            }

            const u32 id = loader_regs.GetComponent(slot);
            if (id >= 16) {
                // This is truly unreachable due to the number of bits for each component
                UNREACHABLE();
            } else if (id >= 12) {
                // Ids 12, 13, 14 and 15 stand for 4, 8, 12 and 16 bytes of padding, respectively
                cursor = Common::AlignUp(cursor, 4);
                cursor += (id - 11) * 4;
            } else {
                // Regular attribute: align to its element size, then record its layout
                cursor = Common::AlignUp(cursor, attribs.GetElementSizeInBytes(id));
                vertex_attribute_sources[id] = loader_regs.data_offset + cursor;
                vertex_attribute_strides[id] = static_cast<u32>(loader_regs.byte_count);
                vertex_attribute_formats[id] = attribs.GetFormat(id);
                vertex_attribute_elements[id] = attribs.GetNumElements(id);
                cursor += attribs.GetStride(id);
            }
        }
    }
}
// Destructor is defaulted here, out of line, rather than in the class definition.
VertexLoader::~VertexLoader() = default;
/**
 * Fills `input` with the attributes of a single vertex.
 * @param base_address Physical base address that the per-attribute source offsets are added to
 * @param index Not used by this implementation
 * @param vertex Vertex number, multiplied by the attribute stride to locate its record
 * @param input Destination attribute buffer
 * @param input_default_attributes Values copied for attributes configured as default
 */
void VertexLoader::LoadVertex(PAddr base_address, u32 index, u32 vertex, AttributeBuffer& input,
                              AttributeBuffer& input_default_attributes) const {
    using Format = PipelineRegs::VertexAttributeFormat;

    for (s32 attrib = 0; attrib < num_total_attributes; ++attrib) {
        // Attributes flagged as default take their value from the default attribute buffer
        if (vertex_attribute_is_default[attrib]) {
            input[attrib] = input_default_attributes[attrib];
            continue;
        }

        // TODO(yuriks): With zero elements nothing is loaded and the vertex should keep the
        // value it had last. That value is not tracked as global state at the moment, so this
        // does not work in Citra yet.
        const u32 num_elements = vertex_attribute_elements[attrib];
        if (num_elements == 0) {
            LOG_ERROR(HW_GPU, "Vertex retension unimplemented");
            continue;
        }

        // Locate this vertex's record inside the loader array
        const u32 record_offset = vertex_attribute_strides[attrib] * vertex;
        const PAddr source_addr = base_address + vertex_attribute_sources[attrib] + record_offset;

        switch (vertex_attribute_formats[attrib]) {
        case Format::BYTE:
            LoadAttribute<s8>(source_addr, attrib, input);
            break;
        case Format::UBYTE:
            LoadAttribute<u8>(source_addr, attrib, input);
            break;
        case Format::SHORT:
            LoadAttribute<s16>(source_addr, attrib, input);
            break;
        case Format::FLOAT:
            LoadAttribute<f32>(source_addr, attrib, input);
            break;
        }

        // Components the array does not provide become (0, 0, 0, 1). This is *not* taken from
        // the default attribute settings, even when those are enabled for this attribute.
        auto& destination = input[attrib];
        for (u32 comp = num_elements; comp < 4; ++comp) {
            if (comp == 3) {
                destination[comp] = f24::One();
            } else {
                destination[comp] = f24::Zero();
            }
        }
    }
}
} // namespace Pica

View File

@@ -0,0 +1,47 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "core/memory.h"
#include "video_core/pica/output_vertex.h"
#include "video_core/pica/regs_pipeline.h"
namespace Memory {
class MemorySystem;
}
namespace Pica {
class VertexLoader {
public:
explicit VertexLoader(Memory::MemorySystem& memory_, const PipelineRegs& regs);
~VertexLoader();
void LoadVertex(PAddr base_address, u32 index, u32 vertex, AttributeBuffer& input,
AttributeBuffer& input_default_attributes) const;
template <typename T>
void LoadAttribute(PAddr source_addr, u32 attrib, AttributeBuffer& out) const {
const T* data = reinterpret_cast<const T*>(memory.GetPhysicalPointer(source_addr));
for (u32 comp = 0; comp < vertex_attribute_elements[attrib]; ++comp) {
out[attrib][comp] = f24::FromFloat32(data[comp]);
}
}
int GetNumTotalAttributes() const {
return num_total_attributes;
}
private:
Memory::MemorySystem& memory;
std::array<u32, 16> vertex_attribute_sources;
std::array<u32, 16> vertex_attribute_strides{};
std::array<PipelineRegs::VertexAttributeFormat, 16> vertex_attribute_formats;
std::array<u32, 16> vertex_attribute_elements{};
std::array<bool, 16> vertex_attribute_is_default;
int num_total_attributes = 0;
};
} // namespace Pica