video_core: Refactor GPU interface (#7272)

* video_core: Refactor GPU interface

* citra_qt: Better debug widget lifetime
This commit is contained in:
GPUCode
2023-12-28 12:46:57 +02:00
committed by GitHub
parent 602f4f60d8
commit 2bb7f89c30
167 changed files with 4172 additions and 4866 deletions

View File

@@ -4,16 +4,16 @@
#include "common/color.h"
#include "core/core.h"
#include "core/hw/gpu.h"
#include "core/hw/hw.h"
#include "core/hw/lcd.h"
#include "video_core/gpu.h"
#include "video_core/pica/pica_core.h"
#include "video_core/renderer_software/renderer_software.h"
namespace SwRenderer {
RendererSoftware::RendererSoftware(Core::System& system, Frontend::EmuWindow& window)
: VideoCore::RendererBase{system, window, nullptr}, memory{system.Memory()},
rasterizer{system.Memory()} {}
RendererSoftware::RendererSoftware(Core::System& system, Pica::PicaCore& pica_,
Frontend::EmuWindow& window)
: VideoCore::RendererBase{system, window, nullptr}, memory{system.Memory()}, pica{pica_},
rasterizer{memory, pica} {}
RendererSoftware::~RendererSoftware() = default;
@@ -23,15 +23,11 @@ void RendererSoftware::SwapBuffers() {
}
void RendererSoftware::PrepareRenderTarget() {
const auto& regs_lcd = pica.regs_lcd;
for (u32 i = 0; i < 3; i++) {
const int fb_id = i == 2 ? 1 : 0;
u32 lcd_color_addr =
(fb_id == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom);
lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr;
LCD::Regs::ColorFill color_fill = {0};
LCD::Read(color_fill.raw, lcd_color_addr);
const u32 fb_id = i == 2 ? 1 : 0;
const auto color_fill = fb_id == 0 ? regs_lcd.color_fill_top : regs_lcd.color_fill_bottom;
if (!color_fill.is_enabled) {
LoadFBToScreenInfo(i);
}
@@ -40,12 +36,12 @@ void RendererSoftware::PrepareRenderTarget() {
void RendererSoftware::LoadFBToScreenInfo(int i) {
const u32 fb_id = i == 2 ? 1 : 0;
const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id];
const auto& framebuffer = pica.regs.framebuffer_config[fb_id];
auto& info = screen_infos[i];
const PAddr framebuffer_addr =
framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2;
const s32 bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
const s32 bpp = Pica::BytesPerPixel(framebuffer.color_format);
const u8* framebuffer_data = memory.GetPhysicalPointer(framebuffer_addr);
const s32 pixel_stride = framebuffer.stride / bpp;
@@ -58,15 +54,15 @@ void RendererSoftware::LoadFBToScreenInfo(int i) {
const u8* pixel = framebuffer_data + (y * pixel_stride + pixel_stride - x) * bpp;
const Common::Vec4 color = [&] {
switch (framebuffer.color_format) {
case GPU::Regs::PixelFormat::RGBA8:
case Pica::PixelFormat::RGBA8:
return Common::Color::DecodeRGBA8(pixel);
case GPU::Regs::PixelFormat::RGB8:
case Pica::PixelFormat::RGB8:
return Common::Color::DecodeRGB8(pixel);
case GPU::Regs::PixelFormat::RGB565:
case Pica::PixelFormat::RGB565:
return Common::Color::DecodeRGB565(pixel);
case GPU::Regs::PixelFormat::RGB5A1:
case Pica::PixelFormat::RGB5A1:
return Common::Color::DecodeRGB5A1(pixel);
case GPU::Regs::PixelFormat::RGBA4:
case Pica::PixelFormat::RGBA4:
return Common::Color::DecodeRGBA4(pixel);
}
UNREACHABLE();

View File

@@ -21,7 +21,8 @@ struct ScreenInfo {
class RendererSoftware : public VideoCore::RendererBase {
public:
explicit RendererSoftware(Core::System& system, Frontend::EmuWindow& window);
explicit RendererSoftware(Core::System& system, Pica::PicaCore& pica,
Frontend::EmuWindow& window);
~RendererSoftware() override;
[[nodiscard]] VideoCore::RasterizerInterface* Rasterizer() override {
@@ -42,6 +43,7 @@ private:
private:
Memory::MemorySystem& memory;
Pica::PicaCore& pica;
RasterizerSoftware rasterizer;
std::array<ScreenInfo, 3> screen_infos{};
};

View File

@@ -0,0 +1,346 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/alignment.h"
#include "common/color.h"
#include "common/vector_math.h"
#include "core/memory.h"
#include "video_core/pica/regs_external.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_software/sw_blitter.h"
#include "video_core/utils.h"
namespace SwRenderer {
/**
 * Decodes a single pixel stored in one of the PICA framebuffer formats.
 *
 * @param input_format Format the bytes at `src_pixel` are encoded in.
 * @param src_pixel Pointer to the first byte of the encoded pixel.
 * @return The decoded color as a four-component u8 vector, or {0, 0, 0, 0} (after logging an
 *         error) when `input_format` is not one of the handled formats.
 */
static Common::Vec4<u8> DecodePixel(Pica::PixelFormat input_format, const u8* src_pixel) {
    switch (input_format) {
    case Pica::PixelFormat::RGBA8:
        return Common::Color::DecodeRGBA8(src_pixel);
    case Pica::PixelFormat::RGB8:
        return Common::Color::DecodeRGB8(src_pixel);
    case Pica::PixelFormat::RGB565:
        return Common::Color::DecodeRGB565(src_pixel);
    case Pica::PixelFormat::RGB5A1:
        return Common::Color::DecodeRGB5A1(src_pixel);
    case Pica::PixelFormat::RGBA4:
        return Common::Color::DecodeRGBA4(src_pixel);
    default:
        // Log the raw numeric value explicitly, the same way the destination-format error in
        // DisplayTransfer does, instead of relying on a formatter for the scoped enum.
        LOG_ERROR(HW_GPU, "Unknown source framebuffer format {:x}",
                  static_cast<u32>(input_format));
        return {0, 0, 0, 0};
    }
}
/// Binds the blitter to the memory system it reads/writes and to the rasterizer it notifies.
/// Neither object is owned; both are only stored for later use.
SwBlitter::SwBlitter(Memory::MemorySystem& memory_, VideoCore::RasterizerInterface* rasterizer_)
    : memory(memory_), rasterizer(rasterizer_) {}

/// Defined out of line; the blitter holds only a reference and a raw pointer, so nothing is freed.
SwBlitter::~SwBlitter() = default;
/**
 * Performs a raw byte copy between two physical addresses, where both the source and the
 * destination may be laid out as rows of `width` bytes separated by `gap` bytes.
 *
 * The copy size is `config.texture_copy.size` rounded down to a multiple of 16. Widths and gaps
 * are given by the config in units of 16 bytes. Invalid addresses, a zero size and a zero width
 * (with a non-zero gap) are logged and the request is dropped.
 *
 * @param config Transfer configuration; only the addresses and the `texture_copy` fields are read.
 */
void SwBlitter::TextureCopy(const Pica::DisplayTransferConfig& config) {
    const PAddr src_addr = config.GetPhysicalInputAddress();
    const PAddr dst_addr = config.GetPhysicalOutputAddress();

    // TODO: do hwtest with invalid addresses
    if (!memory.IsValidPhysicalAddress(src_addr)) {
        LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", src_addr);
        return;
    }
    if (!memory.IsValidPhysicalAddress(dst_addr)) {
        LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr);
        return;
    }

    u8* src_pointer = memory.GetPhysicalPointer(src_addr);
    u8* dst_pointer = memory.GetPhysicalPointer(dst_addr);

    u32 remaining_size = Common::AlignDown(config.texture_copy.size, 16);
    if (remaining_size == 0) {
        LOG_CRITICAL(HW_GPU, "zero size. Real hardware freezes on this.");
        return;
    }

    const u32 input_gap = config.texture_copy.input_gap * 16;
    const u32 output_gap = config.texture_copy.output_gap * 16;

    // Zero gap means contiguous input/output even if width = 0. To avoid infinite loop below, width
    // is assigned with the total size if gap = 0.
    const u32 input_width = input_gap == 0 ? remaining_size : config.texture_copy.input_width * 16;
    const u32 output_width =
        output_gap == 0 ? remaining_size : config.texture_copy.output_width * 16;

    if (input_width == 0) {
        LOG_CRITICAL(HW_GPU, "zero input width. Real hardware freezes on this.");
        return;
    }
    if (output_width == 0) {
        LOG_CRITICAL(HW_GPU, "zero output width. Real hardware freezes on this.");
        return;
    }

    // Span of memory touched by the copy loop below: every complete row together with its gap,
    // plus the trailing partial row (if any), which starts right after the last gap. Counting only
    // complete rows would leave that partial row out of the flushed/invalidated range.
    const size_t contiguous_input_size =
        remaining_size / input_width * (input_width + input_gap) + remaining_size % input_width;
    rasterizer->FlushRegion(src_addr, static_cast<u32>(contiguous_input_size));

    const size_t contiguous_output_size =
        remaining_size / output_width * (output_width + output_gap) + remaining_size % output_width;
    // Only need to flush output if it has a gap
    if (output_gap != 0) {
        rasterizer->FlushAndInvalidateRegion(dst_addr, static_cast<u32>(contiguous_output_size));
    } else {
        rasterizer->InvalidateRegion(dst_addr, static_cast<u32>(contiguous_output_size));
    }

    // Copy in chunks bounded by whichever runs out first: the current input row, the current
    // output row, or the total amount left. Skip the gap each time a row is completed.
    u32 remaining_input = input_width;
    u32 remaining_output = output_width;
    while (remaining_size > 0) {
        const u32 copy_size = std::min({remaining_input, remaining_output, remaining_size});

        std::memcpy(dst_pointer, src_pointer, copy_size);
        src_pointer += copy_size;
        dst_pointer += copy_size;

        remaining_input -= copy_size;
        remaining_output -= copy_size;
        remaining_size -= copy_size;

        if (remaining_input == 0) {
            remaining_input = input_width;
            src_pointer += input_gap;
        }
        if (remaining_output == 0) {
            remaining_output = output_width;
            dst_pointer += output_gap;
        }
    }
}
/**
 * Copies an image from the input address to the output address, converting the pixel format and
 * optionally downscaling, flipping vertically and converting between tiled and linear layouts.
 *
 * Invalid addresses and zero dimensions are logged and the request is dropped. Scaling modes above
 * ScaleXY, and any scaling of linear input, are reported as unimplemented.
 *
 * @param config Display transfer configuration describing addresses, sizes, formats and flags.
 */
void SwBlitter::DisplayTransfer(const Pica::DisplayTransferConfig& config) {
    const PAddr src_addr = config.GetPhysicalInputAddress();
    PAddr dst_addr = config.GetPhysicalOutputAddress();

    // TODO: do hwtest with these cases
    if (!memory.IsValidPhysicalAddress(src_addr)) {
        LOG_CRITICAL(HW_GPU, "invalid input address {:#010X}", src_addr);
        return;
    }
    if (!memory.IsValidPhysicalAddress(dst_addr)) {
        LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr);
        return;
    }
    if (config.input_width == 0) {
        LOG_CRITICAL(HW_GPU, "zero input width");
        return;
    }
    if (config.input_height == 0) {
        LOG_CRITICAL(HW_GPU, "zero input height");
        return;
    }
    if (config.output_width == 0) {
        LOG_CRITICAL(HW_GPU, "zero output width");
        return;
    }
    if (config.output_height == 0) {
        LOG_CRITICAL(HW_GPU, "zero output height");
        return;
    }

    // The pixel sizes do not change during the transfer, so query them once.
    const u32 src_bytes_per_pixel = BytesPerPixel(config.input_format);
    const u32 dst_bytes_per_pixel = BytesPerPixel(config.output_format);

    // Using flip_vertically alongside crop_input_lines produces skewed output on hardware.
    // We have to emulate this because some games rely on this behaviour to render correctly.
    if (config.flip_vertically && config.crop_input_lines) {
        dst_addr += (config.input_width - config.output_width) * (config.output_height - 1) *
                    dst_bytes_per_pixel;

        // The address checked above was the unskewed one. Check the address that is actually
        // going to be written, so a skew that leaves valid memory is rejected instead of being
        // turned into a host pointer.
        if (!memory.IsValidPhysicalAddress(dst_addr)) {
            LOG_CRITICAL(HW_GPU, "invalid output address {:#010X}", dst_addr);
            return;
        }
    }

    u8* src_pointer = memory.GetPhysicalPointer(src_addr);
    u8* dst_pointer = memory.GetPhysicalPointer(dst_addr);

    if (config.scaling > config.ScaleXY) {
        LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode {}",
                     config.scaling.Value());
        UNIMPLEMENTED();
        return;
    }

    if (config.input_linear && config.scaling != config.NoScale) {
        LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
        UNIMPLEMENTED();
        return;
    }

    // ScaleX halves the width; ScaleXY halves both width and height.
    const u32 horizontal_scale = config.scaling != config.NoScale ? 1 : 0;
    const u32 vertical_scale = config.scaling == config.ScaleXY ? 1 : 0;

    const u32 output_width = config.output_width >> horizontal_scale;
    const u32 output_height = config.output_height >> vertical_scale;

    const u32 input_size = config.input_width * config.input_height * src_bytes_per_pixel;
    const u32 output_size = output_width * output_height * dst_bytes_per_pixel;

    rasterizer->FlushRegion(config.GetPhysicalInputAddress(), input_size);
    // NOTE(review): this invalidates from the unskewed output address, while the pixels below are
    // written relative to dst_addr (which includes the flip+crop skew). Confirm this is intended.
    rasterizer->InvalidateRegion(config.GetPhysicalOutputAddress(), output_size);

    for (u32 y = 0; y < output_height; ++y) {
        for (u32 x = 0; x < output_width; ++x) {
            // Calculate the [x,y] position of the input image
            // based on the current output position and the scale
            const u32 input_x = x << horizontal_scale;
            const u32 input_y = y << vertical_scale;

            // Flip the y value of the output data,
            // we do this after calculating the [x,y] position of the input image
            // to account for the scaling options.
            const u32 output_y = config.flip_vertically ? output_height - y - 1 : y;

            u32 src_offset;
            u32 dst_offset;

            if (config.input_linear) {
                if (!config.dont_swizzle) {
                    // Interpret the input as linear and the output as tiled
                    const u32 coarse_y = output_y & ~7;
                    const u32 stride = output_width * dst_bytes_per_pixel;

                    src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
                    dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) +
                                 coarse_y * stride;
                } else {
                    // Both input and output are linear
                    src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
                    dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel;
                }
            } else {
                if (!config.dont_swizzle) {
                    // Interpret the input as tiled and the output as linear
                    const u32 coarse_y = input_y & ~7;
                    const u32 stride = config.input_width * src_bytes_per_pixel;

                    src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) +
                                 coarse_y * stride;
                    dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel;
                } else {
                    // Both input and output are tiled
                    const u32 out_coarse_y = output_y & ~7;
                    const u32 out_stride = output_width * dst_bytes_per_pixel;

                    const u32 in_coarse_y = input_y & ~7;
                    const u32 in_stride = config.input_width * src_bytes_per_pixel;

                    src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) +
                                 in_coarse_y * in_stride;
                    dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) +
                                 out_coarse_y * out_stride;
                }
            }

            const u8* src_pixel = src_pointer + src_offset;
            Common::Vec4<u8> src_color = DecodePixel(config.input_format, src_pixel);

            // When downscaling, average the pixel with the ones stored directly after it
            // (1 extra pixel for ScaleX, 3 extra pixels for ScaleXY).
            if (config.scaling == config.ScaleX) {
                const auto pixel =
                    DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
                src_color = ((src_color + pixel) / 2).Cast<u8>();
            } else if (config.scaling == config.ScaleXY) {
                const auto pixel1 =
                    DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
                const auto pixel2 =
                    DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
                const auto pixel3 =
                    DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
                src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
            }

            u8* dst_pixel = dst_pointer + dst_offset;
            switch (config.output_format) {
            case Pica::PixelFormat::RGBA8:
                Common::Color::EncodeRGBA8(src_color, dst_pixel);
                break;
            case Pica::PixelFormat::RGB8:
                Common::Color::EncodeRGB8(src_color, dst_pixel);
                break;
            case Pica::PixelFormat::RGB565:
                Common::Color::EncodeRGB565(src_color, dst_pixel);
                break;
            case Pica::PixelFormat::RGB5A1:
                Common::Color::EncodeRGB5A1(src_color, dst_pixel);
                break;
            case Pica::PixelFormat::RGBA4:
                Common::Color::EncodeRGBA4(src_color, dst_pixel);
                break;
            default:
                LOG_ERROR(HW_GPU, "Unknown destination framebuffer format {:x}",
                          static_cast<u32>(config.output_format.Value()));
                break;
            }
        }
    }
}
/**
 * Fills the physical memory range [start address, end address) with a repeating 16-, 24- or
 * 32-bit value selected by the config flags.
 *
 * Invalid addresses and empty/reversed ranges are logged and the request is dropped. No byte at
 * or beyond the end address is written: a trailing partial 16-/24-bit element is truncated to the
 * bytes that fit, and a trailing partial 32-bit element is not written at all.
 *
 * @param config Fill configuration providing the address range, fill width flags and fill value.
 */
void SwBlitter::MemoryFill(const Pica::MemoryFillConfig& config) {
    const PAddr start_addr = config.GetStartAddress();
    const PAddr end_addr = config.GetEndAddress();

    // TODO: do hwtest with these cases
    if (!memory.IsValidPhysicalAddress(start_addr)) {
        LOG_CRITICAL(HW_GPU, "invalid start address {:#010X}", start_addr);
        return;
    }
    if (!memory.IsValidPhysicalAddress(end_addr)) {
        LOG_CRITICAL(HW_GPU, "invalid end address {:#010X}", end_addr);
        return;
    }
    if (end_addr <= start_addr) {
        LOG_CRITICAL(HW_GPU, "invalid memory range from {:#010X} to {:#010X}", start_addr,
                     end_addr);
        return;
    }

    u8* start = memory.GetPhysicalPointer(start_addr);
    u8* end = memory.GetPhysicalPointer(end_addr);

    rasterizer->InvalidateRegion(start_addr, end_addr - start_addr);

    // The host pointers may not be ordered like the guest addresses; in that case nothing can be
    // filled safely.
    if (end <= start) {
        return;
    }
    const std::size_t size = static_cast<std::size_t>(end - start);

    if (config.fill_24bit) {
        // Fill with 24-bit values
        const u8 pattern[3] = {
            static_cast<u8>(config.value_24bit_r),
            static_cast<u8>(config.value_24bit_g),
            static_cast<u8>(config.value_24bit_b),
        };
        std::size_t offset = 0;
        for (; size - offset >= sizeof(pattern); offset += sizeof(pattern)) {
            std::memcpy(start + offset, pattern, sizeof(pattern));
        }
        // Write only the leading bytes of the last element that still fit inside the range.
        std::memcpy(start + offset, pattern, size - offset);
    } else if (config.fill_32bit) {
        // Fill with 32-bit values
        const u32 value = config.value_32bit;
        const std::size_t len = size / sizeof(u32);
        for (std::size_t i = 0; i < len; ++i) {
            std::memcpy(&start[i * sizeof(u32)], &value, sizeof(u32));
        }
    } else {
        // Fill with 16-bit values
        const u16 value_16bit = config.value_16bit.Value();
        std::size_t offset = 0;
        for (; size - offset >= sizeof(u16); offset += sizeof(u16)) {
            std::memcpy(start + offset, &value_16bit, sizeof(u16));
        }
        // Write only the first byte of the last element if the range has an odd length.
        std::memcpy(start + offset, &value_16bit, size - offset);
    }
}
} // namespace SwRenderer

View File

@@ -0,0 +1,38 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
namespace Pica {
struct DisplayTransferConfig;
struct MemoryFillConfig;
} // namespace Pica
namespace Memory {
class MemorySystem;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace SwRenderer {
/// Software implementation of the GPU transfer operations (texture copy, display transfer and
/// memory fill). It works directly on emulated physical memory and tells the rasterizer which
/// regions it is about to read or overwrite.
class SwBlitter {
public:
/// Stores the memory system and the rasterizer to use; neither is owned by the blitter.
explicit SwBlitter(Memory::MemorySystem& memory, VideoCore::RasterizerInterface* rasterizer);
~SwBlitter();
/// Raw byte copy between two physical addresses with optional row width/gap on each side.
void TextureCopy(const Pica::DisplayTransferConfig& config);
/// Image copy with pixel format conversion, optional downscaling, vertical flip and
/// tiled/linear layout conversion.
void DisplayTransfer(const Pica::DisplayTransferConfig& config);
/// Fills a physical memory range with a repeating 16-, 24- or 32-bit value.
void MemoryFill(const Pica::MemoryFillConfig& config);
private:
/// Used to validate physical addresses and to resolve them to host pointers.
Memory::MemorySystem& memory;
/// Receives FlushRegion/InvalidateRegion notifications; dereferenced without a null check.
VideoCore::RasterizerInterface* rasterizer;
};
} // namespace SwRenderer

View File

@@ -4,7 +4,7 @@
#include <array>
#include <cstddef>
#include "video_core/regs_texturing.h"
#include "video_core/pica/regs_texturing.h"
#include "video_core/renderer_software/sw_clipper.h"
namespace SwRenderer {

View File

@@ -5,10 +5,10 @@
#include <algorithm>
#include "common/color.h"
#include "common/logging/log.h"
#include "core/hw/gpu.h"
#include "core/memory.h"
#include "video_core/pica/regs_external.h"
#include "video_core/pica/regs_framebuffer.h"
#include "video_core/pica_types.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/renderer_software/sw_framebuffer.h"
#include "video_core/utils.h"
@@ -63,7 +63,7 @@ void Framebuffer::DrawPixel(u32 x, u32 y, const Common::Vec4<u8>& color) const {
const u32 coarse_y = y & ~7;
const u32 bytes_per_pixel =
GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
Pica::BytesPerPixel(Pica::PixelFormat(framebuffer.color_format.Value()));
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
coarse_y * framebuffer.width * bytes_per_pixel;
u8* dst_pixel = color_buffer + dst_offset;
@@ -97,7 +97,7 @@ const Common::Vec4<u8> Framebuffer::GetPixel(u32 x, u32 y) const {
const u32 coarse_y = y & ~7;
const u32 bytes_per_pixel =
GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
Pica::BytesPerPixel(Pica::PixelFormat(framebuffer.color_format.Value()));
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
coarse_y * framebuffer.width * bytes_per_pixel;
const u8* src_pixel = color_buffer + src_offset;

View File

@@ -6,7 +6,7 @@
#include "common/common_types.h"
#include "common/vector_math.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/pica/regs_framebuffer.h"
namespace Memory {
class MemorySystem;

View File

@@ -10,7 +10,7 @@ namespace SwRenderer {
using Pica::f16;
using Pica::LightingRegs;
static float LookupLightingLut(const Pica::State::Lighting& lighting, std::size_t lut_index,
static float LookupLightingLut(const Pica::PicaCore::Lighting& lighting, std::size_t lut_index,
u8 index, float delta) {
ASSERT_MSG(lut_index < lighting.luts.size(), "Out of range lut");
ASSERT_MSG(index < lighting.luts[lut_index].size(), "Out of range index");
@@ -24,7 +24,7 @@ static float LookupLightingLut(const Pica::State::Lighting& lighting, std::size_
}
std::pair<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors(
const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state,
const Pica::LightingRegs& lighting, const Pica::PicaCore::Lighting& lighting_state,
const Common::Quaternion<f32>& normquat, const Common::Vec3f& view,
std::span<const Common::Vec4<u8>, 4> texture_color) {

View File

@@ -9,12 +9,12 @@
#include "common/quaternion.h"
#include "common/vector_math.h"
#include "video_core/pica_state.h"
#include "video_core/pica/pica_core.h"
namespace SwRenderer {
std::pair<Common::Vec4<u8>, Common::Vec4<u8>> ComputeFragmentsColors(
const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state,
const Pica::LightingRegs& lighting, const Pica::PicaCore::Lighting& lighting_state,
const Common::Quaternion<f32>& normquat, const Common::Vec3f& view,
std::span<const Common::Vec4<u8>, 4> texture_color);

View File

@@ -15,7 +15,7 @@ using ProcTexCombiner = Pica::TexturingRegs::ProcTexCombiner;
using ProcTexFilter = Pica::TexturingRegs::ProcTexFilter;
using Pica::f16;
float LookupLUT(const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut, float coord) {
float LookupLUT(const std::array<Pica::PicaCore::ProcTex::ValueEntry, 128>& lut, float coord) {
// For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and
// coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using
// value entries and difference entries.
@@ -47,7 +47,7 @@ float NoiseRand2D(unsigned int x, unsigned int y) {
}
float NoiseCoef(float u, float v, const Pica::TexturingRegs& regs,
const Pica::State::ProcTex& state) {
const Pica::PicaCore::ProcTex& state) {
const float freq_u = f16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32();
const float freq_v = f16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32();
const float phase_u = f16::FromRaw(regs.proctex_noise_u.phase).ToFloat32();
@@ -115,7 +115,7 @@ void ClampCoord(float& coord, ProcTexClamp mode) {
}
float CombineAndMap(float u, float v, ProcTexCombiner combiner,
const std::array<Pica::State::ProcTex::ValueEntry, 128>& map_table) {
const std::array<Pica::PicaCore::ProcTex::ValueEntry, 128>& map_table) {
float f;
switch (combiner) {
case ProcTexCombiner::U:
@@ -158,7 +158,7 @@ float CombineAndMap(float u, float v, ProcTexCombiner combiner,
} // Anonymous namespace
Common::Vec4<u8> ProcTex(float u, float v, const Pica::TexturingRegs& regs,
const Pica::State::ProcTex& state) {
const Pica::PicaCore::ProcTex& state) {
u = std::abs(u);
v = std::abs(v);

View File

@@ -6,12 +6,12 @@
#include "common/common_types.h"
#include "common/vector_math.h"
#include "video_core/pica_state.h"
#include "video_core/pica/pica_core.h"
namespace SwRenderer {
/// Generates procedural texture color for the given coordinates
Common::Vec4<u8> ProcTex(float u, float v, const Pica::TexturingRegs& regs,
const Pica::State::ProcTex& state);
const Pica::PicaCore::ProcTex& state);
} // namespace SwRenderer

View File

@@ -8,14 +8,13 @@
#include "common/quaternion.h"
#include "common/vector_math.h"
#include "core/memory.h"
#include "video_core/pica_state.h"
#include "video_core/pica_types.h"
#include "video_core/pica/output_vertex.h"
#include "video_core/pica/pica_core.h"
#include "video_core/renderer_software/sw_framebuffer.h"
#include "video_core/renderer_software/sw_lighting.h"
#include "video_core/renderer_software/sw_proctex.h"
#include "video_core/renderer_software/sw_rasterizer.h"
#include "video_core/renderer_software/sw_texturing.h"
#include "video_core/shader/shader.h"
#include "video_core/texture/texture_decode.h"
namespace SwRenderer {
@@ -33,7 +32,7 @@ using Pica::Texture::TextureInfo;
// we can use a very small epsilon value for clip plane comparison.
constexpr f32 EPSILON_Z = 0.00000001f;
struct Vertex : Pica::Shader::OutputVertex {
struct Vertex : Pica::OutputVertex {
Vertex(const OutputVertex& v) : OutputVertex(v) {}
/// Attributes used to store intermediate results position after perspective divide.
@@ -101,14 +100,13 @@ private:
} // Anonymous namespace
RasterizerSoftware::RasterizerSoftware(Memory::MemorySystem& memory_)
: memory{memory_}, state{Pica::g_state}, regs{state.regs},
RasterizerSoftware::RasterizerSoftware(Memory::MemorySystem& memory_, Pica::PicaCore& pica_)
: memory{memory_}, pica{pica_}, regs{pica.regs.internal},
num_sw_threads{std::max(std::thread::hardware_concurrency(), 2U)},
sw_workers{num_sw_threads, "SwRenderer workers"}, fb{memory, regs.framebuffer} {}
void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0,
const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) {
void RasterizerSoftware::AddTriangle(const Pica::OutputVertex& v0, const Pica::OutputVertex& v1,
const Pica::OutputVertex& v2) {
/**
* Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at
* the new edge (or less in degenerate cases). As such, we can say that each clipping plane
@@ -170,8 +168,8 @@ void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0,
}
}
if (state.regs.rasterizer.clip_enable) {
const ClippingEdge custom_edge{state.regs.rasterizer.GetClipCoef()};
if (regs.rasterizer.clip_enable) {
const ClippingEdge custom_edge{regs.rasterizer.GetClipCoef()};
clip(custom_edge);
if (output_list->size() < 3) {
return;
@@ -434,7 +432,7 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
get_interpolated_attribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(),
};
std::tie(primary_fragment_color, secondary_fragment_color) =
ComputeFragmentsColors(regs.lighting, state.lighting, normquat, view,
ComputeFragmentsColors(regs.lighting, pica.lighting, normquat, view,
texture_color);
}
@@ -587,7 +585,7 @@ std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor(
if (regs.texturing.main_config.texture3_enable) {
const auto& proctex_uv = uv[regs.texturing.main_config.texture3_coordinates];
texture_color[3] = ProcTex(proctex_uv.u().ToFloat32(), proctex_uv.v().ToFloat32(),
regs.texturing, state.proctex);
regs.texturing, pica.proctex);
}
return texture_color;
@@ -813,7 +811,7 @@ void RasterizerSoftware::WriteFog(float depth, Common::Vec4<u8>& combiner_output
// Generate clamped fog factor from LUT for given fog index
const f32 fog_i = std::clamp(floorf(fog_index), 0.0f, 127.0f);
const f32 fog_f = fog_index - fog_i;
const auto& fog_lut_entry = state.fog.lut[static_cast<u32>(fog_i)];
const auto& fog_lut_entry = pica.fog.lut[static_cast<u32>(fog_i)];
f32 fog_factor = fog_lut_entry.ToFloat() + fog_lut_entry.DiffToFloat() * fog_f;
fog_factor = std::clamp(fog_factor, 0.0f, 1.0f);
for (u32 i = 0; i < 3; i++) {

View File

@@ -6,18 +6,14 @@
#include <span>
#include "common/thread_worker.h"
#include "video_core/pica/regs_texturing.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_software/sw_clipper.h"
#include "video_core/renderer_software/sw_framebuffer.h"
namespace Pica::Shader {
struct OutputVertex;
}
namespace Pica {
struct State;
struct Regs;
struct RegsInternal;
class PicaCore;
} // namespace Pica
namespace SwRenderer {
@@ -26,10 +22,10 @@ struct Vertex;
class RasterizerSoftware : public VideoCore::RasterizerInterface {
public:
explicit RasterizerSoftware(Memory::MemorySystem& memory);
explicit RasterizerSoftware(Memory::MemorySystem& memory, Pica::PicaCore& pica);
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) override;
void AddTriangle(const Pica::OutputVertex& v0, const Pica::OutputVertex& v1,
const Pica::OutputVertex& v2) override;
void DrawTriangles() override {}
void NotifyPicaRegisterChanged(u32 id) override {}
void FlushAll() override {}
@@ -72,8 +68,8 @@ private:
private:
Memory::MemorySystem& memory;
Pica::State& state;
const Pica::Regs& regs;
Pica::PicaCore& pica;
Pica::RegsInternal& regs;
size_t num_sw_threads;
Common::ThreadWorker sw_workers;
Framebuffer fb;

View File

@@ -6,7 +6,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/vector_math.h"
#include "video_core/regs_texturing.h"
#include "video_core/pica/regs_texturing.h"
#include "video_core/renderer_software/sw_texturing.h"
namespace SwRenderer {

View File

@@ -8,7 +8,7 @@
#include "common/common_types.h"
#include "common/vector_math.h"
#include "video_core/regs_texturing.h"
#include "video_core/pica/regs_texturing.h"
namespace SwRenderer {