From 117c6dfb97016b35d35f374b3ce39d49e6b53d10 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 2 Jul 2023 00:56:56 +0300 Subject: [PATCH 01/19] Make 3DS clock return system time and not UTC time --- src/core/memory.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 37c13c7d..9318c627 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -3,6 +3,7 @@ #include "resource_limits.hpp" #include #include // For time since epoch +#include using namespace KernelMemoryTypes; @@ -424,9 +425,19 @@ void Memory::mirrorMapping(u32 destAddress, u32 sourceAddress, u32 size) { u64 Memory::timeSince3DSEpoch() { using namespace std::chrono; - // ms since Jan 1 1970 - milliseconds ms = duration_cast(system_clock::now().time_since_epoch()); - // ms between Jan 1 1900 and Jan 1 1970 (2208988800 seconds elapsed between the two) + std::time_t rawTime = std::time(nullptr); // Get current UTC time + auto localTime = std::localtime(&rawTime); // Convert to local time + + bool daylightSavings = localTime->tm_isdst; // Get if time includes DST + localTime = std::gmtime(&rawTime); + + // Use gmtime + mktime to calculate difference between local time and UTC + auto timezoneDifference = rawTime - std::mktime(localTime); + if (daylightSavings) { + timezoneDifference += 60ull * 60ull; // Add 1 hour (60 seconds * 60 minutes) + } + + milliseconds ms = duration_cast(seconds(rawTime + timezoneDifference)); constexpr u64 offset = 2208988800ull * 1000; return ms.count() + offset; } \ No newline at end of file From 8930d44f5d1ed9a16178da39f4ede337ee4760b2 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 2 Jul 2023 01:00:57 +0300 Subject: [PATCH 02/19] Proper daylight savings time check --- src/core/memory.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 9318c627..dfa155a2 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -428,7 +428,7 @@ u64 Memory::timeSince3DSEpoch() { std::time_t rawTime = std::time(nullptr); // Get current UTC time auto localTime = std::localtime(&rawTime); // Convert to local time - bool daylightSavings = localTime->tm_isdst; // Get if time includes DST + bool daylightSavings = localTime->tm_isdst > 0; // Get if time includes DST localTime = std::gmtime(&rawTime); // Use gmtime + mktime to calculate difference between local time and UTC @@ -437,7 +437,8 @@ u64 Memory::timeSince3DSEpoch() { timezoneDifference += 60ull * 60ull; // Add 1 hour (60 seconds * 60 minutes) } - milliseconds ms = duration_cast(seconds(rawTime + timezoneDifference)); - constexpr u64 offset = 2208988800ull * 1000; - return ms.count() + offset; + // seconds between Jan 1 1900 and Jan 1 1970 + constexpr u64 offset = 2208988800ull; + milliseconds ms = duration_cast(seconds(rawTime + timezoneDifference + offset)); + return ms.count(); } \ No newline at end of file From de3c4020d859ec364a6975d3d0d79038d1e2695b Mon Sep 17 00:00:00 2001 From: Sky Date: Sat, 1 Jul 2023 15:16:35 -0700 Subject: [PATCH 03/19] [GL] Add lighting attributes --- src/core/renderer_gl/renderer_gl.cpp | 40 +++++++++++++++++----------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index fe28c19a..0ac76060 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -11,12 +11,14 @@ using namespace Helpers; const char* vertexShader = R"( #version 410 core - layout (location = 0) in vec4 a_coords; - layout (location = 1) in vec4 a_vertexColour; - layout (location = 2) in vec2 a_texcoord0; - layout (location = 3) in vec2 a_texcoord1; - layout (location = 4) in float a_texcoord0_w; - layout (location = 5) in vec2 a_texcoord2; + layout (location = 0) in vec4 a_coords; + layout (location = 1) in vec4 a_quaternion; + layout (location = 2) in vec4 a_vertexColour; + layout (location = 3) in vec2 a_texcoord0; + layout (location = 4) in vec2 a_texcoord1; + layout (location = 5) in float a_texcoord0_w; + layout (location = 6) in vec3 a_view; + layout (location = 7) in vec2 a_texcoord2; out vec4 v_colour; out vec3 v_texcoord0; @@ -392,21 +394,27 @@ void Renderer::initGraphicsContext() { // Position (x, y, z, w) attributes vao.setAttributeFloat(0, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.positions)); vao.enableAttribute(0); - // Colour attribute - vao.setAttributeFloat(1, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.colour)); + // Quaternion attribute + vao.setAttributeFloat(1, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.quaternion)); vao.enableAttribute(1); - // UV 0 attribute - vao.setAttributeFloat(2, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0)); + // Colour attribute + vao.setAttributeFloat(2, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.colour)); vao.enableAttribute(2); - // UV 1 attribute - vao.setAttributeFloat(3, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord1)); + // UV 0 attribute + vao.setAttributeFloat(3, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0)); vao.enableAttribute(3); - // UV 0 W-component attribute - vao.setAttributeFloat(4, 1, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0_w)); + // UV 1 attribute + vao.setAttributeFloat(4, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord1)); vao.enableAttribute(4); - // UV 2 attribute - vao.setAttributeFloat(5, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord2)); + // UV 0 W-component attribute + vao.setAttributeFloat(5, 1, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0_w)); vao.enableAttribute(5); + // View + vao.setAttributeFloat(6, 3, sizeof(PicaVertex), offsetof(PicaVertex, s.view)); + vao.enableAttribute(6); + // UV 2 attribute + vao.setAttributeFloat(7, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord2)); + vao.enableAttribute(7); dummyVBO.create(); dummyVAO.create(); From 6fababb93a64278bf7deac7811c0d42b4fd35d3a Mon Sep 17 00:00:00 2001 From: Sky Date: Sat, 1 Jul 2023 16:43:57 -0700 Subject: [PATCH 04/19] [GL] Pipe Pica registers and implement ambient lighting --- include/renderer_gl/renderer_gl.hpp | 1 + src/core/renderer_gl/renderer_gl.cpp | 35 +++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 636c4c65..2f3b2119 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -32,6 +32,7 @@ class Renderer { GLint textureEnvScaleLoc = -1; GLint textureEnvUpdateBufferLoc = -1; GLint textureEnvBufferColorLoc = -1; + GLint picaRegLoc = -1; // Depth configuration uniform locations GLint depthOffsetLoc = -1; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 0ac76060..ed14c603 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -20,9 +20,11 @@ const char* vertexShader = R"( layout (location = 6) in vec3 a_view; layout (location = 7) in vec2 a_texcoord2; + out vec4 v_quaternion; out vec4 v_colour; out vec3 v_texcoord0; out vec2 v_texcoord1; + out vec3 v_view; out vec2 v_texcoord2; flat out vec4 v_textureEnvColor[6]; flat out vec4 v_textureEnvBufferColor; @@ -30,6 +32,12 @@ const char* vertexShader = R"( // TEV uniforms uniform uint u_textureEnvColor[6]; uniform uint u_textureEnvBufferColor; + uniform uint u_picaRegs[0x200-0x47]; + + //Helper so that the implementation of u_pica_regs can be changed later + uint readPicaReg(uint reg_addr){ + return u_picaRegs[reg_addr-0x47]; + } vec4 abgr8888ToVec4(uint abgr) { const float scale = 1.0 / 255.0; @@ -50,6 +58,8 @@ const char* vertexShader = R"( v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); + v_quaternion = a_quaternion; + v_view = a_view; for (int i = 0; i < 6; i++) { v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); @@ -62,9 +72,11 @@ const char* vertexShader = R"( const char* fragmentShader = R"( #version 410 core + in vec4 v_quaternion; in vec4 v_colour; in vec3 v_texcoord0; in vec2 v_texcoord1; + in vec3 v_view; in vec2 v_texcoord2; flat in vec4 v_textureEnvColor[6]; flat in vec4 v_textureEnvBufferColor; @@ -90,6 +102,13 @@ const char* fragmentShader = R"( uniform sampler2D u_tex1; uniform sampler2D u_tex2; + uniform uint u_picaRegs[0x200-0x47]; + + //Helper so that the implementation of u_pica_regs can be changed later + uint readPicaReg(uint reg_addr){ + return u_picaRegs[reg_addr-0x47]; + } + vec4 tevSources[16]; vec4 tevNextPreviousBuffer; bool tevUnimplementedSourceFlag = false; @@ -193,7 +212,18 @@ const char* fragmentShader = R"( } void calcLighting(out vec4 primary_color, out vec4 secondary_color){ - primary_color = vec4(vec3(0.5) ,1.0); + uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008F); + if(bitfieldExtract(GPUREG_LIGHTING_ENABLE,0,1)==0){ + primary_color = secondary_color = vec4(0.0); + return; + } + uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0); + vec3 ambient = vec3( + float(bitfieldExtract(GPUREG_LIGHTING_AMBIENT,20,8))/255., + float(bitfieldExtract(GPUREG_LIGHTING_AMBIENT,10,8))/255., + float(bitfieldExtract(GPUREG_LIGHTING_AMBIENT,00,8))/255. + ); + primary_color = vec4(ambient,1.0); secondary_color = vec4(vec3(0.5) ,1.0); } @@ -373,6 +403,7 @@ void Renderer::initGraphicsContext() { depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); + picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); // Init sampler objects glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); @@ -621,6 +652,8 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span setupTextureEnvState(); bindTexturesToSlots(); + //Upload Pica Registers + glUniform1uiv(picaRegLoc,0x200-0x47,®s[0x47]); // TODO: Actually use this float viewportWidth = f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0; From 769e90a45fde0f4b4c34dd1e2fbf628da41362d8 Mon Sep 17 00:00:00 2001 From: Sky Date: Sat, 1 Jul 2023 22:14:57 -0700 Subject: [PATCH 05/19] [GL] Implement most fragment lighting --- src/core/renderer_gl/renderer_gl.cpp | 173 +++++++++++++++++++++++++-- 1 file changed, 165 insertions(+), 8 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index ed14c603..5a0ca3ad 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -210,21 +210,178 @@ const char* fragmentShader = R"( return result; } + #define D0_LUT 0u + #define D1_LUT 1u + #define SP_LUT 2u + #define FR_LUT 3u + #define RB_LUT 4u + #define RG_LUT 5u + #define RR_LUT 6u + + float lutLookup(uint lut, float value){ + //TODO: Implement this. + return value; + } + vec3 regToColor(uint reg){ + return vec3( + float(bitfieldExtract(reg,20,8))/255., + float(bitfieldExtract(reg,10,8))/255., + float(bitfieldExtract(reg,00,8))/255. + ); + } + vec3 rotateVec3ByQuaternion(vec3 v, vec4 q){ + vec3 u=q.xyz; + float s = q.w; + return 2.0*dot(u, v)*u + (s*s - dot(u, u))*v + 2.0*s*cross(u, v); + } + float decodeFP(uint hex, uint E, uint M){ + uint width = M + E + 1u; + uint bias = 128u - (1u << (E - 1u)); + uint exponent = (hex >> M) & ((1u << E) - 1u); + uint mantissa = hex & ((1u << M) - 1u); + uint sign = (hex >> (E + M)) << 31u; + + if ((hex & ((1u << (width - 1u)) - 1u)) != 0) { + if (exponent == (1u << E) - 1u) exponent = 255u; + else exponent += bias; + hex = sign | (mantissa << (23u - M)) | (exponent << 23u); + }else hex = sign; + return uintBitsToFloat(hex); + } + //Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color){ + // Quaternions describe a transformation from surface-local space to eye space. + // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), + // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). + vec4 quat = v_quaternion; + vec3 normal = normalize(rotateVec3ByQuaternion(vec3(0.0,0.0,1.0), quat)); + vec3 tangent = normalize(rotateVec3ByQuaternion(vec3(1.0,0.0,0.0), quat)); + vec3 bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0,1.0,0.0), quat)); + vec3 view = normalize(v_view); + uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008F); if(bitfieldExtract(GPUREG_LIGHTING_ENABLE,0,1)==0){ - primary_color = secondary_color = vec4(0.0); + primary_color = secondary_color = vec4(1.0); return; } uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0); - vec3 ambient = vec3( - float(bitfieldExtract(GPUREG_LIGHTING_AMBIENT,20,8))/255., - float(bitfieldExtract(GPUREG_LIGHTING_AMBIENT,10,8))/255., - float(bitfieldExtract(GPUREG_LIGHTING_AMBIENT,00,8))/255. - ); - primary_color = vec4(ambient,1.0); - secondary_color = vec4(vec3(0.5) ,1.0); + + uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2)&0x7u)+1; + uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9); + + primary_color = vec4(vec3(0.0),1.0); + secondary_color = vec4(vec3(0.0),1.0); + + primary_color.rgb+= regToColor(GPUREG_LIGHTING_AMBIENT); + + uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0); + uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1); + uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3); + uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4); + uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2); + float d[7]; + + bool error_unimpl = false; + + for(uint i = 0; i=6u) scale/=256.0; + + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT,i*4,3); + if(input_id==0u)d[i] = dot(normal,half_vector); + else if(input_id==1u)d[i] = dot(view,half_vector); + else if(input_id==2u)d[i] = dot(normal,view); + else if(input_id==3u)d[i] = dot(light_vector,normal); + else if(input_id==4u){ + uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146+0x10*light_id); + uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147+0x10*light_id); + vec3 spot_light_vector = vec3( + decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW,0,16),1,11), + decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW,16,16),1,11), + decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH,0,16),1,11) + ); + d[i] = dot(-light_vector,spot_light_vector);// -L . P (aka Spotlight aka SP); + }else if(input_id==5u){ + d[i] = 1.0;//TODO: cos (aka CP); + error_unimpl = true; + }else d[i] = 1.0; + + d[i] = lutLookup(i,d[i])*scale; + if(bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS,2*i,1)!=0u)d[i]=abs(d[i]); + }else d[i]=1.0; + } + uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4); + if(lookup_config==0){ + d[D1_LUT] = 1.0; + d[FR_LUT] = 1.0; + d[RG_LUT]=d[RB_LUT]=d[RR_LUT]; + }else if(lookup_config==1){ + d[D0_LUT] = 1.0; + d[D1_LUT] = 1.0; + d[RG_LUT]=d[RB_LUT]=d[RR_LUT]; + }else if(lookup_config==2){ + d[FR_LUT] = 1.0; + d[SP_LUT] = 1.0; + d[RG_LUT]=d[RB_LUT]=d[RR_LUT]; + }else if(lookup_config==3){ + d[SP_LUT] = 1.0; + d[RG_LUT]=d[RB_LUT]=d[RR_LUT]=1.0; + }else if(lookup_config==4)d[FR_LUT] = 1.0; + else if(lookup_config==5)d[D1_LUT] = 1.0; + else if(lookup_config==6)d[RG_LUT]=d[RB_LUT]=d[RR_LUT]; + + float distance_factor = 1.0; //a + float indirect_factor = 1.0;//fi + float shadow_factor = 1.0; //o + + float NdotL = dot(normal,light_vector); //Li*N + + //Two sided diffuse + if(bitfieldExtract(GPUREG_LIGHTi_CONFIG,1,1)==0)NdotL=max(0.0,NdotL); + else NdotL=abs(NdotL); + + float light_factor = distance_factor*d[SP_LUT]*indirect_factor*shadow_factor; + + primary_color.rgb += light_factor*(regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE)*NdotL); + secondary_color.rgb += light_factor*( + regToColor(GPUREG_LIGHTi_SPECULAR0)*d[D0_LUT]+ + regToColor(GPUREG_LIGHTi_SPECULAR1)*d[D1_LUT]*vec3(d[RR_LUT],d[RG_LUT],d[RB_LUT]) + ); + } + uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0,2,1); + uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0,3,1); + + if(fresnel_output1==1u) primary_color.a = d[FR_LUT]; + if(fresnel_output2==1u) secondary_color.a = d[FR_LUT]; + + if(error_unimpl){ + secondary_color = primary_color = vec4(1.0,0.,1.0,1.0); + } } void main() { From cb251581847fadf642c2f36c218e64fd5005c146 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 2 Jul 2023 17:10:47 +0300 Subject: [PATCH 06/19] Stop downloading LLVM on MacOS CI --- .github/workflows/MacOS_Build.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/MacOS_Build.yml b/.github/workflows/MacOS_Build.yml index 819d4647..4007e0e9 100644 --- a/.github/workflows/MacOS_Build.yml +++ b/.github/workflows/MacOS_Build.yml @@ -23,13 +23,10 @@ jobs: - name: Fetch submodules run: git submodule update --init --recursive - - name: Install LLVM # MacOS comes with "AppleClang" instead of regular Clang, and it can't build the project because no proper C++20 - run: brew install llvm - - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++ + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} - name: Build # Build your program with the given configuration From cba1b9f39b949d4b63f3745e2a93d85a304414b4 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 2 Jul 2023 20:18:34 +0300 Subject: [PATCH 07/19] [NCCH loader] Fix crypto check --- src/core/loader/ncch.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 0f29ddb5..5a94dd55 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -54,14 +54,6 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn romFS.hashRegionSize = u64(*(u32*)&header[0x1B8]) * mediaUnit; if (encrypted) { - if (!aesEngine.haveKeys()) { - Helpers::panic( - "Loaded an encrypted ROM but AES keys don't seem to have been provided correctly! Navigate to the emulator's\n" - "app data folder and make sure you have a sysdata directory with a file called aes_keys.txt which contains your keys!" - ); - return false; - } - Crypto::AESKey primaryKeyY; Crypto::AESKey secondaryKeyY; std::memcpy(primaryKeyY.data(), header, primaryKeyY.size()); @@ -128,6 +120,14 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn } // If it's truly encrypted, we need to read section again. if (encrypted) { + if (!aesEngine.haveKeys()) { + Helpers::panic( + "Loaded an encrypted ROM but AES keys don't seem to have been provided correctly! Navigate to the emulator's\n" + "app data folder and make sure you have a sysdata directory with a file called aes_keys.txt which contains your keys!" + ); + return false; + } + auto [success, bytes] = readFromFile(file, exheaderInfo, &exheader[0], 0, exheaderSize); if (!success || bytes != exheaderSize) { printf("Failed to read Extended NCCH header\n"); From 0b2ff6c5c8534d3138c8f5b3bf201163f2e27160 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 2 Jul 2023 22:36:45 +0300 Subject: [PATCH 08/19] Separate user and dev builds --- .github/workflows/Linux_Build.yml | 2 +- .github/workflows/MacOS_Build.yml | 2 +- .github/workflows/Windows_Build.yml | 2 +- CMakeLists.txt | 12 ++++++++- include/helpers.hpp | 34 +++++++++++++++++------- src/core/kernel/directory_operations.cpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 4 ++- src/core/services/apt.cpp | 5 +++- src/core/services/cecd.cpp | 5 +++- 9 files changed, 50 insertions(+), 18 deletions(-) diff --git a/.github/workflows/Linux_Build.yml b/.github/workflows/Linux_Build.yml index 71a318a8..d58c3c94 100644 --- a/.github/workflows/Linux_Build.yml +++ b/.github/workflows/Linux_Build.yml @@ -26,7 +26,7 @@ jobs: - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_USER_BUILD=ON - name: Build # Build your program with the given configuration diff --git a/.github/workflows/MacOS_Build.yml b/.github/workflows/MacOS_Build.yml index 819d4647..942452d8 100644 --- a/.github/workflows/MacOS_Build.yml +++ b/.github/workflows/MacOS_Build.yml @@ -29,7 +29,7 @@ jobs: - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++ + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++ -DENABLE_USER_BUILD=ON - name: Build # Build your program with the given configuration diff --git a/.github/workflows/Windows_Build.yml b/.github/workflows/Windows_Build.yml index 0a4abe41..2e8a8562 100644 --- a/.github/workflows/Windows_Build.yml +++ b/.github/workflows/Windows_Build.yml @@ -26,7 +26,7 @@ jobs: - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON - name: Build # Build your program with the given configuration diff --git a/CMakeLists.txt b/CMakeLists.txt index b7310d95..6596ffe7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,8 +13,10 @@ endif() project(Alber) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" OFF) option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF) option(ENABLE_LTO "Enable link-time optimization" OFF) +option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF) include_directories(${PROJECT_SOURCE_DIR}/include/) include_directories(${PROJECT_SOURCE_DIR}/include/kernel) @@ -159,7 +161,7 @@ source_group("Source Files\\Third Party" FILES ${THIRD_PARTY_SOURCE_FILES}) add_executable(Alber ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} ${PICA_SOURCE_FILES} ${RENDERER_GL_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${HEADER_FILES}) -if(ENABLE_LTO) +if(ENABLE_LTO OR ENABLE_USER_BUILD) set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE) endif() @@ -167,4 +169,12 @@ target_link_libraries(Alber PRIVATE dynarmic SDL2-static glad cryptopp) if(GPU_DEBUG_INFO) target_compile_definitions(Alber PRIVATE GPU_DEBUG_INFO=1) +endif() + +if(ENABLE_USER_BUILD) + target_compile_definitions(Alber PRIVATE PANDA3DS_USER_BUILD=1) +endif() + +if(ENABLE_USER_BUILD OR DISABLE_PANIC_DEV) + target_compile_definitions(Alber PRIVATE PANDA3DS_LIMITED_PANICS=1) endif() \ No newline at end of file diff --git a/include/helpers.hpp b/include/helpers.hpp index 53c57c7c..9830cc88 100644 --- a/include/helpers.hpp +++ b/include/helpers.hpp @@ -30,24 +30,31 @@ using s32 = std::int32_t; using s64 = std::int64_t; namespace Helpers { - [[noreturn]] static void panic(const char* fmt, ...) { - std::va_list args; - va_start(args, fmt); + // Unconditional panic, unlike panicDev which does not panic on user builds + template + [[noreturn]] static void panic(const char* fmt, Args&&... args) { std::cout << termcolor::on_red << "[FATAL] "; - std::vprintf(fmt, args); + std::printf(fmt, args...); std::cout << termcolor::reset << "\n"; - va_end(args); exit(1); } + +#ifdef PANDA3DS_LIMITED_PANICS + template + static void panicDev(const char* fmt, Args&&... args) {} +#else + template + [[noreturn]] static void panicDev(const char* fmt, Args&&... args) { + panic(fmt, args...); + } +#endif - static void warn(const char* fmt, ...) { - std::va_list args; - va_start(args, fmt); + template + static void warn(const char* fmt, Args&&... args) { std::cout << termcolor::on_red << "[Warning] "; - std::vprintf(fmt, args); + std::printf(fmt, args...); std::cout << termcolor::reset << "\n"; - va_end(args); } static constexpr bool buildingInDebugMode() { @@ -57,6 +64,13 @@ namespace Helpers { return true; } + static constexpr bool isUserBuild() { +#ifdef PANDA3DS_USER_BUILD + return true; +#endif + return false; + } + static void debug_printf(const char* fmt, ...) { if constexpr (buildingInDebugMode()) { std::va_list args; diff --git a/src/core/kernel/directory_operations.cpp b/src/core/kernel/directory_operations.cpp index 567d9cb8..fe4f58f4 100644 --- a/src/core/kernel/directory_operations.cpp +++ b/src/core/kernel/directory_operations.cpp @@ -33,7 +33,7 @@ void Kernel::readDirectory(u32 messagePointer, Handle directory) { const u32 entryCount = mem.read32(messagePointer + 4); const u32 outPointer = mem.read32(messagePointer + 12); logFileIO("Directory::Read (handle = %X, entry count = %d, out pointer = %08X)\n", directory, entryCount, outPointer); - Helpers::panic("Unimplemented FsDir::Read"); + Helpers::panicDev("Unimplemented FsDir::Read"); mem.write32(messagePointer + 4, Result::Success); mem.write32(messagePointer + 8, 0); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 0cbc9cbc..487c9db8 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -695,7 +695,9 @@ void Renderer::bindDepthBuffer() { tex = depthBufferCache.add(sampleBuffer).texture.m_handle; } - if (PICA::DepthFmt::Depth24Stencil8 != depthBufferFormat) Helpers::panic("TODO: Should we remove stencil attachment?"); + if (PICA::DepthFmt::Depth24Stencil8 != depthBufferFormat) { + Helpers::panicDev("TODO: Should we remove stencil attachment?"); + } auto attachment = depthBufferFormat == PICA::DepthFmt::Depth24Stencil8 ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, tex, 0); } diff --git a/src/core/services/apt.cpp b/src/core/services/apt.cpp index f2c7612d..becf637f 100644 --- a/src/core/services/apt.cpp +++ b/src/core/services/apt.cpp @@ -81,7 +81,10 @@ void APTService::handleSyncRequest(u32 messagePointer) { case APTCommands::SetApplicationCpuTimeLimit: setApplicationCpuTimeLimit(messagePointer); break; case APTCommands::SetScreencapPostPermission: setScreencapPostPermission(messagePointer); break; case APTCommands::TheSmashBrosFunction: theSmashBrosFunction(messagePointer); break; - default: Helpers::panic("APT service requested. Command: %08X\n", command); + default: + Helpers::panicDev("APT service requested. Command: %08X\n", command); + mem.write32(messagePointer + 4, Result::Success); + break; } } diff --git a/src/core/services/cecd.cpp b/src/core/services/cecd.cpp index f641e40d..dd9ccb2f 100644 --- a/src/core/services/cecd.cpp +++ b/src/core/services/cecd.cpp @@ -16,7 +16,10 @@ void CECDService::handleSyncRequest(u32 messagePointer) { const u32 command = mem.read32(messagePointer); switch (command) { case CECDCommands::GetInfoEventHandle: getInfoEventHandle(messagePointer); break; - default: Helpers::panic("CECD service requested. Command: %08X\n", command); + default: + Helpers::panicDev("CECD service requested. Command: %08X\n", command); + mem.write32(messagePointer + 4, Result::Success); + break; } } From d5d8f869e4b0b76c7bbaf5db88b854f9ffd253fc Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 2 Jul 2023 23:00:15 +0300 Subject: [PATCH 09/19] Update idle_thread.cpp --- src/core/kernel/idle_thread.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/kernel/idle_thread.cpp b/src/core/kernel/idle_thread.cpp index 5389fecc..5abba373 100644 --- a/src/core/kernel/idle_thread.cpp +++ b/src/core/kernel/idle_thread.cpp @@ -59,12 +59,12 @@ void Kernel::setupIdleThread() { t.fpscr = FPSCR::ThreadDefault; // Our idle thread should have as low of a priority as possible, because, well, it's an idle thread. - // We handle this by giving it a priority of 0xff, which is lower than is actually allowed for user threads - // (High priority value = low priority) - t.priority = 0xff; + // We handle this by giving it a priority of 0x40, which is lower than is actually allowed for user threads + // (High priority value = low priority). This is the same priority used in the retail kernel. + t.priority = 0x40; t.status = ThreadStatus::Ready; // Add idle thread to the list of thread indices threadIndices.push_back(idleThreadIndex); sortThreads(); -} \ No newline at end of file +} From 12dbaeaeb01286c673d6d9e463f41e860f32918d Mon Sep 17 00:00:00 2001 From: Sky Date: Sun, 2 Jul 2023 16:14:11 -0700 Subject: [PATCH 10/19] [GL] Initial Implementation of Lighting LUTs --- include/PICA/gpu.hpp | 32 +++++++++++++++ include/PICA/regs.hpp | 11 +++++ include/renderer_gl/renderer_gl.hpp | 2 + src/core/PICA/gpu.cpp | 1 + src/core/PICA/regs.cpp | 33 ++++++++++++++- src/core/renderer_gl/renderer_gl.cpp | 61 ++++++++++++++++++++-------- 6 files changed, 122 insertions(+), 18 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 2b000320..2640ccce 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -70,6 +70,38 @@ class GPU { Renderer renderer; PicaVertex getImmediateModeVertex(); public: + enum : u32 { + LIGHT_LUT_D0=0, + LIGHT_LUT_D1, + LIGHT_LUT_FR, + LIGHT_LUT_RB, + LIGHT_LUT_RG, + LIGHT_LUT_RR, + LIGHT_LUT_SP0=0x8, + LIGHT_LUT_SP1, + LIGHT_LUT_SP2, + LIGHT_LUT_SP3, + LIGHT_LUT_SP4, + LIGHT_LUT_SP5, + LIGHT_LUT_SP6, + LIGHT_LUT_SP7, + LIGHT_LUT_DA0=0x10, + LIGHT_LUT_DA1, + LIGHT_LUT_DA2, + LIGHT_LUT_DA3, + LIGHT_LUT_DA4, + LIGHT_LUT_DA5, + LIGHT_LUT_DA6, + LIGHT_LUT_DA7, + LIGHT_LUT_COUNT + }; + //256 entries per LUT with each LUT as its own row forming a 2D image 256xLIGHT_LUT_COUNT + //Encoded in PICA native format + std::array lightingLUT; + //Used to prevent uploading the lighting_lut on every draw call + //Set to true when the CPU writes to the lighting_lut + //Set to false by the renderer when the lighting_lut is uploaded ot the GPU + bool lightingLUTDirty = false; GPU(Memory& mem); void initGraphicsContext() { renderer.initGraphicsContext(); } void getGraphicsContext() { renderer.getGraphicsContext(); } diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 6c868484..b4ff3498 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -55,6 +55,17 @@ namespace PICA { ColourBufferLoc = 0x11D, FramebufferSize = 0x11E, + //LightingRegs + LightingLUTIndex = 0x01C5, + LightingLUTData0 = 0x01C8, + LightingLUTData1 = 0x01C9, + LightingLUTData2 = 0x01CA, + LightingLUTData3 = 0x01CB, + LightingLUTData4 = 0x01CC, + LightingLUTData5 = 0x01CD, + LightingLUTData6 = 0x01CE, + LightingLUTData7 = 0x01CF, + // Geometry pipeline registers VertexAttribLoc = 0x200, AttribFormatLow = 0x201, diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 2f3b2119..bf85904b 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -67,6 +67,7 @@ class Renderer { const std::array& regs; OpenGL::Texture screenTexture; + GLuint lightLUTTextureArray; OpenGL::Framebuffer screenFramebuffer; OpenGL::Framebuffer getColourFBO(); @@ -77,6 +78,7 @@ class Renderer { void bindDepthBuffer(); void setupTextureEnvState(); void bindTexturesToSlots(); + void updateLightingLUT(); public: Renderer(GPU& gpu, const std::array& internalRegs) : gpu(gpu), regs(internalRegs) {} diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 99e5221f..2efc4195 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -21,6 +21,7 @@ void GPU::reset() { shaderUnit.reset(); shaderJIT.reset(); std::memset(vram, 0, vramSize); + lightingLUT.fill(0); totalAttribCount = 0; fixedAttribMask = 0; diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index 610cfe16..f04f44f4 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -28,7 +28,19 @@ u32 GPU::readInternalReg(u32 index) { Helpers::panic("Tried to read invalid GPU register. Index: %X\n", index); return 0; } - + using namespace PICA::InternalRegs; + if(index>=LightingLUTData0&&index<=LightingLUTData7){ + uint32_t ind = regs[LightingLUTIndex]; + uint32_t lut_id = (ind>>8)&(0x1f); + uint32_t lut_addr = ind&0xff; + uint32_t value = 0xffffffff; + if(lut_id>8)&(0x1f); + uint32_t lut_addr = ind&0xff; + if(lut_id=FR_LUT&&lut<=RR_LUT)lut-=1; + if(lut==SP_LUT)lut=8+light; + return texture(u_tex_lighting_lut,vec2(value,lut)).r; } vec3 regToColor(uint reg){ return vec3( @@ -306,17 +308,17 @@ const char* fragmentShader = R"( vec3 half_vector = normalize(normalize(light_vector)+view); - for(int i=0;i<7u;++i){ - if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1,16+i,1)==0){ - uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE,i*4,3); + for(int c=0;c<7u;++c){ + if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1,16+c,1)==0){ + uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE,c*4,3); float scale = float(1u<=6u) scale/=256.0; - uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT,i*4,3); - if(input_id==0u)d[i] = dot(normal,half_vector); - else if(input_id==1u)d[i] = dot(view,half_vector); - else if(input_id==2u)d[i] = dot(normal,view); - else if(input_id==3u)d[i] = dot(light_vector,normal); + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT,c*4,3); + if(input_id==0u)d[c] = dot(normal,half_vector); + else if(input_id==1u)d[c] = dot(view,half_vector); + else if(input_id==2u)d[c] = dot(normal,view); + else if(input_id==3u)d[c] = dot(light_vector,normal); else if(input_id==4u){ uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146+0x10*light_id); uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147+0x10*light_id); @@ -325,16 +327,17 @@ const char* fragmentShader = R"( decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW,16,16),1,11), decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH,0,16),1,11) ); - d[i] = dot(-light_vector,spot_light_vector);// -L . P (aka Spotlight aka SP); + d[c] = dot(-light_vector,spot_light_vector);// -L . P (aka Spotlight aka SP); }else if(input_id==5u){ - d[i] = 1.0;//TODO: cos (aka CP); + d[c] = 1.0;//TODO: cos (aka CP); error_unimpl = true; - }else d[i] = 1.0; + }else d[c] = 1.0; - d[i] = lutLookup(i,d[i])*scale; - if(bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS,2*i,1)!=0u)d[i]=abs(d[i]); - }else d[i]=1.0; + d[c] = lutLookup(c,light_id,d[c]*0.5+0.5)*scale; + if(bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS,2*c,1)!=0u)d[c]=abs(d[c]); + }else d[c]=1.0; } + uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4); if(lookup_config==0){ d[D1_LUT] = 1.0; @@ -421,6 +424,8 @@ const char* fragmentShader = R"( if (tevUnimplementedSourceFlag) { // fragColour = vec4(1.0, 0.0, 1.0, 1.0); } + //fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; + // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] @@ -566,6 +571,7 @@ void Renderer::initGraphicsContext() { glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3); OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex); OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment); @@ -610,6 +616,8 @@ void Renderer::initGraphicsContext() { // Create texture and framebuffer for the 3DS screen const u32 screenTextureWidth = 2 * 400; // Top screen is 400 pixels wide, bottom is 320 const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall + + glGenTextures(1,&lightLUTTextureArray); auto prevTexture = OpenGL::getTex2D(); screenTexture.create(screenTextureWidth, screenTextureHeight, GL_RGBA8); @@ -739,6 +747,8 @@ void Renderer::bindTexturesToSlots() { tex.bind(); } + glActiveTexture(GL_TEXTURE0+3); + glBindTexture(GL_TEXTURE_1D_ARRAY,lightLUTTextureArray); glActiveTexture(GL_TEXTURE0); // Update the texture unit configuration uniform if it changed @@ -748,6 +758,22 @@ void Renderer::bindTexturesToSlots() { glUniform1ui(texUnitConfigLoc, texUnitConfig); } } +void Renderer::updateLightingLUT(){ + std::array u16_lightinglut; + for(int i=0;i vertices) { // The fourth type is meant to be "Geometry primitive". TODO: Find out what that is @@ -811,6 +837,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span bindTexturesToSlots(); //Upload Pica Registers glUniform1uiv(picaRegLoc,0x200-0x47,®s[0x47]); + if(gpu.lightingLUTDirty)updateLightingLUT(); // TODO: Actually use this float viewportWidth = f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0; From df555372adcecdf82aff4a5842c089bd3e1041c0 Mon Sep 17 00:00:00 2001 From: Sky Date: Sun, 2 Jul 2023 21:04:29 -0700 Subject: [PATCH 11/19] [GL] Move quaternion to normal, et. al into vs --- src/core/renderer_gl/renderer_gl.cpp | 38 ++++++++++++++++------------ 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index eddc5126..f74488df 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -20,7 +20,9 @@ const char* vertexShader = R"( layout (location = 6) in vec3 a_view; layout (location = 7) in vec2 a_texcoord2; - out vec4 v_quaternion; + out vec3 v_normal; + out vec3 v_tangent; + out vec3 v_bitangent; out vec4 v_colour; out vec3 v_texcoord0; out vec2 v_texcoord1; @@ -49,6 +51,11 @@ const char* vertexShader = R"( float(abgr >> 24) ); } + vec3 rotateVec3ByQuaternion(vec3 v, vec4 q){ + vec3 u=q.xyz; + float s = q.w; + return 2.0*dot(u, v)*u + (s*s - dot(u, u))*v + 2.0*s*cross(u, v); + } void main() { gl_Position = a_coords; @@ -58,9 +65,12 @@ const char* vertexShader = R"( v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); - v_quaternion = a_quaternion; v_view = a_view; + v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0,0.0,1.0), a_quaternion)); + v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0,0.0,0.0), a_quaternion)); + v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0,1.0,0.0), a_quaternion)); + for (int i = 0; i < 6; i++) { v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); } @@ -72,7 +82,9 @@ const char* vertexShader = R"( const char* fragmentShader = R"( #version 410 core - in vec4 v_quaternion; + in vec3 v_tangent; + in vec3 v_normal; + in vec3 v_bitangent; in vec4 v_colour; in vec3 v_texcoord0; in vec2 v_texcoord1; @@ -231,11 +243,6 @@ const char* fragmentShader = R"( float(bitfieldExtract(reg,00,8))/255. ); } - vec3 rotateVec3ByQuaternion(vec3 v, vec4 q){ - vec3 u=q.xyz; - float s = q.w; - return 2.0*dot(u, v)*u + (s*s - dot(u, u))*v + 2.0*s*cross(u, v); - } float decodeFP(uint hex, uint E, uint M){ uint width = M + E + 1u; uint bias = 128u - (1u << (E - 1u)); @@ -256,10 +263,9 @@ const char* fragmentShader = R"( // Quaternions describe a transformation from surface-local space to eye space. // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). - vec4 quat = v_quaternion; - vec3 normal = normalize(rotateVec3ByQuaternion(vec3(0.0,0.0,1.0), quat)); - vec3 tangent = normalize(rotateVec3ByQuaternion(vec3(1.0,0.0,0.0), quat)); - vec3 bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0,1.0,0.0), quat)); + vec3 normal = normalize(v_normal ); + vec3 tangent = normalize(v_tangent ); + vec3 bitangent = normalize(v_bitangent); vec3 view = normalize(v_view); uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008F); @@ -297,11 +303,11 @@ const char* fragmentShader = R"( uint GPUREG_LIGHTi_VECTOR_HIGH= readPicaReg(0x0145+0x10*light_id); uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149+0x10*light_id); - vec3 light_vector = vec3( + vec3 light_vector = normalize(vec3( decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW,0,16),5,10), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW,16,16),5,10), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH,0,16),5,10) - ); + )); //Positional Light if(bitfieldExtract(GPUREG_LIGHTi_CONFIG,0,1)==0)error_unimpl = true; @@ -322,11 +328,11 @@ const char* fragmentShader = R"( else if(input_id==4u){ uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146+0x10*light_id); uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147+0x10*light_id); - vec3 spot_light_vector = vec3( + vec3 spot_light_vector = normalize(vec3( decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW,0,16),1,11), decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW,16,16),1,11), decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH,0,16),1,11) - ); + )); d[c] = dot(-light_vector,spot_light_vector);// -L . P (aka Spotlight aka SP); }else if(input_id==5u){ d[c] = 1.0;//TODO: cos (aka CP); From 157254e8a92155a12c1f66f1d3ca40e95331d9ef Mon Sep 17 00:00:00 2001 From: Sky Date: Sun, 2 Jul 2023 22:01:42 -0700 Subject: [PATCH 12/19] [GL] Set disabled light LUTs to 0 Fixes Kirby and Mario Kart --- src/core/renderer_gl/renderer_gl.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index f74488df..4d2778fd 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -346,22 +346,22 @@ const char* fragmentShader = R"( uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4); if(lookup_config==0){ - d[D1_LUT] = 1.0; - d[FR_LUT] = 1.0; + d[D1_LUT] = 0.0; + d[FR_LUT] = 0.0; d[RG_LUT]=d[RB_LUT]=d[RR_LUT]; }else if(lookup_config==1){ - d[D0_LUT] = 1.0; - d[D1_LUT] = 1.0; + d[D0_LUT] = 0.0; + d[D1_LUT] = 0.0; d[RG_LUT]=d[RB_LUT]=d[RR_LUT]; }else if(lookup_config==2){ - d[FR_LUT] = 1.0; - d[SP_LUT] = 1.0; + d[FR_LUT] = 0.0; + d[SP_LUT] = 0.0; d[RG_LUT]=d[RB_LUT]=d[RR_LUT]; }else if(lookup_config==3){ - d[SP_LUT] = 1.0; + d[SP_LUT] = 0.0; d[RG_LUT]=d[RB_LUT]=d[RR_LUT]=1.0; - }else if(lookup_config==4)d[FR_LUT] = 1.0; - else if(lookup_config==5)d[D1_LUT] = 1.0; + }else if(lookup_config==4)d[FR_LUT] = 0.0; + else if(lookup_config==5)d[D1_LUT] = 0.0; else if(lookup_config==6)d[RG_LUT]=d[RB_LUT]=d[RR_LUT]; float distance_factor = 1.0; //a From 50c0c7fea553c9b46eaa2615608a947686d34fa0 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 3 Jul 2023 02:02:18 +0300 Subject: [PATCH 13/19] Try to tone down warnings Co-Authored-By: Wunk --- CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6596ffe7..cccd9e2c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED True) -if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12) +if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fbracket-depth=4096") endif() @@ -13,6 +13,10 @@ endif() project(Alber) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security") +endif() + option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" OFF) option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF) option(ENABLE_LTO "Enable link-time optimization" OFF) @@ -177,4 +181,4 @@ endif() if(ENABLE_USER_BUILD OR DISABLE_PANIC_DEV) target_compile_definitions(Alber PRIVATE PANDA3DS_LIMITED_PANICS=1) -endif() \ No newline at end of file +endif() From f262cf2836f9d232c51160b9b8c2a1ab7c5896f7 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 3 Jul 2023 17:25:09 +0300 Subject: [PATCH 14/19] Fix false positives in crypto code hopefully --- src/core/loader/ncch.cpp | 72 ++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 5a94dd55..75e0196a 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -44,15 +44,20 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn exheaderInfo.offset = info.offset + 0x200; exheaderInfo.size = exheaderSize; exheaderInfo.hashRegionSize = 0; + exheaderInfo.encryptionInfo = std::nullopt; exeFS.offset = info.offset + u64(*(u32*)&header[0x1A0]) * mediaUnit; exeFS.size = u64(*(u32*)&header[0x1A4]) * mediaUnit; exeFS.hashRegionSize = u64(*(u32*)&header[0x1A8]) * mediaUnit; + exeFS.encryptionInfo = std::nullopt; romFS.offset = info.offset + u64(*(u32*)&header[0x1B0]) * mediaUnit; romFS.size = u64(*(u32*)&header[0x1B4]) * mediaUnit; romFS.hashRegionSize = u64(*(u32*)&header[0x1B8]) * mediaUnit; + romFS.encryptionInfo = std::nullopt; + // Shows whether we got the primary and secondary keys correctly + bool gotCryptoKeys = true; if (encrypted) { Crypto::AESKey primaryKeyY; Crypto::AESKey secondaryKeyY; @@ -61,44 +66,36 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn if (!seedCrypto) { secondaryKeyY = primaryKeyY; } else { - Helpers::panic("Seed crypto is not supported"); - return false; + Helpers::warn("Seed crypto is not supported"); + gotCryptoKeys = false; } auto primaryResult = getPrimaryKey(aesEngine, primaryKeyY); - - if (!primaryResult.first) { - Helpers::panic("getPrimaryKey failed!"); - return false; - } - - Crypto::AESKey primaryKey = primaryResult.second; - auto secondaryResult = getSecondaryKey(aesEngine, secondaryKeyY); - if (!secondaryResult.first) { - Helpers::panic("getSecondaryKey failed!"); - return false; + if (!primaryResult.first || !secondaryResult.first) { + gotCryptoKeys = false; + } else { + Crypto::AESKey primaryKey = primaryResult.second; + Crypto::AESKey secondaryKey = secondaryResult.second; + + EncryptionInfo encryptionInfoTmp; + encryptionInfoTmp.normalKey = primaryKey; + encryptionInfoTmp.initialCounter.fill(0); + + for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) { + encryptionInfoTmp.initialCounter[i] = header[0x108 + sizeof(std::uint64_t) - 1 - i]; + } + encryptionInfoTmp.initialCounter[8] = 1; + exheaderInfo.encryptionInfo = encryptionInfoTmp; + + encryptionInfoTmp.initialCounter[8] = 2; + exeFS.encryptionInfo = encryptionInfoTmp; + + encryptionInfoTmp.normalKey = secondaryKey; + encryptionInfoTmp.initialCounter[8] = 3; + romFS.encryptionInfo = encryptionInfoTmp; } - - Crypto::AESKey secondaryKey = secondaryResult.second; - - EncryptionInfo encryptionInfoTmp; - encryptionInfoTmp.normalKey = primaryKey; - encryptionInfoTmp.initialCounter.fill(0); - - for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) { - encryptionInfoTmp.initialCounter[i] = header[0x108 + sizeof(std::uint64_t) - 1 - i]; - } - encryptionInfoTmp.initialCounter[8] = 1; - exheaderInfo.encryptionInfo = encryptionInfoTmp; - - encryptionInfoTmp.initialCounter[8] = 2; - exeFS.encryptionInfo = encryptionInfoTmp; - - encryptionInfoTmp.normalKey = secondaryKey; - encryptionInfoTmp.initialCounter[8] = 3; - romFS.encryptionInfo = encryptionInfoTmp; } if (exheaderSize != 0) { @@ -117,7 +114,13 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn if (u32(programID) == u32(jumpID) && encrypted) { printf("NCSD is supposedly ecrypted but not actually encrypted\n"); encrypted = false; + + // Cartridge is not actually encrypted, set all of our encryption info structures to nullopt + exheaderInfo.encryptionInfo = std::nullopt; + romFS.encryptionInfo = std::nullopt; + exeFS.encryptionInfo = std::nullopt; } + // If it's truly encrypted, we need to read section again. if (encrypted) { if (!aesEngine.haveKeys()) { @@ -128,6 +131,11 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn return false; } + if (!gotCryptoKeys) { + Helpers::panic("ROM is encrypted but it seems we couldn't get either the primary or the secondary key"); + return false; + } + auto [success, bytes] = readFromFile(file, exheaderInfo, &exheader[0], 0, exheaderSize); if (!success || bytes != exheaderSize) { printf("Failed to read Extended NCCH header\n"); From 14356902abed2adc203ffb42e6dfa8f16435ef26 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 3 Jul 2023 19:50:42 +0300 Subject: [PATCH 15/19] [HID] Implement DisableAccelerometer/DisableGyroscopeLow --- include/services/hid.hpp | 2 ++ src/core/services/hid.cpp | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/services/hid.hpp b/include/services/hid.hpp index beadf7f1..70bae750 100644 --- a/include/services/hid.hpp +++ b/include/services/hid.hpp @@ -63,6 +63,8 @@ class HIDService { MAKE_LOG_FUNCTION(log, hidLogger) // Service commands + void disableAccelerometer(u32 messagePointer); + void disableGyroscopeLow(u32 messagePointer); void enableAccelerometer(u32 messagePointer); void enableGyroscopeLow(u32 messagePointer); void getGyroscopeLowCalibrateParam(u32 messagePointer); diff --git a/src/core/services/hid.cpp b/src/core/services/hid.cpp index 7993dc7a..27a078f2 100644 --- a/src/core/services/hid.cpp +++ b/src/core/services/hid.cpp @@ -7,7 +7,9 @@ namespace HIDCommands { enum : u32 { GetIPCHandles = 0x000A0000, EnableAccelerometer = 0x00110000, + DisableAccelerometer = 0x00120000, EnableGyroscopeLow = 0x00130000, + DisableGyroscopeLow = 0x00140000, GetGyroscopeLowRawToDpsCoefficient = 0x00150000, GetGyroscopeLowCalibrateParam = 0x00160000 }; @@ -36,6 +38,8 @@ void HIDService::reset() { void HIDService::handleSyncRequest(u32 messagePointer) { const u32 command = mem.read32(messagePointer); switch (command) { + case HIDCommands::DisableAccelerometer: disableAccelerometer(messagePointer); break; + case HIDCommands::DisableGyroscopeLow: disableGyroscopeLow(messagePointer); break; case HIDCommands::EnableAccelerometer: enableAccelerometer(messagePointer); break; case HIDCommands::EnableGyroscopeLow: enableGyroscopeLow(messagePointer); break; case HIDCommands::GetGyroscopeLowCalibrateParam: getGyroscopeLowCalibrateParam(messagePointer); break; @@ -53,6 +57,14 @@ void HIDService::enableAccelerometer(u32 messagePointer) { mem.write32(messagePointer + 4, Result::Success); } +void HIDService::disableAccelerometer(u32 messagePointer) { + log("HID::DisableAccelerometer\n"); + accelerometerEnabled = false; + + mem.write32(messagePointer, IPC::responseHeader(0x12, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); +} + void HIDService::enableGyroscopeLow(u32 messagePointer) { log("HID::EnableGyroscopeLow\n"); gyroEnabled = true; @@ -61,6 +73,14 @@ void HIDService::enableGyroscopeLow(u32 messagePointer) { mem.write32(messagePointer + 4, Result::Success); } +void HIDService::disableGyroscopeLow(u32 messagePointer) { + log("HID::DisableGyroscopeLow\n"); + gyroEnabled = false; + + mem.write32(messagePointer, IPC::responseHeader(0x14, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); +} + void HIDService::getGyroscopeLowCalibrateParam(u32 messagePointer) { log("HID::GetGyroscopeLowCalibrateParam\n"); constexpr s16 unit = 6700; // Approximately from Citra which took it from hardware From e854d4ba63c96e6930028947ce5aacb042b7b636 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 4 Jul 2023 18:15:27 +0300 Subject: [PATCH 16/19] Formatting --- include/PICA/gpu.hpp | 43 +++++++------------------ include/PICA/regs.hpp | 28 +++++++++++++++++ src/core/PICA/regs.cpp | 47 +++++++++++++++++----------- src/core/renderer_gl/renderer_gl.cpp | 41 ++++++++++++++---------- 4 files changed, 92 insertions(+), 67 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 2640ccce..ced2c557 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -69,39 +69,18 @@ class GPU { Renderer renderer; PicaVertex getImmediateModeVertex(); -public: - enum : u32 { - LIGHT_LUT_D0=0, - LIGHT_LUT_D1, - LIGHT_LUT_FR, - LIGHT_LUT_RB, - LIGHT_LUT_RG, - LIGHT_LUT_RR, - LIGHT_LUT_SP0=0x8, - LIGHT_LUT_SP1, - LIGHT_LUT_SP2, - LIGHT_LUT_SP3, - LIGHT_LUT_SP4, - LIGHT_LUT_SP5, - LIGHT_LUT_SP6, - LIGHT_LUT_SP7, - LIGHT_LUT_DA0=0x10, - LIGHT_LUT_DA1, - LIGHT_LUT_DA2, - LIGHT_LUT_DA3, - LIGHT_LUT_DA4, - LIGHT_LUT_DA5, - LIGHT_LUT_DA6, - LIGHT_LUT_DA7, - LIGHT_LUT_COUNT - }; - //256 entries per LUT with each LUT as its own row forming a 2D image 256xLIGHT_LUT_COUNT - //Encoded in PICA native format - std::array lightingLUT; - //Used to prevent uploading the lighting_lut on every draw call - //Set to true when the CPU writes to the lighting_lut - //Set to false by the renderer when the lighting_lut is uploaded ot the GPU + + public: + // 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT + // Encoded in PICA native format + static constexpr size_t LightingLutSize = PICA::Lights::LUT_Count * 256; + std::array lightingLUT; + + // Used to prevent uploading the lighting_lut on every draw call + // Set to true when the CPU writes to the lighting_lut + // Set to false by the renderer when the lighting_lut is uploaded ot the GPU bool lightingLUTDirty = false; + GPU(Memory& mem); void initGraphicsContext() { renderer.initGraphicsContext(); } void getGraphicsContext() { renderer.getGraphicsContext(); } diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index b4ff3498..0337cc1f 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -167,6 +167,34 @@ namespace PICA { }; } + namespace Lights { + enum : u32 { + LUT_D0 = 0, + LUT_D1, + LUT_FR, + LUT_RB, + LUT_RG, + LUT_RR, + LUT_SP0 = 0x8, + LUT_SP1, + LUT_SP2, + LUT_SP3, + LUT_SP4, + LUT_SP5, + LUT_SP6, + LUT_SP7, + LUT_DA0 = 0x10, + LUT_DA1, + LUT_DA2, + LUT_DA3, + LUT_DA4, + LUT_DA5, + LUT_DA6, + LUT_DA7, + LUT_Count + }; + } + enum class TextureFmt : u32 { RGBA8 = 0x0, RGB8 = 0x1, diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index f04f44f4..7cc097de 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -24,23 +24,29 @@ void GPU::writeReg(u32 address, u32 value) { } u32 GPU::readInternalReg(u32 index) { + using namespace PICA::InternalRegs; + if (index > regNum) { Helpers::panic("Tried to read invalid GPU register. Index: %X\n", index); return 0; } - using namespace PICA::InternalRegs; - if(index>=LightingLUTData0&&index<=LightingLUTData7){ - uint32_t ind = regs[LightingLUTIndex]; - uint32_t lut_id = (ind>>8)&(0x1f); - uint32_t lut_addr = ind&0xff; - uint32_t value = 0xffffffff; - if(lut_id= LightingLUTData0 && index <= LightingLUTData7) { + const uint32_t index = regs[LightingLUTIndex]; // Get full LUT index register + const uint32_t lutID = getBits<8, 5>(index); // Get which LUT we're actually writing to + uint32_t lutIndex = getBits<0, 8>(index); // And get the index inside the LUT we're writing to + uint32_t value = 0xffffffff; // Return value + + if (lutID < PICA::Lights::LUT_Count) { + value = lightingLUT[lutID * 256 + lutIndex]; } - lut_addr+=1; - regs[LightingLUTIndex]=(ind&~0xff)|(lut_addr&0xff); + + // Increment the bottom 8 bits of the lighting LUT index register + lutIndex += 1; + regs[LightingLUTIndex] = (index & ~0xff) | (lutIndex & 0xff); return value; } + return regs[index]; } @@ -111,16 +117,21 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { case LightingLUTData5: case LightingLUTData6: case LightingLUTData7:{ - uint32_t ind = regs[LightingLUTIndex]; - uint32_t lut_id = (ind>>8)&(0x1f); - uint32_t lut_addr = ind&0xff; - if(lut_id(index); // Get which LUT we're actually writing to + uint32_t lutIndex = getBits<0, 8>(index); // And get the index inside the LUT we're writing to + + if (lutID < PICA::Lights::LUT_Count) { + lightingLUT[lutID * 256 + lutIndex] = newValue; lightingLUTDirty = true; } - lut_addr+=1; - regs[LightingLUTIndex]=(ind&~0xff)|(lut_addr&0xff); - } break; + + // Increment the bottom 8 bits of the lighting LUT index register + lutIndex += 1; + regs[LightingLUTIndex] = (index & ~0xff) | (lutIndex & 0xff); + + break; + } case VertexFloatUniformIndex: shaderUnit.vs.setFloatUniformIndex(value); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 0e7aa4b3..b5350ddc 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -5,8 +5,7 @@ using namespace Floats; using namespace Helpers; - -// This is all hacked up to display our first triangle +using namespace PICA; const char* vertexShader = R"( #version 410 core @@ -223,6 +222,7 @@ const char* fragmentShader = R"( return result; } + #define D0_LUT 0u #define D1_LUT 1u #define SP_LUT 2u @@ -281,7 +281,7 @@ const char* fragmentShader = R"( primary_color = vec4(vec3(0.0),1.0); secondary_color = vec4(vec3(0.0),1.0); - primary_color.rgb+= regToColor(GPUREG_LIGHTING_AMBIENT); + primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT); uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0); uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1); @@ -753,8 +753,8 @@ void Renderer::bindTexturesToSlots() { tex.bind(); } - glActiveTexture(GL_TEXTURE0+3); - glBindTexture(GL_TEXTURE_1D_ARRAY,lightLUTTextureArray); + glActiveTexture(GL_TEXTURE0 + 3); + glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); glActiveTexture(GL_TEXTURE0); // Update the texture unit configuration uniform if it changed @@ -765,20 +765,22 @@ void Renderer::bindTexturesToSlots() { } } void Renderer::updateLightingLUT(){ - std::array u16_lightinglut; - for(int i=0;i u16_lightinglut; + + for(int i = 0; i < gpu.lightingLUT.size(); i++){ + uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); + u16_lightinglut[i] = value * 65535 / 4095; } - glActiveTexture(GL_TEXTURE0+3); - glBindTexture(GL_TEXTURE_1D_ARRAY,lightLUTTextureArray); - glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, gpu.LIGHT_LUT_COUNT,0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); + + glActiveTexture(GL_TEXTURE0 + 3); + glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); + glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glActiveTexture(GL_TEXTURE0+0); - gpu.lightingLUTDirty=false; + glActiveTexture(GL_TEXTURE0); + gpu.lightingLUTDirty = false; } void Renderer::drawVertices(PICA::PrimType primType, std::span vertices) { @@ -841,9 +843,14 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span setupTextureEnvState(); bindTexturesToSlots(); - //Upload Pica Registers - glUniform1uiv(picaRegLoc,0x200-0x47,®s[0x47]); - if(gpu.lightingLUTDirty)updateLightingLUT(); + + // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x47) + // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates + glUniform1uiv(picaRegLoc, 0x200 - 0x47, ®s[0x47]); + + if (gpu.lightingLUTDirty) { + updateLightingLUT(); + } // TODO: Actually use this float viewportWidth = f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0; From 0cc8d0d8a4cdc25e2ed9cbc5bb7005679dbbef1a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 4 Jul 2023 18:47:56 +0300 Subject: [PATCH 17/19] Some GLSL formatting --- src/core/renderer_gl/renderer_gl.cpp | 180 +++++++++++++++------------ 1 file changed, 101 insertions(+), 79 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index b5350ddc..17f02880 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -232,17 +232,25 @@ const char* fragmentShader = R"( #define RR_LUT 6u float lutLookup(uint lut, uint light, float value){ - if(lut>=FR_LUT&&lut<=RR_LUT)lut-=1; - if(lut==SP_LUT)lut=8+light; - return texture(u_tex_lighting_lut,vec2(value,lut)).r; + if (lut >= FR_LUT && lut <= RR_LUT) + lut-=1; + if (lut==SP_LUT) + lut=8+light; + return texture(u_tex_lighting_lut, vec2(value, lut)).r; } - vec3 regToColor(uint reg){ - return vec3( - float(bitfieldExtract(reg,20,8))/255., - float(bitfieldExtract(reg,10,8))/255., - float(bitfieldExtract(reg,00,8))/255. + + vec3 regToColor(uint reg) { + // Normalization scale to convert from [0...255] to [0.0...1.0] + const float scale = 1.0 / 255.0; + + return scale * vec3( + float(bitfieldExtract(reg, 20, 8)), + float(bitfieldExtract(reg, 10, 8)), + float(bitfieldExtract(reg, 00, 8)) ); } + + // Convert an arbitrary-width floating point literal to an f32 float decodeFP(uint hex, uint E, uint M){ uint width = M + E + 1u; uint bias = 128u - (1u << (E - 1u)); @@ -254,11 +262,14 @@ const char* fragmentShader = R"( if (exponent == (1u << E) - 1u) exponent = 255u; else exponent += bias; hex = sign | (mantissa << (23u - M)) | (exponent << 23u); - }else hex = sign; + } else { + hex = sign; + } + return uintBitsToFloat(hex); } - //Implements the following algorthm: https://mathb.in/26766 + // Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color){ // Quaternions describe a transformation from surface-local space to eye space. // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), @@ -269,13 +280,13 @@ const char* fragmentShader = R"( vec3 view = normalize(v_view); uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008F); - if(bitfieldExtract(GPUREG_LIGHTING_ENABLE,0,1)==0){ + if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0){ primary_color = secondary_color = vec4(1.0); return; } + uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0); - - uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2)&0x7u)+1; + uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2) & 0x7u) +1; uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9); primary_color = vec4(vec3(0.0),1.0); @@ -292,103 +303,114 @@ const char* fragmentShader = R"( bool error_unimpl = false; - for(uint i = 0; i=6u) scale/=256.0; + for(int c = 0; c < 7; c++){ + if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){ + uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) + scale/=256.0; - uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT,c*4,3); - if(input_id==0u)d[c] = dot(normal,half_vector); - else if(input_id==1u)d[c] = dot(view,half_vector); - else if(input_id==2u)d[c] = dot(normal,view); - else if(input_id==3u)d[c] = dot(light_vector,normal); - else if(input_id==4u){ - uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146+0x10*light_id); - uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147+0x10*light_id); + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); + if (input_id==0u) d[c] = dot(normal,half_vector); + else if (input_id==1u) d[c] = dot(view,half_vector); + else if (input_id==2u) d[c] = dot(normal,view); + else if (input_id==3u) d[c] = dot(light_vector,normal); + else if (input_id==4u){ + uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146 + 0x10 * light_id); + uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147 + 0x10 * light_id); vec3 spot_light_vector = normalize(vec3( - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW,0,16),1,11), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW,16,16),1,11), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH,0,16),1,11) + decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1, 11), + decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1, 11), + decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1, 11) )); - d[c] = dot(-light_vector,spot_light_vector);// -L . P (aka Spotlight aka SP); - }else if(input_id==5u){ - d[c] = 1.0;//TODO: cos (aka CP); + d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); + } else if (input_id == 5u) { + d[c] = 1.0; // TODO: cos (aka CP); error_unimpl = true; - }else d[c] = 1.0; + } else { + d[c] = 1.0; + } - d[c] = lutLookup(c,light_id,d[c]*0.5+0.5)*scale; - if(bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS,2*c,1)!=0u)d[c]=abs(d[c]); - }else d[c]=1.0; + d[c] = lutLookup(c, light_id, d[c] * 0.5 + 0.5) * scale; + if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) + d[c] = abs(d[c]); + } else { + d[c] = 1.0; + } } uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4); - if(lookup_config==0){ + if (lookup_config == 0) { d[D1_LUT] = 0.0; d[FR_LUT] = 0.0; - d[RG_LUT]=d[RB_LUT]=d[RR_LUT]; - }else if(lookup_config==1){ + d[RG_LUT]= d[RB_LUT] = d[RR_LUT]; + } else if(lookup_config == 1) { d[D0_LUT] = 0.0; d[D1_LUT] = 0.0; - d[RG_LUT]=d[RB_LUT]=d[RR_LUT]; - }else if(lookup_config==2){ + d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; + } else if(lookup_config == 2) { d[FR_LUT] = 0.0; d[SP_LUT] = 0.0; - d[RG_LUT]=d[RB_LUT]=d[RR_LUT]; - }else if(lookup_config==3){ + d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; + } else if(lookup_config == 3) { d[SP_LUT] = 0.0; - d[RG_LUT]=d[RB_LUT]=d[RR_LUT]=1.0; - }else if(lookup_config==4)d[FR_LUT] = 0.0; - else if(lookup_config==5)d[D1_LUT] = 0.0; - else if(lookup_config==6)d[RG_LUT]=d[RB_LUT]=d[RR_LUT]; + d[RG_LUT]= d[RB_LUT] = d[RR_LUT] = 1.0; + } else if (lookup_config == 4) { + d[FR_LUT] = 0.0; + } else if (lookup_config == 5) { + d[D1_LUT] = 0.0; + } else if (lookup_config == 6) { + d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; + } - float distance_factor = 1.0; //a - float indirect_factor = 1.0;//fi - float shadow_factor = 1.0; //o + float distance_factor = 1.0; // a + float indirect_factor = 1.0; // fi + float shadow_factor = 1.0; // o - float NdotL = dot(normal,light_vector); //Li*N + float NdotL = dot(normal, light_vector); //Li dot N - //Two sided diffuse - if(bitfieldExtract(GPUREG_LIGHTi_CONFIG,1,1)==0)NdotL=max(0.0,NdotL); - else NdotL=abs(NdotL); + // Two sided diffuse + if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0) NdotL = max(0.0, NdotL); + else NdotL = abs(NdotL); float light_factor = distance_factor*d[SP_LUT]*indirect_factor*shadow_factor; - primary_color.rgb += light_factor*(regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE)*NdotL); - secondary_color.rgb += light_factor*( - regToColor(GPUREG_LIGHTi_SPECULAR0)*d[D0_LUT]+ - regToColor(GPUREG_LIGHTi_SPECULAR1)*d[D1_LUT]*vec3(d[RR_LUT],d[RG_LUT],d[RB_LUT]) + primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE)*NdotL); + secondary_color.rgb += light_factor * ( + regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + + regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT]) ); } - uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0,2,1); - uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0,3,1); + uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); - if(fresnel_output1==1u) primary_color.a = d[FR_LUT]; - if(fresnel_output2==1u) secondary_color.a = d[FR_LUT]; + if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; + if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; - if(error_unimpl){ + if (error_unimpl) { secondary_color = primary_color = vec4(1.0,0.,1.0,1.0); } } @@ -430,7 +452,7 @@ const char* fragmentShader = R"( if (tevUnimplementedSourceFlag) { // fragColour = vec4(1.0, 0.0, 1.0, 1.0); } - //fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; + // fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] @@ -573,7 +595,7 @@ void Renderer::initGraphicsContext() { depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); - // Init sampler objects + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2); From f7ad66c7080370ba35db6654a1f142fc7709a6dc Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 4 Jul 2023 19:51:07 +0300 Subject: [PATCH 18/19] [PICA] Add some clipping definitions --- include/PICA/regs.hpp | 7 +++++++ include/opengl.hpp | 5 ++++- src/core/PICA/regs.cpp | 6 +++--- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 0337cc1f..e1c9a819 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -10,6 +10,13 @@ namespace PICA { ViewportHeight = 0x43, ViewportInvh = 0x44, + // Clipping plane control + ClipEnable = 0x47, + ClipData0 = 0x48, + ClipData1 = 0x49, + ClipData2 = 0x4A, + ClipData3 = 0x4B, + DepthScale = 0x4D, DepthOffset = 0x4E, ShaderOutputCount = 0x4F, diff --git a/include/opengl.hpp b/include/opengl.hpp index 9d93078b..b259381b 100644 --- a/include/opengl.hpp +++ b/include/opengl.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - * Copyright (C) 2022 PCSX-Redux authors * + * Copyright (C) 2022 PCSX-Redux & Panda3DS authors * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -524,6 +524,9 @@ namespace OpenGL { static void enableStencil() { glEnable(GL_STENCIL_TEST); } static void disableStencil() { glDisable(GL_STENCIL_TEST); } + static void enableClipPlane(GLuint index) { glEnable(GL_CLIP_DISTANCE0 + index); } + static void disableClipPlane(GLuint index) { glDisable(GL_CLIP_DISTANCE0 + index); } + static void setDepthFunc(DepthFunc func) { glDepthFunc(static_cast(func)); } enum Primitives { diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index 7cc097de..a0eb5adc 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -26,12 +26,12 @@ void GPU::writeReg(u32 address, u32 value) { u32 GPU::readInternalReg(u32 index) { using namespace PICA::InternalRegs; - if (index > regNum) { + if (index > regNum) [[unlikely]] { Helpers::panic("Tried to read invalid GPU register. Index: %X\n", index); return 0; } - else if (index >= LightingLUTData0 && index <= LightingLUTData7) { + else if (index >= LightingLUTData0 && index <= LightingLUTData7) [[unlikely]] { const uint32_t index = regs[LightingLUTIndex]; // Get full LUT index register const uint32_t lutID = getBits<8, 5>(index); // Get which LUT we're actually writing to uint32_t lutIndex = getBits<0, 8>(index); // And get the index inside the LUT we're writing to @@ -53,7 +53,7 @@ u32 GPU::readInternalReg(u32 index) { void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { using namespace PICA::InternalRegs; - if (index > regNum) { + if (index > regNum) [[unlikely]] { Helpers::panic("Tried to write to invalid GPU register. Index: %X, value: %08X\n", index, value); return; } From aa273894734999d902952bbc20b64a5f59e5d907 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 4 Jul 2023 20:33:14 +0300 Subject: [PATCH 19/19] [GL renderer] Implement clipping planes --- src/core/renderer_gl/renderer_gl.cpp | 85 +++++++++++++++++++++------- 1 file changed, 64 insertions(+), 21 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 17f02880..589457f5 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -30,14 +30,16 @@ const char* vertexShader = R"( flat out vec4 v_textureEnvColor[6]; flat out vec4 v_textureEnvBufferColor; + out float gl_ClipDistance[2]; + // TEV uniforms uniform uint u_textureEnvColor[6]; uniform uint u_textureEnvBufferColor; - uniform uint u_picaRegs[0x200-0x47]; + uniform uint u_picaRegs[0x200 - 0x47]; - //Helper so that the implementation of u_pica_regs can be changed later + // Helper so that the implementation of u_pica_regs can be changed later uint readPicaReg(uint reg_addr){ - return u_picaRegs[reg_addr-0x47]; + return u_picaRegs[reg_addr - 0x47]; } vec4 abgr8888ToVec4(uint abgr) { @@ -50,10 +52,30 @@ const char* vertexShader = R"( float(abgr >> 24) ); } + vec3 rotateVec3ByQuaternion(vec3 v, vec4 q){ - vec3 u=q.xyz; + vec3 u = q.xyz; float s = q.w; - return 2.0*dot(u, v)*u + (s*s - dot(u, u))*v + 2.0*s*cross(u, v); + return 2.0 * dot(u, v) * u + (s * s - dot(u, u))* v + 2.0 * s * cross(u, v); + } + + // Convert an arbitrary-width floating point literal to an f32 + float decodeFP(uint hex, uint E, uint M){ + uint width = M + E + 1u; + uint bias = 128u - (1u << (E - 1u)); + uint exponent = (hex >> M) & ((1u << E) - 1u); + uint mantissa = hex & ((1u << M) - 1u); + uint sign = (hex >> (E + M)) << 31u; + + if ((hex & ((1u << (width - 1u)) - 1u)) != 0) { + if (exponent == (1u << E) - 1u) exponent = 255u; + else exponent += bias; + hex = sign | (mantissa << (23u - M)) | (exponent << 23u); + } else { + hex = sign; + } + + return uintBitsToFloat(hex); } void main() { @@ -66,15 +88,30 @@ const char* vertexShader = R"( v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); v_view = a_view; - v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0,0.0,1.0), a_quaternion)); - v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0,0.0,0.0), a_quaternion)); - v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0,1.0,0.0), a_quaternion)); + v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); + v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); + v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); for (int i = 0; i < 6; i++) { v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); } v_textureEnvBufferColor = abgr8888ToVec4(u_textureEnvBufferColor); + + // Parse clipping plane registers + // The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0 + // With n = (A, B, C) being the normal vector and D being the origin point distance + // Therefore, for the second clipping plane, we can just pass the dot product of the clip vector and the input coordinates to gl_ClipDistance[1] + vec4 clipData = vec4( + decodeFP(readPicaReg(0x48) & 0xffffffu, 7, 16), + decodeFP(readPicaReg(0x49) & 0xffffffu, 7, 16), + decodeFP(readPicaReg(0x4A) & 0xffffffu, 7, 16), + decodeFP(readPicaReg(0x4B) & 0xffffffu, 7, 16) + ); + + // There's also another, always-on clipping plane based on vertex z + gl_ClipDistance[0] = -a_coords.z; + gl_ClipDistance[1] = dot(clipData, a_coords); } )"; @@ -114,11 +151,11 @@ const char* fragmentShader = R"( uniform sampler2D u_tex2; uniform sampler1DArray u_tex_lighting_lut; - uniform uint u_picaRegs[0x200-0x47]; + uniform uint u_picaRegs[0x200 - 0x47]; - //Helper so that the implementation of u_pica_regs can be changed later + // Helper so that the implementation of u_pica_regs can be changed later uint readPicaReg(uint reg_addr){ - return u_picaRegs[reg_addr-0x47]; + return u_picaRegs[reg_addr - 0x47]; } vec4 tevSources[16]; @@ -233,9 +270,9 @@ const char* fragmentShader = R"( float lutLookup(uint lut, uint light, float value){ if (lut >= FR_LUT && lut <= RR_LUT) - lut-=1; + lut -= 1; if (lut==SP_LUT) - lut=8+light; + lut = light + 8; return texture(u_tex_lighting_lut, vec2(value, lut)).r; } @@ -303,12 +340,12 @@ const char* fragmentShader = R"( bool error_unimpl = false; - for(uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++){ + for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++){ uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION,int(i*3),3); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140 + 0x10 * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141 + 0x10 * light_id); - uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142 +0x10 * light_id); + uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142 + 0x10 * light_id); uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143 + 0x10 * light_id); uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144 + 0x10 * light_id); uint GPUREG_LIGHTi_VECTOR_HIGH= readPicaReg(0x0145 + 0x10 * light_id); @@ -334,11 +371,11 @@ const char* fragmentShader = R"( scale/=256.0; uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); - if (input_id==0u) d[c] = dot(normal,half_vector); - else if (input_id==1u) d[c] = dot(view,half_vector); - else if (input_id==2u) d[c] = dot(normal,view); - else if (input_id==3u) d[c] = dot(light_vector,normal); - else if (input_id==4u){ + if (input_id == 0u) d[c] = dot(normal,half_vector); + else if (input_id == 1u) d[c] = dot(view,half_vector); + else if (input_id == 2u) d[c] = dot(normal,view); + else if (input_id == 3u) d[c] = dot(light_vector,normal); + else if (input_id == 4u){ uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146 + 0x10 * light_id); uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147 + 0x10 * light_id); vec3 spot_light_vector = normalize(vec3( @@ -828,6 +865,11 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span glUniform1ui(alphaControlLoc, alphaControl); } + OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled + if (regs[PICA::InternalRegs::ClipEnable] & 1) { + OpenGL::enableClipPlane(1); + } + setupBlending(); OpenGL::Framebuffer poop = getColourFBO(); poop.bind(OpenGL::DrawAndReadFramebuffer); @@ -904,7 +946,6 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span constexpr u32 topScreenBuffer = 0x1f000000; constexpr u32 bottomScreenBuffer = 0x1f05dc00; -// Quick hack to display top screen for now void Renderer::display() { OpenGL::disableScissor(); @@ -1000,6 +1041,8 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 OpenGL::disableBlend(); OpenGL::disableDepth(); OpenGL::disableScissor(); + OpenGL::disableClipPlane(0); + OpenGL::disableClipPlane(1); displayProgram.use(); // Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture