diff --git a/include/PICA/dynapica/shader_rec_emitter_x64.hpp b/include/PICA/dynapica/shader_rec_emitter_x64.hpp index d22ed371..0338911c 100644 --- a/include/PICA/dynapica/shader_rec_emitter_x64.hpp +++ b/include/PICA/dynapica/shader_rec_emitter_x64.hpp @@ -91,6 +91,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator { void recCMP(const PICAShader& shader, u32 instruction); void recDP3(const PICAShader& shader, u32 instruction); void recDP4(const PICAShader& shader, u32 instruction); + void recDPH(const PICAShader& shader, u32 instruction); void recEMIT(const PICAShader& shader, u32 instruction); void recEND(const PICAShader& shader, u32 instruction); void recEX2(const PICAShader& shader, u32 instruction); @@ -111,7 +112,6 @@ class ShaderEmitter : public Xbyak::CodeGenerator { void recRSQ(const PICAShader& shader, u32 instruction); void recSETEMIT(const PICAShader& shader, u32 instruction); void recSGE(const PICAShader& shader, u32 instruction); - void recSGEI(const PICAShader& shader, u32 instruction); void recSLT(const PICAShader& shader, u32 instruction); MAKE_LOG_FUNCTION(log, shaderJITLogger) diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index 0f3154f1..48db777d 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -23,6 +23,7 @@ namespace ShaderOpcodes { LG2 = 0x06, LIT = 0x07, MUL = 0x08, + SGE = 0x09, SLT = 0x0A, FLR = 0x0B, MAX = 0x0C, diff --git a/include/kernel/kernel.hpp b/include/kernel/kernel.hpp index 2db7cdda..99687ee1 100644 --- a/include/kernel/kernel.hpp +++ b/include/kernel/kernel.hpp @@ -52,6 +52,9 @@ class Kernel { // Top 8 bits are the major version, bottom 8 are the minor version u16 kernelVersion = 0; + // Shows whether a reschedule is needed + bool needReschedule = false; + Handle makeArbiter(); Handle makeProcess(u32 id); Handle makePort(const char* name); @@ -73,7 +76,6 @@ private: void switchThread(int newThreadIndex); void sortThreads(); std::optional getNextThread(); - void switchToNextThread(); 
void rescheduleThreads(); bool canThreadRun(const Thread& t); bool shouldWaitOnObject(KernelObject* object); @@ -168,6 +170,15 @@ public: void serviceSVC(u32 svc); void reset(); + void requireReschedule() { needReschedule = true; } + + void evalReschedule() { + if (needReschedule) { + needReschedule = false; + rescheduleThreads(); + } + } + Handle makeObject(KernelObjectType type) { if (handleCounter > KernelHandles::Max) [[unlikely]] { Helpers::panic("Hlep we somehow created enough kernel objects to overflow this thing"); diff --git a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp index 13eb630e..7bcf4b46 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp @@ -143,6 +143,7 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) { break; case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break; case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break; + case ShaderOpcodes::DPH: recDPH(shaderUnit, instruction); break; case ShaderOpcodes::END: recEND(shaderUnit, instruction); break; case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break; case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break; @@ -179,6 +180,10 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) { case ShaderOpcodes::SLTI: recSLT(shaderUnit, instruction); break; + case ShaderOpcodes::SGE: + case ShaderOpcodes::SGEI: + recSGE(shaderUnit, instruction); break; + default: Helpers::panic("Shader JIT: Unimplemented PICA opcode %X", opcode); } @@ -525,6 +530,30 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) { storeRegister(src1_xmm, shader, dest, operandDescriptor); } +void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) { + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; + const u32 src1 = getBits<12, 7>(instruction); + const u32 src2 = getBits<7, 
5>(instruction); // src2 coming first because PICA moment + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + + // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) + loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor); + loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor); + + // Attach 1.0 to the w component of src1 + if (haveSSE4_1) { + blendps(src1_xmm, xword[rip + onesVector], 0b1000); + } else { + movaps(scratch1, src1_xmm); + unpckhps(scratch1, xword[rip + onesVector]); + unpcklpd(src1_xmm, scratch1); + } + + dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of src1_xmm similarly to PICA + storeRegister(src1_xmm, shader, dest, operandDescriptor); +} + void ShaderEmitter::recMAX(const PICAShader& shader, u32 instruction) { const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; const u32 src1 = getBits<12, 7>(instruction); @@ -656,6 +685,24 @@ void ShaderEmitter::recSLT(const PICAShader& shader, u32 instruction) { storeRegister(src1_xmm, shader, dest, operandDescriptor); } +void ShaderEmitter::recSGE(const PICAShader& shader, u32 instruction) { + const bool isSGEI = (instruction >> 26) == ShaderOpcodes::SGEI; + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; + + const u32 src1 = isSGEI ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction); + const u32 src2 = isSGEI ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction); + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + + loadRegister<1>(src1_xmm, shader, src1, isSGEI ? 0 : idx, operandDescriptor); + loadRegister<2>(src2_xmm, shader, src2, isSGEI ? 
idx : 0, operandDescriptor); + + // SSE does not have a cmpgeps instruction so we turn src1 >= src2 to src2 <= src1, result in src2 + cmpleps(src2_xmm, src1_xmm); + andps(src2_xmm, xword[rip + onesVector]); + storeRegister(src2_xmm, shader, dest, operandDescriptor); +} + void ShaderEmitter::recCMP(const PICAShader& shader, u32 instruction) { const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; const u32 src1 = getBits<12, 7>(instruction); diff --git a/src/core/kernel/address_arbiter.cpp b/src/core/kernel/address_arbiter.cpp index 9fb9f7be..8c07b423 100644 --- a/src/core/kernel/address_arbiter.cpp +++ b/src/core/kernel/address_arbiter.cpp @@ -87,7 +87,7 @@ void Kernel::arbitrateAddress() { Helpers::panic("ArbitrateAddress: Unimplemented type %s", arbitrationTypeToString(type)); } - rescheduleThreads(); + requireReschedule(); } // Signal up to "threadCount" threads waiting on the arbiter indicated by "waitingAddress" diff --git a/src/core/kernel/events.cpp b/src/core/kernel/events.cpp index e117dd62..ca72add1 100644 --- a/src/core/kernel/events.cpp +++ b/src/core/kernel/events.cpp @@ -35,22 +35,15 @@ bool Kernel::signalEvent(Handle handle) { // Check if there's any thread waiting on this event if (event->waitlist != 0) { - // One-shot events get cleared once they are acquired by some thread and only wake up 1 thread at a time + wakeupAllThreads(event->waitlist, handle); + event->waitlist = 0; // No threads waiting; + if (event->resetType == ResetType::OneShot) { - int index = wakeupOneThread(event->waitlist, handle); // Wake up one thread with the highest priority - event->waitlist ^= (1ull << index); // Remove thread from waitlist event->fired = false; - } else { - wakeupAllThreads(event->waitlist, handle); - event->waitlist = 0; // No threads waiting; } - - // We must reschedule our threads if we signalled one. 
Some games such as FE: Awakening rely on this - // If this does not happen, we can have phenomena such as a thread waiting up a higher priority thread, - // and the higher priority thread just never running - rescheduleThreads(); } - + + rescheduleThreads(); return true; } @@ -121,7 +114,6 @@ void Kernel::waitSynchronization1() { if (!shouldWaitOnObject(object)) { acquireSyncObject(object, threads[currentThreadIndex]); // Acquire the object since it's ready regs[0] = Result::Success; - rescheduleThreads(); } else { // Timeout is 0, don't bother waiting, instantly timeout if (ns == 0) { @@ -141,7 +133,7 @@ void Kernel::waitSynchronization1() { // Add the current thread to the object's wait list object->getWaitlist() |= (1ull << currentThreadIndex); - switchToNextThread(); + requireReschedule(); } } @@ -204,14 +196,13 @@ void Kernel::waitSynchronizationN() { auto& t = threads[currentThreadIndex]; - // We only need to wait on one object. Easy...?! + // We only need to wait on one object. Easy. 
if (!waitAll) { // If there's ready objects, acquire the first one and return if (oneObjectReady) { regs[0] = Result::Success; regs[1] = firstReadyObjectIndex; // Return index of the acquired object acquireSyncObject(waitObjects[firstReadyObjectIndex].second, t); // Acquire object - rescheduleThreads(); return; } @@ -229,8 +220,8 @@ void Kernel::waitSynchronizationN() { waitObjects[i].second->getWaitlist() |= (1ull << currentThreadIndex); // And add the thread to the object's waitlist } - switchToNextThread(); + requireReschedule(); } else { - Helpers::panic("WaitSynchronizatioN with waitAll"); + Helpers::panic("WaitSynchronizationN with waitAll"); } } \ No newline at end of file diff --git a/src/core/kernel/kernel.cpp b/src/core/kernel/kernel.cpp index 8f3aeda0..c48c8f18 100644 --- a/src/core/kernel/kernel.cpp +++ b/src/core/kernel/kernel.cpp @@ -61,6 +61,8 @@ void Kernel::serviceSVC(u32 svc) { case 0x3D: outputDebugString(); break; default: Helpers::panic("Unimplemented svc: %X @ %08X", svc, regs[15]); break; } + + evalReschedule(); } void Kernel::setVersion(u8 major, u8 minor) { @@ -140,6 +142,8 @@ void Kernel::reset() { threadIndices.clear(); serviceManager.reset(); + needReschedule = false; + // Allocate handle #0 to a dummy object and make a main process object makeObject(KernelObjectType::Dummy); currentProcess = makeProcess(1); // Use ID = 1 for main process diff --git a/src/core/kernel/ports.cpp b/src/core/kernel/ports.cpp index a7351fd0..84c8cc05 100644 --- a/src/core/kernel/ports.cpp +++ b/src/core/kernel/ports.cpp @@ -76,6 +76,11 @@ void Kernel::sendSyncRequest() { u32 messagePointer = getTLSPointer() + 0x80; // The message is stored starting at TLS+0x80 logSVC("SendSyncRequest(session handle = %X)\n", handle); + // Service calls via SendSyncRequest and file access needs to put the caller to sleep for a given amount of time + // To make sure that the other threads don't get starved. 
Various games rely on this (including Sonic Boom: Shattering Crystal it seems) + constexpr u64 syncRequestDelayNs = 39000; + sleepThread(syncRequestDelayNs); + // The sync request is being sent at a service rather than whatever port, so have the service manager intercept it if (KernelHandles::isServiceHandle(handle)) { // The service call might cause a reschedule and change threads. Hence, set r0 before executing the service call diff --git a/src/core/kernel/threads.cpp b/src/core/kernel/threads.cpp index 587d5fc4..2e39b620 100644 --- a/src/core/kernel/threads.cpp +++ b/src/core/kernel/threads.cpp @@ -82,32 +82,26 @@ std::optional Kernel::getNextThread() { return std::nullopt; } -void Kernel::switchToNextThread() { - std::optional newThreadIndex = getNextThread(); - - if (!newThreadIndex.has_value()) { - log("Kernel tried to switch to the next thread but none found. Switching to random thread\n"); - assert(aliveThreadCount != 0); - Helpers::panic("rpog"); - - int index; - do { - index = rand() % threadCount; - } while (threads[index].status == ThreadStatus::Dead); // TODO: Pray this doesn't hang - - switchThread(index); - } else { - switchThread(newThreadIndex.value()); - } -} - -// See if there;s a higher priority, ready thread and switch to that +// See if there is a higher priority, ready thread and switch to that void Kernel::rescheduleThreads() { + Thread& current = threads[currentThreadIndex]; // Current running thread + + // If the current thread is running and hasn't gone to sleep or whatever, set it to Ready instead of Running + // So that getNextThread will evaluate it properly + if (current.status == ThreadStatus::Running) { + current.status = ThreadStatus::Ready; + } + ThreadStatus currentStatus = current.status; std::optional newThreadIndex = getNextThread(); - if (newThreadIndex.has_value() && newThreadIndex.value() != currentThreadIndex) { - threads[currentThreadIndex].status = ThreadStatus::Ready; + // Case 1: A thread can run + if 
(newThreadIndex.has_value()) { switchThread(newThreadIndex.value()); + } + + // Case 2: No other thread can run, straight to the idle thread + else { + switchThread(idleThreadIndex); } } @@ -184,6 +178,7 @@ void Kernel::releaseMutex(Mutex* moo) { // If the lock count reached 0 then the thread no longer owns the mootex and it can be given to a new one if (moo->lockCount == 0) { moo->locked = false; + if (moo->waitlist != 0) { int index = wakeupOneThread(moo->waitlist, moo->handle); // Wake up one thread and get its index moo->waitlist ^= (1ull << index); // Remove thread from waitlist @@ -194,7 +189,7 @@ void Kernel::releaseMutex(Mutex* moo) { moo->ownerThread = index; } - rescheduleThreads(); + requireReschedule(); } } @@ -210,7 +205,7 @@ void Kernel::sleepThreadOnArbiter(u32 waitingAddress) { t.status = ThreadStatus::WaitArbiter; t.waitingAddress = waitingAddress; - switchToNextThread(); + requireReschedule(); } // Acquires an object that is **ready to be acquired** without waiting on it @@ -226,7 +221,13 @@ void Kernel::acquireSyncObject(KernelObject* object, const Thread& thread) { case KernelObjectType::Mutex: { Mutex* moo = object->getData(); - moo->locked = true; // Set locked to true, whether it's false or not because who cares + + // Only reschedule if we're acquiring the mutex for the first time + if (!moo->locked) { + moo->locked = true; + requireReschedule(); + } + // Increment lock count by 1. If a thread acquires a mootex multiple times, it needs to release it until count == 0 // For the mootex to be free. 
moo->lockCount++; @@ -338,20 +339,31 @@ void Kernel::wakeupAllThreads(u64 waitlist, Handle handle) { void Kernel::sleepThread(s64 ns) { if (ns < 0) { Helpers::panic("Sleeping a thread for a negative amount of ns"); - } else if (ns == 0) { // Used when we want to force a thread switch - std::optional newThreadIndex = getNextThread(); - // If there's no other thread waiting, don't bother yielding - if (newThreadIndex.has_value()) { - threads[currentThreadIndex].status = ThreadStatus::Ready; - switchThread(newThreadIndex.value()); - } - } else { // If we're sleeping for > 0 ns + } else if (ns == 0) { + // TODO: This is garbage, but it works so eh we can keep it for now Thread& t = threads[currentThreadIndex]; + + // See if a thread other than this and the idle thread is waiting to run by temporarily marking the current thread as dead and searching + // If there is another thread to run, then run it. Otherwise, go back to this thread, not to the idle thread + t.status = ThreadStatus::Dead; + auto nextThreadIndex = getNextThread(); + t.status = ThreadStatus::Ready; + + if (nextThreadIndex.has_value()) { + const auto index = nextThreadIndex.value(); + + if (index != idleThreadIndex) { + switchThread(index); + } + } + } else { // If we're sleeping for > 0 ns + Thread& t = threads[currentThreadIndex]; + t.status = ThreadStatus::WaitSleep; t.waitingNanoseconds = ns; t.sleepTick = cpu.getTicks(); - switchToNextThread(); + requireReschedule(); } } @@ -374,7 +386,7 @@ void Kernel::createThread() { regs[0] = Result::Success; regs[1] = makeThread(entrypoint, initialSP, priority, id, arg, ThreadStatus::Ready); - rescheduleThreads(); + requireReschedule(); } // void SleepThread(s64 nanoseconds) @@ -448,7 +460,7 @@ void Kernel::setThreadPriority() { } } sortThreads(); - rescheduleThreads(); + requireReschedule(); } void Kernel::exitThread() { @@ -472,7 +484,7 @@ void Kernel::exitThread() { t.threadsWaitingForTermination = 0; // No other threads waiting } - switchToNextThread(); + 
requireReschedule(); } void Kernel::svcCreateMutex() { diff --git a/src/emulator.cpp b/src/emulator.cpp index fd5efe6b..2e7cd521 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -357,6 +357,8 @@ void Emulator::run() { hid.updateInputs(cpu.getTicks()); } + // TODO: Should this be uncommented? + // kernel.evalReschedule(); // Update inputs in the HID module SDL_GL_SwapWindow(window); diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index f6461094..5b6e6830 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -228,10 +228,18 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5, 10) )); - // Positional Light - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0) error_unimpl = true; + vec3 half_vector; - vec3 half_vector = normalize(normalize(light_vector) + view); + // Positional Light + if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0) { + error_unimpl = true; + // half_vector = normalize(normalize(light_vector + v_view) + view); + } + + // Directional light + else { + half_vector = normalize(normalize(light_vector) + view); + } for (int c = 0; c < 7; c++) { if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0) {