From 9db4642f666c1c46dd4f9f816472929cbe765bb7 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 2 Dec 2025 22:27:01 +0100 Subject: [PATCH 1/9] video_core: Scheduler priority pending operation queue (#3848) * Priority pending ops * Use priority operations on image download * clang-format * Simplify thread * I'm tired, it's too late :( --- .../renderer_vulkan/vk_scheduler.cpp | 29 +++++++++++++++ src/video_core/renderer_vulkan/vk_scheduler.h | 18 ++++++++++ .../texture_cache/texture_cache.cpp | 35 +++---------------- src/video_core/texture_cache/texture_cache.h | 10 ------ 4 files changed, 52 insertions(+), 40 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index cc8f6956d..fee0b408e 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/debug.h" +#include "common/thread.h" #include "imgui/renderer/texture_manager.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -17,6 +18,8 @@ Scheduler::Scheduler(const Instance& instance) profiler_scope = reinterpret_cast(std::malloc(sizeof(tracy::VkCtxScope))); #endif AllocateWorkerCommandBuffers(); + priority_pending_ops_thread = + std::jthread(std::bind_front(&Scheduler::PriorityPendingOpsThread, this)); } Scheduler::~Scheduler() { @@ -167,6 +170,32 @@ void Scheduler::SubmitExecution(SubmitInfo& info) { PopPendingOperations(); } +void Scheduler::PriorityPendingOpsThread(std::stop_token stoken) { + Common::SetCurrentThreadName("shadPS4:GpuSchedPriorityPendingOpsRunner"); + + while (!stoken.stop_requested()) { + PendingOp op; + { + std::unique_lock lk(priority_pending_ops_mutex); + priority_pending_ops_cv.wait(lk, stoken, + [this] { return !priority_pending_ops.empty(); }); + if (stoken.stop_requested()) { + break; + } + + op = std::move(priority_pending_ops.front()); + priority_pending_ops.pop(); + } + + master_semaphore.Wait(op.gpu_tick); + if (stoken.stop_requested()) { + break; + } + + op.callback(); + } +} + void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf) { if (dirty_state.viewports) { dirty_state.viewports = false; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 506b84159..aff299e54 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -5,6 +5,7 @@ #include #include +#include #include #include "common/unique_function.h" @@ -401,10 +402,21 @@ public: } /// Defers an operation until the gpu has reached the current cpu tick. + /// Will be run when submitting or calling PopPendingOperations. void DeferOperation(Common::UniqueFunction&& func) { pending_ops.emplace(std::move(func), CurrentTick()); } + /// Defers an operation until the gpu has reached the current cpu tick. + /// Runs as soon as possible in another thread. + void DeferPriorityOperation(Common::UniqueFunction&& func) { + { + std::unique_lock lk(priority_pending_ops_mutex); + priority_pending_ops.emplace(std::move(func), CurrentTick()); + } + priority_pending_ops_cv.notify_one(); + } + static std::mutex submit_mutex; private: @@ -412,6 +424,8 @@ private: void SubmitExecution(SubmitInfo& info); + void PriorityPendingOpsThread(std::stop_token stoken); + private: const Instance& instance; MasterSemaphore master_semaphore; @@ -424,6 +438,10 @@ private: u64 gpu_tick; }; std::queue pending_ops; + std::queue priority_pending_ops; + std::mutex priority_pending_ops_mutex; + std::condition_variable_any priority_pending_ops_cv; + std::jthread priority_pending_ops_thread; RenderState render_state; bool is_rendering = false; tracy::VkCtxScope* profiler_scope{}; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index c7604995a..17c7e67b3 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -52,9 +52,6 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), DEFAULT_CRITICAL_GC_MEMORY)); trigger_gc_memory = static_cast((device_local_memory - mem_threshold) / 2); - - downloaded_images_thread = - std::jthread([&](const std::stop_token& token) { DownloadedImagesThread(token); }); } TextureCache::~TextureCache() = default; @@ -125,33 +122,11 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { cmdbuf.copyImageToBuffer(image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, download_buffer.Handle(), image_download); - { - std::unique_lock lock(downloaded_images_mutex); - downloaded_images_queue.emplace(scheduler.CurrentTick(), image.info.guest_address, download, - download_size); - downloaded_images_cv.notify_one(); - } -} - -void TextureCache::DownloadedImagesThread(const std::stop_token& token) { - auto* memory = Core::Memory::Instance(); - while (!token.stop_requested()) { - DownloadedImage image; - { - std::unique_lock lock{downloaded_images_mutex}; - downloaded_images_cv.wait(lock, token, - [this] { return !downloaded_images_queue.empty(); }); - if (token.stop_requested()) { - break; - } - image = downloaded_images_queue.front(); - downloaded_images_queue.pop(); - } - - scheduler.GetMasterSemaphore()->Wait(image.tick); - memory->TryWriteBacking(std::bit_cast(image.device_addr), image.download, - image.download_size); - } + scheduler.DeferPriorityOperation( + [this, device_addr = image.info.guest_address, download, download_size] { + Core::Memory::Instance()->TryWriteBacking(std::bit_cast(device_addr), download, + download_size); + }); } void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9d25069db..141ac938f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -314,16 +314,6 @@ private: Common::LeastRecentlyUsedCache lru_cache; PageTable page_table; std::mutex mutex; - struct DownloadedImage { - u64 tick; - VAddr device_addr; - void* download; - size_t download_size; - }; - std::queue downloaded_images_queue; - std::mutex downloaded_images_mutex; - std::condition_variable_any downloaded_images_cv; - std::jthread downloaded_images_thread; struct MetaDataInfo { enum class Type { CMask, From 98fd0689ac46250debd536dcd16b6cf11dfb159d Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Wed, 3 Dec 2025 14:05:19 +0100 Subject: [PATCH 2/9] Revert non-Linux parts of #3819 (#3852) * Revert non-Linux parts of #3819 * More OpenOrbis stuff that I couldn't be bothered to put in a new PR --- src/core/libraries/fiber/fiber.cpp | 6 +++--- src/core/libraries/kernel/threads/pthread.cpp | 4 ++++ src/core/linker.cpp | 4 ++-- src/core/tls.cpp | 18 ++++++++++++++++++ src/core/tls.h | 3 +++ 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp index 776792041..2ebfbd244 100644 --- a/src/core/libraries/fiber/fiber.cpp +++ b/src/core/libraries/fiber/fiber.cpp @@ -6,8 +6,8 @@ #include "common/elf_info.h" #include "common/logging/log.h" #include "core/libraries/fiber/fiber_error.h" -#include "core/libraries/kernel/threads/pthread.h" #include "core/libraries/libs.h" +#include "core/tls.h" namespace Libraries::Fiber { @@ -20,7 +20,7 @@ static constexpr u64 kFiberStackSizeCheck = 0xdeadbeefdeadbeef; static std::atomic context_size_check = false; OrbisFiberContext* GetFiberContext() { - return Libraries::Kernel::g_curthread->tcb->tcb_fiber; + return Core::GetTcbBase()->tcb_fiber; } extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp"); @@ -269,7 +269,7 @@ s32 PS4_SYSV_ABI sceFiberRunImpl(OrbisFiber* fiber, void* addr_context, u64 size return ORBIS_FIBER_ERROR_INVALID; } - Core::Tcb* tcb = Libraries::Kernel::g_curthread->tcb; + Core::Tcb* tcb = Core::GetTcbBase(); if (tcb->tcb_fiber) { return ORBIS_FIBER_ERROR_PERMISSION; } diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 8ab8b72c3..6c11eebc2 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -663,6 +663,10 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("Z4QosVuAsA0", "libkernel", 1, "libkernel", posix_pthread_once); LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", posix_pthread_self); LIB_FUNCTION("OxhIB8LB-PQ", "libkernel", 1, "libkernel", posix_pthread_create); + LIB_FUNCTION("lZzFeSxPl08", "libkernel", 1, "libkernel", posix_pthread_setcancelstate); + LIB_FUNCTION("CBNtXOoef-E", "libkernel", 1, "libkernel", posix_sched_get_priority_max); + LIB_FUNCTION("m0iS6jNsXds", "libkernel", 1, "libkernel", posix_sched_get_priority_min); + LIB_FUNCTION("Xs9hdiD7sAA", "libkernel", 1, "libkernel", posix_pthread_setschedparam); LIB_FUNCTION("+U1R4WtXvoc", "libkernel", 1, "libkernel", posix_pthread_detach); LIB_FUNCTION("7Xl257M4VNI", "libkernel", 1, "libkernel", posix_pthread_equal); LIB_FUNCTION("h9CcP3J0oVM", "libkernel", 1, "libkernel", posix_pthread_join); diff --git a/src/core/linker.cpp b/src/core/linker.cpp index b7c9a2895..ac6b37769 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -368,7 +368,7 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul void* Linker::TlsGetAddr(u64 module_index, u64 offset) { std::scoped_lock lk{mutex}; - DtvEntry* dtv_table = Libraries::Kernel::g_curthread->tcb->tcb_dtv; + DtvEntry* dtv_table = GetTcbBase()->tcb_dtv; if (dtv_table[0].counter != dtv_generation_counter) { // Generation counter changed, a dynamic module was either loaded or unloaded. const u32 old_num_dtvs = dtv_table[1].counter; @@ -381,7 +381,7 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) { delete[] dtv_table; // Update TCB pointer. - Libraries::Kernel::g_curthread->tcb->tcb_dtv = new_dtv_table; + GetTcbBase()->tcb_dtv = new_dtv_table; dtv_table = new_dtv_table; } diff --git a/src/core/tls.cpp b/src/core/tls.cpp index bcefd6f25..57ed20f38 100644 --- a/src/core/tls.cpp +++ b/src/core/tls.cpp @@ -46,6 +46,10 @@ void SetTcbBase(void* image_address) { ASSERT(result != 0); } +Tcb* GetTcbBase() { + return reinterpret_cast(TlsGetValue(GetTcbKey())); +} + #elif defined(__APPLE__) && defined(ARCH_X86_64) // Apple x86_64 @@ -145,6 +149,12 @@ void SetTcbBase(void* image_address) { "Failed to store thread LDT page pointer: {}", errno); } +Tcb* GetTcbBase() { + Tcb* tcb; + asm volatile("mov %%fs:0x0, %0" : "=r"(tcb)); + return tcb; +} + #elif defined(ARCH_X86_64) // Other POSIX x86_64 @@ -154,6 +164,10 @@ void SetTcbBase(void* image_address) { ASSERT_MSG(ret == 0, "Failed to set GS base: errno {}", errno); } +Tcb* GetTcbBase() { + return Libraries::Kernel::g_curthread->tcb; +} + #else // POSIX non-x86_64 @@ -176,6 +190,10 @@ void SetTcbBase(void* image_address) { ASSERT(pthread_setspecific(GetTcbKey(), image_address) == 0); } +Tcb* GetTcbBase() { + return static_cast(pthread_getspecific(GetTcbKey())); +} + #endif thread_local std::once_flag init_tls_flag; diff --git a/src/core/tls.h b/src/core/tls.h index 0ae512a04..83940be7a 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -39,6 +39,9 @@ u32 GetTcbKey(); /// Sets the data pointer to the TCB block. void SetTcbBase(void* image_address); +/// Retrieves Tcb structure for the calling thread. +Tcb* GetTcbBase(); + /// Makes sure TLS is initialized for the thread before entering guest. void EnsureThreadInitialized(); From 9e80cde60d6805232653bd803320489674ce12ad Mon Sep 17 00:00:00 2001 From: Odukoya Abdullahi Ademola Date: Thu, 4 Dec 2025 09:50:01 +0100 Subject: [PATCH 3/9] Implement http uri escape unescape (#3853) * Implement sceHttpUriEscape and sceHttpUriUnescape * Implement sceHttpUriEscape and sceHttpUriUnescape * edge case --------- Co-authored-by: Pirky10 --- src/core/libraries/network/http.cpp | 123 +++++++++++++++++++++++++++- src/core/libraries/network/http.h | 2 +- 2 files changed, 121 insertions(+), 4 deletions(-) diff --git a/src/core/libraries/network/http.cpp b/src/core/libraries/network/http.cpp index 1ae48dfed..0fb81c639 100644 --- a/src/core/libraries/network/http.cpp +++ b/src/core/libraries/network/http.cpp @@ -712,8 +712,61 @@ int PS4_SYSV_ABI sceHttpUriCopy() { return ORBIS_OK; } -int PS4_SYSV_ABI sceHttpUriEscape() { - LOG_ERROR(Lib_Http, "(STUBBED) called"); +int PS4_SYSV_ABI sceHttpUriEscape(char* out, u64* require, u64 prepare, const char* in) { + LOG_TRACE(Lib_Http, "called"); + + if (!in) { + LOG_ERROR(Lib_Http, "Invalid input string"); + return ORBIS_HTTP_ERROR_INVALID_VALUE; + } + + auto IsUnreserved = [](unsigned char c) -> bool { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || + c == '-' || c == '_' || c == '.' || c == '~'; + }; + + u64 needed = 0; + const char* src = in; + while (*src) { + unsigned char c = static_cast(*src); + if (IsUnreserved(c)) { + needed++; + } else { + needed += 3; // %XX format + } + src++; + } + needed++; // null terminator + + if (require) { + *require = needed; + } + + if (!out) { + return ORBIS_OK; + } + + if (prepare < needed) { + LOG_ERROR(Lib_Http, "Buffer too small: need {} but only {} available", needed, prepare); + return ORBIS_HTTP_ERROR_OUT_OF_MEMORY; + } + + static const char hex_chars[] = "0123456789ABCDEF"; + src = in; + char* dst = out; + while (*src) { + unsigned char c = static_cast(*src); + if (IsUnreserved(c)) { + *dst++ = *src; + } else { + *dst++ = '%'; + *dst++ = hex_chars[(c >> 4) & 0x0F]; + *dst++ = hex_chars[c & 0x0F]; + } + src++; + } + *dst = '\0'; + return ORBIS_OK; } @@ -1077,7 +1130,71 @@ int PS4_SYSV_ABI sceHttpUriSweepPath(char* dst, const char* src, u64 srcSize) { } int PS4_SYSV_ABI sceHttpUriUnescape(char* out, u64* require, u64 prepare, const char* in) { - LOG_ERROR(Lib_Http, "(STUBBED) called"); + LOG_TRACE(Lib_Http, "called"); + + if (!in) { + LOG_ERROR(Lib_Http, "Invalid input string"); + return ORBIS_HTTP_ERROR_INVALID_VALUE; + } + + // Locale-independent hex digit check + auto IsHex = [](char c) -> bool { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); + }; + + // Convert hex char to int value + auto HexToInt = [](char c) -> int { + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + return 0; + }; + + // Check for valid percent-encoded sequence (%XX) + auto IsValidPercentSequence = [&](const char* s) -> bool { + return s[0] == '%' && s[1] != '\0' && s[2] != '\0' && IsHex(s[1]) && IsHex(s[2]); + }; + + u64 needed = 0; + const char* src = in; + while (*src) { + if (IsValidPercentSequence(src)) { + src += 3; + } else { + src++; + } + needed++; + } + needed++; // null terminator + + if (require) { + *require = needed; + } + + if (!out) { + return ORBIS_OK; + } + + if (prepare < needed) { + LOG_ERROR(Lib_Http, "Buffer too small: need {} but only {} available", needed, prepare); + return ORBIS_HTTP_ERROR_OUT_OF_MEMORY; + } + + src = in; + char* dst = out; + while (*src) { + if (IsValidPercentSequence(src)) { + *dst++ = static_cast((HexToInt(src[1]) << 4) | HexToInt(src[2])); + src += 3; + } else { + *dst++ = *src++; + } + } + *dst = '\0'; + return ORBIS_OK; } diff --git a/src/core/libraries/network/http.h b/src/core/libraries/network/http.h index 701bb0e05..2ad5e171f 100644 --- a/src/core/libraries/network/http.h +++ b/src/core/libraries/network/http.h @@ -148,7 +148,7 @@ int PS4_SYSV_ABI sceHttpUnsetEpoll(); int PS4_SYSV_ABI sceHttpUriBuild(char* out, u64* require, u64 prepare, const OrbisHttpUriElement* srcElement, u32 option); int PS4_SYSV_ABI sceHttpUriCopy(); -int PS4_SYSV_ABI sceHttpUriEscape(); +int PS4_SYSV_ABI sceHttpUriEscape(char* out, u64* require, u64 prepare, const char* in); int PS4_SYSV_ABI sceHttpUriMerge(char* mergedUrl, char* url, char* relativeUri, u64* require, u64 prepare, u32 option); int PS4_SYSV_ABI sceHttpUriParse(OrbisHttpUriElement* out, const char* srcUri, void* pool, From 5183cbe6867c241e75632afbfe6ea3438fcf1316 Mon Sep 17 00:00:00 2001 From: Odukoya Abdullahi Ademola Date: Thu, 4 Dec 2025 09:50:24 +0100 Subject: [PATCH 4/9] sceHttpUriSweepPath (#3854) --- src/core/libraries/network/http.cpp | 89 ++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) diff --git a/src/core/libraries/network/http.cpp b/src/core/libraries/network/http.cpp index 0fb81c639..ebb10db68 100644 --- a/src/core/libraries/network/http.cpp +++ b/src/core/libraries/network/http.cpp @@ -1125,7 +1125,94 @@ int PS4_SYSV_ABI sceHttpUriParse(OrbisHttpUriElement* out, const char* srcUri, v } int PS4_SYSV_ABI sceHttpUriSweepPath(char* dst, const char* src, u64 srcSize) { - LOG_ERROR(Lib_Http, "(STUBBED) called"); + LOG_TRACE(Lib_Http, "called"); + + if (!dst || !src) { + LOG_ERROR(Lib_Http, "Invalid parameters"); + return ORBIS_HTTP_ERROR_INVALID_VALUE; + } + + if (srcSize == 0) { + dst[0] = '\0'; + return ORBIS_OK; + } + + u64 len = 0; + while (len < srcSize && src[len] != '\0') { + len++; + } + + for (u64 i = 0; i < len; i++) { + dst[i] = src[i]; + } + dst[len] = '\0'; + + char* read = dst; + char* write = dst; + + while (*read) { + if (read[0] == '.' && read[1] == '.' && read[2] == '/') { + read += 3; + continue; + } + + if (read[0] == '.' && read[1] == '/') { + read += 2; + continue; + } + + if (read[0] == '/' && read[1] == '.' && read[2] == '/') { + read += 2; + continue; + } + + if (read[0] == '/' && read[1] == '.' && read[2] == '\0') { + if (write == dst) { + *write++ = '/'; + } + break; + } + + bool is_dotdot_mid = (read[0] == '/' && read[1] == '.' && read[2] == '.' && read[3] == '/'); + bool is_dotdot_end = + (read[0] == '/' && read[1] == '.' && read[2] == '.' && read[3] == '\0'); + + if (is_dotdot_mid || is_dotdot_end) { + if (write > dst) { + if (*(write - 1) == '/') { + write--; + } + while (write > dst && *(write - 1) != '/') { + write--; + } + + if (is_dotdot_mid && write > dst) { + write--; + } + } + + if (is_dotdot_mid) { + read += 3; + } else { + break; + } + continue; + } + + if ((read[0] == '.' && read[1] == '\0') || + (read[0] == '.' && read[1] == '.' && read[2] == '\0')) { + break; + } + + if (read[0] == '/') { + *write++ = *read++; + } + while (*read && *read != '/') { + *write++ = *read++; + } + } + + *write = '\0'; return ORBIS_OK; } From d3ad728ac0ec033b13310b38269b2622f98b6697 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Sun, 7 Dec 2025 01:11:29 +0200 Subject: [PATCH 5/9] vector_alu: Handle -1 as src1 in v_cmp_u64 (#3855) --- .../frontend/translate/vector_alu.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 94cefb958..0803647a2 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -1050,7 +1050,14 @@ void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const } void Translator::V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) { - ASSERT(inst.src[1].field == OperandField::ConstZero); + const bool is_zero = inst.src[1].field == OperandField::ConstZero; + const bool is_neg_one = inst.src[1].field == OperandField::SignedConstIntNeg; + ASSERT(is_zero || is_neg_one); + if (is_neg_one) { + ASSERT_MSG(-s32(inst.src[1].code) + SignedConstIntNegMin - 1 == -1, + "SignedConstIntNeg must be -1"); + } + const IR::U1 src0 = [&] { switch (inst.src[0].field) { case OperandField::ScalarGPR: @@ -1064,10 +1071,11 @@ void Translator::V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const const IR::U1 result = [&] { switch (op) { case ConditionOp::EQ: - return ir.LogicalNot(src0); + return is_zero ? ir.LogicalNot(src0) : src0; case ConditionOp::LG: // NE - return src0; + return is_zero ? src0 : ir.LogicalNot(src0); case ConditionOp::GT: + ASSERT(is_zero); return ir.GroupAny(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))); default: UNREACHABLE_MSG("Unsupported V_CMP_U64 condition operation: {}", u32(op)); From 391d30cbb1ba6a9b69166a465387a9c8ea177d91 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Sun, 7 Dec 2025 00:47:39 -0600 Subject: [PATCH 6/9] cpu_patches: Patch stack canary accesses (#3857) * Patch stack checks done using fs:[0x28] Additionally adds support for multiple patches per instruction, since this makes two separate patches we need to conditionally perform for mov instructions. * Missing include * Disable patches for Apple Mac can use their native FS segment directly, so these patches aren't needed * Oops --- src/core/cpu_patches.cpp | 125 +++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 45 deletions(-) diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index 8c0897a48..2788cfe58 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -122,6 +123,30 @@ static void GenerateTcbAccess(void* /* address */, const ZydisDecodedOperand* op #endif } +static bool FilterStackCheck(const ZydisDecodedOperand* operands) { + const auto& dst_op = operands[0]; + const auto& src_op = operands[1]; + + // Some compilers emit stack checks by starting a function with + // 'mov (64-bit register), fs:[0x28]', then checking with `xor (64-bit register), fs:[0x28]` + return src_op.type == ZYDIS_OPERAND_TYPE_MEMORY && src_op.mem.segment == ZYDIS_REGISTER_FS && + src_op.mem.base == ZYDIS_REGISTER_NONE && src_op.mem.index == ZYDIS_REGISTER_NONE && + src_op.mem.disp.value == 0x28 && dst_op.reg.value >= ZYDIS_REGISTER_RAX && + dst_op.reg.value <= ZYDIS_REGISTER_R15; +} + +static void GenerateStackCheck(void* /* address */, const ZydisDecodedOperand* operands, + Xbyak::CodeGenerator& c) { + const auto dst = ZydisToXbyakRegisterOperand(operands[0]); + c.xor_(dst, 0); +} + +static void GenerateStackCanary(void* /* address */, const ZydisDecodedOperand* operands, + Xbyak::CodeGenerator& c) { + const auto dst = ZydisToXbyakRegisterOperand(operands[0]); + c.mov(dst, 0); +} + static bool FilterNoSSE4a(const ZydisDecodedOperand*) { Cpu cpu; return !cpu.has(Cpu::tSSE4a); @@ -440,18 +465,26 @@ struct PatchInfo { bool trampoline; }; -static const std::unordered_map Patches = { +static const std::unordered_map> Patches = { // SSE4a - {ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}}, - {ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}}, - {ZYDIS_MNEMONIC_MOVNTSS, {FilterNoSSE4a, ReplaceMOVNTSS, false}}, - {ZYDIS_MNEMONIC_MOVNTSD, {FilterNoSSE4a, ReplaceMOVNTSD, false}}, + {ZYDIS_MNEMONIC_EXTRQ, {{FilterNoSSE4a, GenerateEXTRQ, true}}}, + {ZYDIS_MNEMONIC_INSERTQ, {{FilterNoSSE4a, GenerateINSERTQ, true}}}, + {ZYDIS_MNEMONIC_MOVNTSS, {{FilterNoSSE4a, ReplaceMOVNTSS, false}}}, + {ZYDIS_MNEMONIC_MOVNTSD, {{FilterNoSSE4a, ReplaceMOVNTSD, false}}}, +#if !defined(__APPLE__) + // FS segment patches + // These first two patches are for accesses to the stack canary, fs:[0x28] + {ZYDIS_MNEMONIC_XOR, {{FilterStackCheck, GenerateStackCheck, false}}}, + {ZYDIS_MNEMONIC_MOV, + {{FilterStackCheck, GenerateStackCanary, false}, #if defined(_WIN32) - // Windows needs a trampoline. - {ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, true}}, -#elif !defined(__APPLE__) - {ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, false}}, + // Windows needs a trampoline for Tcb accesses. + {FilterTcbAccess, GenerateTcbAccess, true} +#else + {FilterTcbAccess, GenerateTcbAccess, false} +#endif + }}, #endif }; @@ -503,51 +536,53 @@ static std::pair TryPatch(u8* code, PatchModule* module) { } if (Patches.contains(instruction.mnemonic)) { - const auto& patch_info = Patches.at(instruction.mnemonic); - bool needs_trampoline = patch_info.trampoline; - if (patch_info.filter(operands)) { - auto& patch_gen = module->patch_gen; + const auto& patches = Patches.at(instruction.mnemonic); + for (const auto& patch_info : patches) { + bool needs_trampoline = patch_info.trampoline; + if (patch_info.filter(operands)) { + auto& patch_gen = module->patch_gen; - if (needs_trampoline && instruction.length < 5) { - // Trampoline is needed but instruction is too short to patch. - // Return false and length to signal to AOT compilation that this instruction - // should be skipped and handled at runtime. - return std::make_pair(false, instruction.length); - } + if (needs_trampoline && instruction.length < 5) { + // Trampoline is needed but instruction is too short to patch. + // Return false and length to signal to AOT compilation that this instruction + // should be skipped and handled at runtime. + return std::make_pair(false, instruction.length); + } - // Reset state and move to current code position. - patch_gen.reset(); - patch_gen.setSize(code - patch_gen.getCode()); + // Reset state and move to current code position. + patch_gen.reset(); + patch_gen.setSize(code - patch_gen.getCode()); - if (needs_trampoline) { - auto& trampoline_gen = module->trampoline_gen; - const auto trampoline_ptr = trampoline_gen.getCurr(); + if (needs_trampoline) { + auto& trampoline_gen = module->trampoline_gen; + const auto trampoline_ptr = trampoline_gen.getCurr(); - patch_info.generator(code, operands, trampoline_gen); + patch_info.generator(code, operands, trampoline_gen); - // Return to the following instruction at the end of the trampoline. - trampoline_gen.jmp(code + instruction.length); + // Return to the following instruction at the end of the trampoline. + trampoline_gen.jmp(code + instruction.length); - // Replace instruction with near jump to the trampoline. - patch_gen.jmp(trampoline_ptr, Xbyak::CodeGenerator::LabelType::T_NEAR); - } else { - patch_info.generator(code, operands, patch_gen); - } + // Replace instruction with near jump to the trampoline. + patch_gen.jmp(trampoline_ptr, Xbyak::CodeGenerator::LabelType::T_NEAR); + } else { + patch_info.generator(code, operands, patch_gen); + } - const auto patch_size = patch_gen.getCurr() - code; - if (patch_size > 0) { - ASSERT_MSG(instruction.length >= patch_size, - "Instruction {} with length {} is too short to replace at: {}", - ZydisMnemonicGetString(instruction.mnemonic), instruction.length, - fmt::ptr(code)); + const auto patch_size = patch_gen.getCurr() - code; + if (patch_size > 0) { + ASSERT_MSG(instruction.length >= patch_size, + "Instruction {} with length {} is too short to replace at: {}", + ZydisMnemonicGetString(instruction.mnemonic), instruction.length, + fmt::ptr(code)); - // Fill remaining space with nops. - patch_gen.nop(instruction.length - patch_size); + // Fill remaining space with nops. + patch_gen.nop(instruction.length - patch_size); - module->patched.insert(code); - LOG_DEBUG(Core, "Patched instruction '{}' at: {}", - ZydisMnemonicGetString(instruction.mnemonic), fmt::ptr(code)); - return std::make_pair(true, instruction.length); + module->patched.insert(code); + LOG_DEBUG(Core, "Patched instruction '{}' at: {}", + ZydisMnemonicGetString(instruction.mnemonic), fmt::ptr(code)); + return std::make_pair(true, instruction.length); + } } } } From 2a5910ed5190770b4927b15308ff872045b9d6e3 Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Sun, 7 Dec 2025 22:56:51 +0100 Subject: [PATCH 7/9] New translations en_us.ts (OpenOrbis) (#3858) --- src/core/libraries/kernel/threads/exception.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/libraries/kernel/threads/exception.cpp b/src/core/libraries/kernel/threads/exception.cpp index 5455d425e..95ced79c0 100644 --- a/src/core/libraries/kernel/threads/exception.cpp +++ b/src/core/libraries/kernel/threads/exception.cpp @@ -173,6 +173,8 @@ void RegisterException(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("OMDRKKAZ8I4", "libkernel", 1, "libkernel", sceKernelDebugRaiseException); LIB_FUNCTION("zE-wXIZjLoM", "libkernel", 1, "libkernel", sceKernelDebugRaiseExceptionOnReleaseMode); + LIB_FUNCTION("WkwEd3N7w0Y", "libkernel", 1, "libkernel", sceKernelInstallExceptionHandler); + LIB_FUNCTION("Qhv5ARAoOEc", "libkernel", 1, "libkernel", sceKernelRemoveExceptionHandler); } } // namespace Libraries::Kernel From 65f0b07c34d0d0fe43bbcb25556741bb629b233e Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Mon, 8 Dec 2025 05:46:32 -0600 Subject: [PATCH 8/9] libkernel: Implement sceKernelEnableDmemAliasing, proper mapping type checks in posix_mmap (#3859) * Basic handling for MAP_VOID, MAP_STACK, and MAP_ANON in mmap. * Update memory.cpp * Update memory.cpp * Dmem aliasing check * Oops --- src/core/libraries/kernel/memory.cpp | 45 ++++++++++++++++++++++++---- src/core/memory.h | 3 ++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 92280308d..62903ff72 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -5,24 +5,35 @@ #include "common/alignment.h" #include "common/assert.h" +#include "common/elf_info.h" #include "common/logging/log.h" #include "common/scope_exit.h" #include "common/singleton.h" #include "core/libraries/kernel/kernel.h" #include "core/libraries/kernel/memory.h" #include "core/libraries/kernel/orbis_error.h" +#include "core/libraries/kernel/process.h" #include "core/libraries/libs.h" #include "core/linker.h" #include "core/memory.h" namespace Libraries::Kernel { +static s32 g_sdk_version = -1; +static bool g_alias_dmem = false; + u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize() { LOG_TRACE(Kernel_Vmm, "called"); const auto* memory = Core::Memory::Instance(); return memory->GetTotalDirectSize(); } +s32 PS4_SYSV_ABI sceKernelEnableDmemAliasing() { + LOG_DEBUG(Kernel_Vmm, "called"); + g_alias_dmem = true; + return ORBIS_OK; +} + s32 PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len, u64 alignment, s32 memoryType, s64* physAddrOut) { if (searchStart < 0 || searchEnd < 0) { @@ -197,8 +208,14 @@ s32 PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, s32 prot, s const VAddr in_addr = reinterpret_cast(*addr); auto* memory = Core::Memory::Instance(); - const auto ret = memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, - Core::VMAType::Direct, name, false, phys_addr, alignment); + bool should_check = false; + if (g_sdk_version >= Common::ElfInfo::FW_25 && False(map_flags & Core::MemoryMapFlags::Stack)) { + // Under these conditions, this would normally redirect to sceKernelMapDirectMemory2. + should_check = !g_alias_dmem; + } + const auto ret = + memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, name, + should_check, phys_addr, alignment); LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr)); return ret; @@ -244,8 +261,9 @@ s32 PS4_SYSV_ABI sceKernelMapDirectMemory2(void** addr, u64 len, s32 type, s32 p const VAddr in_addr = reinterpret_cast(*addr); auto* memory = Core::Memory::Instance(); - const auto ret = memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, - Core::VMAType::Direct, "anon", true, phys_addr, alignment); + const auto ret = + memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, "anon", + !g_alias_dmem, phys_addr, alignment); if (ret == 0) { // If the map call succeeds, set the direct memory type using the output address. @@ -668,10 +686,21 @@ void* PS4_SYSV_ABI posix_mmap(void* addr, u64 len, s32 prot, s32 flags, s32 fd, } s32 result = ORBIS_OK; - if (fd == -1) { + if (True(mem_flags & Core::MemoryMapFlags::Anon)) { + // Maps flexible memory result = memory->MapMemory(&addr_out, aligned_addr, aligned_size, mem_prot, mem_flags, Core::VMAType::Flexible, "anon", false); + } else if (True(mem_flags & Core::MemoryMapFlags::Stack)) { + // Maps stack memory + result = memory->MapMemory(&addr_out, aligned_addr, aligned_size, mem_prot, mem_flags, + Core::VMAType::Stack, "anon", false); + } else if (True(mem_flags & Core::MemoryMapFlags::Void)) { + // Reserves memory + result = + memory->MapMemory(&addr_out, aligned_addr, aligned_size, Core::MemoryProt::NoAccess, + mem_flags, Core::VMAType::Reserved, "anon", false); } else { + // Default to file mapping result = memory->MapFile(&addr_out, aligned_addr, aligned_size, mem_prot, mem_flags, fd, phys_addr); } @@ -769,6 +798,12 @@ s32 PS4_SYSV_ABI sceKernelGetPrtAperture(s32 id, VAddr* address, u64* size) { } void RegisterMemory(Core::Loader::SymbolsResolver* sym) { + ASSERT_MSG(sceKernelGetCompiledSdkVersion(&g_sdk_version) == ORBIS_OK, + "Failed to get compiled SDK verision."); + + LIB_FUNCTION("usHTMoFoBTM", "libkernel_dmem_aliasing2", 1, "libkernel", + sceKernelEnableDmemAliasing); + LIB_FUNCTION("usHTMoFoBTM", "libkernel", 1, "libkernel", sceKernelEnableDmemAliasing); LIB_FUNCTION("rTXw65xmLIA", "libkernel", 1, "libkernel", sceKernelAllocateDirectMemory); LIB_FUNCTION("B+vc2AO2Zrc", "libkernel", 1, "libkernel", sceKernelAllocateMainDirectMemory); LIB_FUNCTION("C0f7TJcbfac", "libkernel", 1, "libkernel", sceKernelAvailableDirectMemorySize); diff --git a/src/core/memory.h b/src/core/memory.h index db988c305..7ebf9d34c 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -45,7 +45,10 @@ enum class MemoryMapFlags : u32 { Private = 2, Fixed = 0x10, NoOverwrite = 0x80, + Void = 0x100, + Stack = 0x400, NoSync = 0x800, + Anon = 0x1000, NoCore = 0x20000, NoCoalesce = 0x400000, }; From de6c5bbb836f05560a3639810f4ea8e9ef044c3d Mon Sep 17 00:00:00 2001 From: AlpinDale <52078762+AlpinDale@users.noreply.github.com> Date: Tue, 9 Dec 2025 00:32:54 +0430 Subject: [PATCH 9/9] cli: add `--show-fps` to the CLI launcher (#3860) * cli: add `--show-fps` to the CLI launcher * fix: clang-format * nit: PascalCase -> camelCase --- src/common/config.cpp | 12 ++++++++++++ src/common/config.h | 2 ++ src/core/devtools/layer.cpp | 6 ++++++ src/core/devtools/layer.h | 1 + src/main.cpp | 7 +++++-- 5 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index 94d8b488c..1af326af7 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -199,6 +199,7 @@ static ConfigEntry isDebugDump(false); static ConfigEntry isShaderDebug(false); static ConfigEntry isSeparateLogFilesEnabled(false); static ConfigEntry isFpsColor(true); +static ConfigEntry showFpsCounter(false); static ConfigEntry logEnabled(true); // GUI @@ -466,6 +467,14 @@ bool fpsColor() { return isFpsColor.get(); } +bool getShowFpsCounter() { + return showFpsCounter.get(); +} + +void setShowFpsCounter(bool enable, bool is_game_specific) { + showFpsCounter.set(enable, is_game_specific); +} + bool isLoggingEnabled() { return logEnabled.get(); } @@ -969,6 +978,7 @@ void load(const std::filesystem::path& path, bool is_game_specific) { isSeparateLogFilesEnabled.setFromToml(debug, "isSeparateLogFilesEnabled", is_game_specific); isShaderDebug.setFromToml(debug, "CollectShader", is_game_specific); isFpsColor.setFromToml(debug, "FPSColor", is_game_specific); + showFpsCounter.setFromToml(debug, "showFpsCounter", is_game_specific); logEnabled.setFromToml(debug, "logEnabled", is_game_specific); current_version = toml::find_or(debug, "ConfigVersion", current_version); } @@ -1188,6 +1198,7 @@ void save(const std::filesystem::path& path, bool is_game_specific) { data["GPU"]["internalScreenHeight"] = internalScreenHeight.base_value; data["GPU"]["patchShaders"] = shouldPatchShaders.base_value; data["Debug"]["FPSColor"] = isFpsColor.base_value; + data["Debug"]["showFpsCounter"] = showFpsCounter.base_value; } // Sorting of TOML sections @@ -1296,6 +1307,7 @@ void setDefaultValues(bool is_game_specific) { // Debug isFpsColor.base_value = true; + showFpsCounter.base_value = false; } } diff --git a/src/common/config.h b/src/common/config.h index 481ef6444..2bd65b783 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -126,6 +126,8 @@ bool getPSNSignedIn(); void setPSNSignedIn(bool sign, bool is_game_specific = false); bool patchShaders(); // no set bool fpsColor(); // no set +bool getShowFpsCounter(); +void setShowFpsCounter(bool enable, bool is_game_specific = false); bool isNeoModeConsole(); void setNeoMode(bool enable, bool is_game_specific = false); bool isDevKitConsole(); diff --git a/src/core/devtools/layer.cpp b/src/core/devtools/layer.cpp index 1fb810030..cfa950568 100644 --- a/src/core/devtools/layer.cpp +++ b/src/core/devtools/layer.cpp @@ -311,6 +311,7 @@ static void LoadSettings(const char* line) { void L::SetupSettings() { frame_graph.is_open = true; + show_simple_fps = Config::getShowFpsCounter(); using SettingLoader = void (*)(const char*); @@ -475,6 +476,11 @@ void ToggleSimpleFps() { visibility_toggled = true; } +void SetSimpleFps(bool enabled) { + show_simple_fps = enabled; + visibility_toggled = true; +} + void ToggleQuitWindow() { show_quit_window = !show_quit_window; } diff --git a/src/core/devtools/layer.h b/src/core/devtools/layer.h index 44afc95bc..96b48a7f0 100644 --- a/src/core/devtools/layer.h +++ b/src/core/devtools/layer.h @@ -30,6 +30,7 @@ private: namespace Overlay { void ToggleSimpleFps(); +void SetSimpleFps(bool enabled); void ToggleQuitWindow(); } // namespace Overlay diff --git a/src/main.cpp b/src/main.cpp index f1e5ce932..b3a8586ba 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -67,6 +67,7 @@ int main(int argc, char* argv[]) { "values, ignores the config file(s) entirely.\n" " --config-global Run the emulator with the base config file " "only, ignores game specific configs.\n" + " --show-fps Enable FPS counter display at startup\n" " -h, --help Display this help message\n"; exit(0); }}, @@ -174,13 +175,15 @@ int main(int argc, char* argv[]) { game_folder = folder; }}, {"--wait-for-debugger", [&](int& i) { waitForDebugger = true; }}, - {"--wait-for-pid", [&](int& i) { + {"--wait-for-pid", + [&](int& i) { if (++i >= argc) { std::cerr << "Error: Missing argument for --wait-for-pid\n"; exit(1); } waitPid = std::stoi(argv[i]); - }}}; + }}, + {"--show-fps", [&](int& i) { Config::setShowFpsCounter(true); }}}; if (argc == 1) { if (!SDL_ShowSimpleMessageBox(