From fe3051f6b35d458acba5f2beda30c318d8e2786a Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 11 May 2025 14:31:52 -0700 Subject: [PATCH] core: Implement Linux TCB access using guest<->host swapping. --- src/core/cpu_patches.cpp | 33 +++++---------------------------- src/core/cpu_patches.h | 3 --- src/core/module.cpp | 5 ----- src/core/tls.cpp | 17 +++++++++++++++++ src/core/tls.h | 26 ++++++++++++++++++++++++-- 5 files changed, 46 insertions(+), 38 deletions(-) diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index 8937ef04b..64b1b2b94 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -402,10 +402,7 @@ static const std::unordered_map Patches = { {ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}}, #if defined(_WIN32) - // Windows needs a trampoline. {ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, true}}, -#elif !defined(__APPLE__) - {ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, false}}, #endif }; @@ -674,24 +671,12 @@ static bool TryPatchJit(void* code_address) { return TryPatch(code, module).first; } -static void TryPatchAot(void* code_address, u64 code_size) { - auto* code = static_cast(code_address); - auto* module = GetModule(code); - if (module == nullptr) { - return; - } - - std::unique_lock lock{module->mutex}; - - const auto* end = code + code_size; - while (code < end) { - code += TryPatch(code, module).second; - } -} - +#ifdef _WIN32 +// Used for patching TCB accesses on Windows. static bool PatchesAccessViolationHandler(void* context, void* /* fault_address */) { return TryPatchJit(Common::GetRip(context)); } +#endif static bool PatchesIllegalInstructionHandler(void* context) { void* code_address = Common::GetRip(context); @@ -719,7 +704,9 @@ static void PatchesInit() { auto* signals = Signals::Instance(); // Should be called last. constexpr auto priority = std::numeric_limits::max(); +#ifdef _WIN32 signals->RegisterAccessViolationHandler(PatchesAccessViolationHandler, priority); +#endif signals->RegisterIllegalInstructionHandler(PatchesIllegalInstructionHandler, priority); } } @@ -735,14 +722,4 @@ void RegisterPatchModule(void* module_ptr, u64 module_size, void* trampoline_are trampoline_area_size)); } -void PrePatchInstructions(u64 segment_addr, u64 segment_size) { -#if !defined(_WIN32) && !defined(__APPLE__) - // Linux and others have an FS segment pointing to valid memory, so continue to do full - // ahead-of-time patching for now until a better solution is worked out. - if (!Patches.empty()) { - TryPatchAot(reinterpret_cast(segment_addr), segment_size); - } -#endif -} - } // namespace Core diff --git a/src/core/cpu_patches.h b/src/core/cpu_patches.h index 7a0546046..fd1dec732 100644 --- a/src/core/cpu_patches.h +++ b/src/core/cpu_patches.h @@ -11,7 +11,4 @@ namespace Core { void RegisterPatchModule(void* module_ptr, u64 module_size, void* trampoline_area_ptr, u64 trampoline_area_size); -/// Applies CPU patches that need to be done before beginning executions. -void PrePatchInstructions(u64 segment_addr, u64 segment_size); - } // namespace Core diff --git a/src/core/module.cpp b/src/core/module.cpp index f31bbed6c..e86010687 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -163,11 +163,6 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { LOG_INFO(Core_Linker, "segment_mode ..........: {}", segment_mode); add_segment(elf_pheader[i]); -#ifdef ARCH_X86_64 - if (elf_pheader[i].p_flags & PF_EXEC) { - PrePatchInstructions(segment_addr, segment_file_size); - } -#endif break; } case PT_DYNAMIC: diff --git a/src/core/tls.cpp b/src/core/tls.cpp index e13c683e1..4e2d11a50 100644 --- a/src/core/tls.cpp +++ b/src/core/tls.cpp @@ -46,6 +46,8 @@ Tcb* GetTcbBase() { return reinterpret_cast(TlsGetValue(GetTcbKey())); } +void SwapTcb() {} + #elif defined(__APPLE__) && defined(ARCH_X86_64) // Apple x86_64 @@ -151,6 +153,8 @@ Tcb* GetTcbBase() { return tcb; } +void SwapTcb() {} + #elif defined(ARCH_X86_64) // Other POSIX x86_64 @@ -165,6 +169,17 @@ Tcb* GetTcbBase() { return tcb; } +void SwapTcb() { + void* fs; + void* gs; + asm volatile("rdfsbase %0" : "=r"(fs)::"memory"); + asm volatile("rdgsbase %0" : "=r"(gs)::"memory"); + + // Swap FS and GS + asm volatile("wrfsbase %0" ::"r"(gs) : "memory"); + asm volatile("wrgsbase %0" ::"r"(fs) : "memory"); +} + #else // POSIX non-x86_64 @@ -191,6 +206,8 @@ Tcb* GetTcbBase() { return static_cast(pthread_getspecific(GetTcbKey())); } +void SwapTcb() {} + #endif thread_local std::once_flag init_tls_flag; diff --git a/src/core/tls.h b/src/core/tls.h index d1d490465..2481fe3b9 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -40,6 +40,9 @@ void SetTcbBase(void* image_address); /// Retrieves Tcb structure for the calling thread. Tcb* GetTcbBase(); +/// Swaps the TCB in or out of the fs register, if required by the platform. +void SwapTcb(); + /// Makes sure TLS is initialized for the thread before entering guest. void EnsureThreadInitialized(); @@ -55,7 +58,17 @@ ReturnType ExecuteGuest(PS4_SYSV_ABI ReturnType (*func)(FuncArgs...), CallArgs&& EnsureThreadInitialized(); // clear stack to avoid trash from EnsureThreadInitialized ClearStack<13_KB>(); - return func(std::forward(args)...); + + if constexpr (std::is_same_v) { + SwapTcb(); + func(std::forward(args)...); + SwapTcb(); + } else { + SwapTcb(); + auto ret = func(std::forward(args)...); + SwapTcb(); + return ret; + } } template @@ -64,7 +77,16 @@ struct HostCallWrapperImpl; template struct HostCallWrapperImpl { static ReturnType PS4_SYSV_ABI wrap(Args... args) { - return func(args...); + if constexpr (std::is_same_v) { + SwapTcb(); + func(args...); + SwapTcb(); + } else { + SwapTcb(); + auto ret = func(args...); + SwapTcb(); + return ret; + } } };