From f86ef649729ba78ac133aada453239aa9901fe6c Mon Sep 17 00:00:00 2001
From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com>
Date: Wed, 5 Jun 2024 19:34:21 +0300
Subject: [PATCH] linker: Properly implement thread local storage

---
 src/core/libraries/kernel/libkernel.cpp       |  3 +
 .../libraries/kernel/memory_management.cpp    |  6 ++
 src/core/libraries/kernel/memory_management.h |  1 +
 .../libraries/kernel/thread_management.cpp    | 15 ++++
 src/core/linker.cpp                           | 68 ++++++++++++++++++-
 src/core/linker.h                             | 10 +++
 src/core/tls.cpp                              | 10 +--
 src/core/tls.h                                | 18 ++++-
 8 files changed, 124 insertions(+), 7 deletions(-)

diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp
index a8c3975e2..3fc70d6bf 100644
--- a/src/core/libraries/kernel/libkernel.cpp
+++ b/src/core/libraries/kernel/libkernel.cpp
@@ -212,6 +212,9 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
     LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
     LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory);
     LIB_FUNCTION("IWIBBdTHit4", "libkernel", 1, "libkernel", 1, 1, sceKernelMapFlexibleMemory);
+    LIB_FUNCTION("p5EcQeEeJAE", "libkernel", 1, "libkernel", 1, 1,
+                 _sceKernelRtldSetApplicationHeapAPI);
+
     // equeue
     LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue);
     LIB_FUNCTION("jpFjmgAC5AE", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteEqueue);
diff --git a/src/core/libraries/kernel/memory_management.cpp b/src/core/libraries/kernel/memory_management.cpp
index 5c39f525f..88525241d 100644
--- a/src/core/libraries/kernel/memory_management.cpp
+++ b/src/core/libraries/kernel/memory_management.cpp
@@ -7,6 +7,7 @@
 #include "common/singleton.h"
 #include "core/libraries/error_codes.h"
 #include "core/libraries/kernel/memory_management.h"
+#include "core/linker.h"
 #include "core/memory.h"
 
 namespace Libraries::Kernel {
@@ -127,4 +128,9 @@ int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInf
     return memory->DirectMemoryQuery(offset, flags == 1, query_info);
 }
 
+void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func) {
+    auto* linker = Common::Singleton<Core::Linker>::Instance();
+    linker->SetHeapApiFunc(func);
+}
+
 } // namespace Libraries::Kernel
diff --git a/src/core/libraries/kernel/memory_management.h b/src/core/libraries/kernel/memory_management.h
index be0d8514b..9433c1aaa 100644
--- a/src/core/libraries/kernel/memory_management.h
+++ b/src/core/libraries/kernel/memory_management.h
@@ -49,5 +49,6 @@ int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void**
 
 int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
                                             size_t infoSize);
+void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func);
 
 } // namespace Libraries::Kernel
diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp
index 32f179a59..aa51c6359 100644
--- a/src/core/libraries/kernel/thread_management.cpp
+++ b/src/core/libraries/kernel/thread_management.cpp
@@ -5,10 +5,12 @@
 #include
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/singleton.h"
 #include "common/thread.h"
 #include "core/libraries/error_codes.h"
 #include "core/libraries/kernel/thread_management.h"
 #include "core/libraries/libs.h"
+#include "core/linker.h"
 #ifdef _WIN64
 #include
 #endif
@@ -829,6 +831,8 @@ static void cleanup_thread(void* arg) {
 static void* run_thread(void* arg) {
     auto* thread = static_cast(arg);
     Common::SetCurrentThreadName(thread->name.c_str());
+    auto* linker = Common::Singleton<Core::Linker>::Instance();
+    linker->InitTlsForThread(false);
     void* ret = nullptr;
     g_pthread_self = thread;
     pthread_cleanup_push(cleanup_thread, thread);
@@ -1022,6 +1026,16 @@ int PS4_SYSV_ABI scePthreadEqual(ScePthread thread1, ScePthread thread2) {
     return (thread1 == thread2 ? 1 : 0);
 }
 
+struct TlsIndex {
+    u64 ti_module;
+    u64 ti_offset;
+};
+
+void* PS4_SYSV_ABI __tls_get_addr(TlsIndex* index) {
+    auto* linker = Common::Singleton<Core::Linker>::Instance();
+    return linker->TlsGetAddr(index->ti_module, index->ti_offset);
+}
+
 void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
     LIB_FUNCTION("4+h9EzwKF4I", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetschedpolicy);
     LIB_FUNCTION("-Wreprtu0Qs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetdetachstate);
@@ -1038,6 +1052,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
     LIB_FUNCTION("8+s5BzZjxSg", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetaffinity);
     LIB_FUNCTION("x1X76arYMxU", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGet);
     LIB_FUNCTION("UTXzJbWhhTE", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetstacksize);
+    LIB_FUNCTION("vNe1w4diLCs", "libkernel", 1, "libkernel", 1, 1, __tls_get_addr);
 
     LIB_FUNCTION("bt3CTBKmGyI", "libkernel", 1, "libkernel", 1, 1, scePthreadSetaffinity);
     LIB_FUNCTION("6UgtwV+0zb4", "libkernel", 1, "libkernel", 1, 1, scePthreadCreate);
diff --git a/src/core/linker.cpp b/src/core/linker.cpp
index ed953bb0b..8cae916d8 100644
--- a/src/core/linker.cpp
+++ b/src/core/linker.cpp
@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/config.h"
 #include "common/logging/log.h"
@@ -10,8 +10,10 @@
 #include "common/thread.h"
 #include "core/aerolib/aerolib.h"
 #include "core/aerolib/stubs.h"
+#include "core/libraries/kernel/memory_management.h"
 #include "core/libraries/kernel/thread_management.h"
 #include "core/linker.h"
+#include "core/tls.h"
 #include "core/virtual_memory.h"
 
 namespace Core {
@@ -70,6 +72,7 @@ void Linker::Execute() {
     // Init primary thread.
     Common::SetCurrentThreadName("GAME_MainThread");
     Libraries::Kernel::pthreadInitSelfMainThread();
+    InitTlsForThread(true);
 
     // Start shared library modules
     for (auto& m : m_modules) {
@@ -248,6 +251,69 @@ void Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
              return_info->name, library->name, module->name);
 }
 
+void* Linker::TlsGetAddr(u64 module_index, u64 offset) {
+    DtvEntry* dtv_table = GetTcbBase()->tcb_dtv;
+    ASSERT_MSG(dtv_table[0].counter == dtv_generation_counter,
+               "Reallocation of DTV table is not supported");
+
+    u8* module_base = static_cast<u8*>(dtv_table[module_index + 1].pointer);
+    ASSERT_MSG(module_base, "DTV allocation is not supported");
+    return module_base + offset;
+}
+
+void Linker::InitTlsForThread(bool is_primary) {
+    static constexpr size_t TcbSize = 0x40;
+    static constexpr size_t TlsAllocAlign = 0x20;
+    const size_t total_tls_size = Common::AlignUp(static_tls_size, TlsAllocAlign) + TcbSize;
+
+    // The kernel module has a few different paths for TLS allocation.
+    // For SDK < 1.7 it allocates both main and secondary thread blocks using libc mspace/malloc.
+    // In games compiled with newer SDK, the main thread gets mapped from flexible memory,
+    // with addr = 0, so system managed area. Here we will only implement the latter.
+    void* addr_out{};
+    if (is_primary) {
+        const size_t tls_aligned = Common::AlignUp(total_tls_size, 16_KB);
+        const int ret = Libraries::Kernel::sceKernelMapNamedFlexibleMemory(
+            &addr_out, tls_aligned, 3, 0, "SceKernelPrimaryTcbTls");
+        ASSERT_MSG(ret == 0, "Unable to allocate TLS+TCB for the primary thread");
+    } else {
+        if (heap_api_func) {
+            addr_out = heap_api_func(total_tls_size);
+        } else {
+            addr_out = std::malloc(total_tls_size);
+        }
+    }
+
+    // Initialize allocated memory and allocate DTV table.
+    const u32 num_dtvs = max_tls_index;
+    std::memset(addr_out, 0, total_tls_size);
+    DtvEntry* dtv_table = new DtvEntry[num_dtvs + 2];
+
+    // Initialize thread control block
+    u8* addr = reinterpret_cast<u8*>(addr_out);
+    Tcb* tcb = reinterpret_cast<Tcb*>(addr + static_tls_size);
+    tcb->tcb_self = tcb;
+    tcb->tcb_dtv = dtv_table;
+
+    // Dtv[0] is the generation counter. libkernel puts their number into dtv[1] (why?)
+    dtv_table[0].counter = dtv_generation_counter;
+    dtv_table[1].counter = num_dtvs;
+
+    // Copy init images to TLS thread blocks and map them to DTV slots.
+    for (const auto& module : m_modules) {
+        if (module->tls.image_size == 0) {
+            continue;
+        }
+        u8* dest = reinterpret_cast<u8*>(addr + static_tls_size - module->tls.offset);
+        const u8* src = reinterpret_cast<const u8*>(module->tls.image_virtual_addr);
+        std::memcpy(dest, src, module->tls.init_image_size);
+        tcb->tcb_dtv[module->tls.modid + 1].pointer = dest;
+    }
+
+    // Set pointer to FS base
+    SetTcbBase(tcb);
+}
+
 void Linker::DebugDump() {
     const auto& log_dir = Common::FS::GetUserPath(Common::FS::PathType::LogDir);
     const std::filesystem::path debug(log_dir / "debugdump");
diff --git a/src/core/linker.h b/src/core/linker.h
index 3c2cb9876..f5414ec6b 100644
--- a/src/core/linker.h
+++ b/src/core/linker.h
@@ -18,6 +18,8 @@ struct EntryParams {
     const char* argv[3];
 };
 
+using HeapApiFunc = PS4_SYSV_ABI void* (*)(size_t);
+
 class Linker {
 public:
     explicit Linker();
@@ -35,6 +37,13 @@ public:
         return m_modules.at(index).get();
     }
 
+    void SetHeapApiFunc(void* func) {
+        heap_api_func = *reinterpret_cast<HeapApiFunc*>(func);
+    }
+
+    void* TlsGetAddr(u64 module_index, u64 offset);
+    void InitTlsForThread(bool is_primary = false);
+
     s32 LoadModule(const std::filesystem::path& elf_name);
 
     void Relocate(Module* module);
@@ -50,6 +59,7 @@ private:
     u32 dtv_generation_counter{1};
     size_t static_tls_size{};
     size_t max_tls_index{};
+    HeapApiFunc heap_api_func{};
     std::vector> m_modules;
     Loader::SymbolsResolver m_hle_symbols{};
 };
diff --git a/src/core/tls.cpp b/src/core/tls.cpp
index b945baefb..57bce1d9b 100644
--- a/src/core/tls.cpp
+++ b/src/core/tls.cpp
@@ -44,13 +44,15 @@ constexpr static TLSPattern TlsPatterns[] = {
 #ifdef _WIN32
 static DWORD slot = 0;
 
-void SetTLSStorage(u64 image_address) {
-    // Guest apps will use both positive and negative offsets to the TLS pointer.
-    // User data at probably in negative offsets, while pthread data at positive offset.
-    const BOOL result = TlsSetValue(slot, reinterpret_cast(image_address));
+void SetTcbBase(void* image_address) {
+    const BOOL result = TlsSetValue(slot, image_address);
     ASSERT(result != 0);
 }
 
+Tcb* GetTcbBase() {
+    return reinterpret_cast<Tcb*>(TlsGetValue(slot));
+}
+
 void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
     using namespace Xbyak::util;
 
diff --git a/src/core/tls.h b/src/core/tls.h
index e9825bf6a..3f1544208 100644
--- a/src/core/tls.h
+++ b/src/core/tls.h
@@ -11,8 +11,22 @@ class CodeGenerator;
 
 namespace Core {
 
-/// Sets the data pointer that contains the TLS image.
-void SetTLSStorage(u64 image_address);
+union DtvEntry {
+    size_t counter;
+    void* pointer;
+};
+
+struct Tcb {
+    Tcb* tcb_self;
+    DtvEntry* tcb_dtv;
+    void* tcb_thread;
+};
+
+/// Sets the data pointer to the TCB block.
+void SetTcbBase(void* image_address);
+
+/// Retrieves Tcb structure for the calling thread.
+Tcb* GetTcbBase();
 
 /// Patches any instructions that access guest TLS to use provided storage.
 void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c);