linker: Properly implement thread local storage

This commit is contained in:
raphaelthegreat 2024-06-05 19:34:21 +03:00
parent 40b68acfb1
commit f86ef64972
8 changed files with 124 additions and 7 deletions

View File

@ -212,6 +212,9 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory);
LIB_FUNCTION("IWIBBdTHit4", "libkernel", 1, "libkernel", 1, 1, sceKernelMapFlexibleMemory);
LIB_FUNCTION("p5EcQeEeJAE", "libkernel", 1, "libkernel", 1, 1,
_sceKernelRtldSetApplicationHeapAPI);
// equeue
LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue);
LIB_FUNCTION("jpFjmgAC5AE", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteEqueue);

View File

@ -7,6 +7,7 @@
#include "common/singleton.h"
#include "core/libraries/error_codes.h"
#include "core/libraries/kernel/memory_management.h"
#include "core/linker.h"
#include "core/memory.h"
namespace Libraries::Kernel {
@ -127,4 +128,9 @@ int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInf
return memory->DirectMemoryQuery(offset, flags == 1, query_info);
}
// Guest-callable entry point: hands the supplied heap API pointer over to the
// process-wide linker instance, which stores it via Linker::SetHeapApiFunc.
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func) {
    Common::Singleton<Core::Linker>::Instance()->SetHeapApiFunc(func);
}
} // namespace Libraries::Kernel

View File

@ -49,5 +49,6 @@ int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void**
int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
size_t infoSize);
void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func);
} // namespace Libraries::Kernel

View File

@ -5,10 +5,12 @@
#include <thread>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/singleton.h"
#include "common/thread.h"
#include "core/libraries/error_codes.h"
#include "core/libraries/kernel/thread_management.h"
#include "core/libraries/libs.h"
#include "core/linker.h"
#ifdef _WIN64
#include <windows.h>
#endif
@ -829,6 +831,8 @@ static void cleanup_thread(void* arg) {
static void* run_thread(void* arg) {
auto* thread = static_cast<ScePthread>(arg);
Common::SetCurrentThreadName(thread->name.c_str());
auto* linker = Common::Singleton<Core::Linker>::Instance();
linker->InitTlsForThread(false);
void* ret = nullptr;
g_pthread_self = thread;
pthread_cleanup_push(cleanup_thread, thread);
@ -1022,6 +1026,16 @@ int PS4_SYSV_ABI scePthreadEqual(ScePthread thread1, ScePthread thread2) {
return (thread1 == thread2 ? 1 : 0);
}
// Argument block received by __tls_get_addr. Both fields are forwarded unchanged to
// Linker::TlsGetAddr as (module index, offset).
struct TlsIndex {
// Index of the module whose TLS block is being addressed.
u64 ti_module;
// Offset added to the start of that module's TLS block.
u64 ti_offset;
};
// Resolves the address described by `index` for the calling thread by delegating to the
// process-wide linker.
void* PS4_SYSV_ABI __tls_get_addr(TlsIndex* index) {
    auto* const tls_linker = Common::Singleton<Core::Linker>::Instance();
    const u64 module_id = index->ti_module;
    const u64 tls_offset = index->ti_offset;
    return tls_linker->TlsGetAddr(module_id, tls_offset);
}
void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("4+h9EzwKF4I", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetschedpolicy);
LIB_FUNCTION("-Wreprtu0Qs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetdetachstate);
@ -1038,6 +1052,7 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("8+s5BzZjxSg", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGetaffinity);
LIB_FUNCTION("x1X76arYMxU", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrGet);
LIB_FUNCTION("UTXzJbWhhTE", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetstacksize);
LIB_FUNCTION("vNe1w4diLCs", "libkernel", 1, "libkernel", 1, 1, __tls_get_addr);
LIB_FUNCTION("bt3CTBKmGyI", "libkernel", 1, "libkernel", 1, 1, scePthreadSetaffinity);
LIB_FUNCTION("6UgtwV+0zb4", "libkernel", 1, "libkernel", 1, 1, scePthreadCreate);

View File

@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <windows.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/config.h"
#include "common/logging/log.h"
@ -10,8 +10,10 @@
#include "common/thread.h"
#include "core/aerolib/aerolib.h"
#include "core/aerolib/stubs.h"
#include "core/libraries/kernel/memory_management.h"
#include "core/libraries/kernel/thread_management.h"
#include "core/linker.h"
#include "core/tls.h"
#include "core/virtual_memory.h"
namespace Core {
@ -70,6 +72,7 @@ void Linker::Execute() {
// Init primary thread.
Common::SetCurrentThreadName("GAME_MainThread");
Libraries::Kernel::pthreadInitSelfMainThread();
InitTlsForThread(true);
// Start shared library modules
for (auto& m : m_modules) {
@ -248,6 +251,69 @@ void Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
return_info->name, library->name, module->name);
}
// Returns the address `offset` bytes into the calling thread's TLS block of the module
// identified by `module_index`.
//
// The DTV is taken from the current thread's TCB. Slot 0 holds the generation counter and
// module slots are stored at `module_index + 1`, matching Linker::InitTlsForThread.
// Asserts if the DTV generation is stale or if the module's slot has no block assigned;
// neither DTV reallocation nor lazy block allocation is implemented.
void* Linker::TlsGetAddr(u64 module_index, u64 offset) {
    DtvEntry* dtv_table = GetTcbBase()->tcb_dtv;
    ASSERT_MSG(dtv_table[0].counter == dtv_generation_counter,
               "Reallocation of DTV table is not supported");

    // Validate the slot BEFORE applying the offset: null + non-zero offset is non-null and
    // would otherwise slip past the check and be returned as a bogus address.
    u8* module = static_cast<u8*>(dtv_table[module_index + 1].pointer);
    ASSERT_MSG(module, "DTV allocation is not supported");
    return module + offset;
}
// Allocates and initializes the static TLS area, the TCB and the DTV for the calling thread,
// then installs the TCB as the thread's base pointer via SetTcbBase.
//
// is_primary: true for the main thread, whose block is mapped from flexible memory;
//             false for any other thread, whose block comes from the registered heap API
//             function if one was set, or from std::malloc otherwise.
//
// Block layout: [static TLS, static_tls_size bytes][TCB]. Each module's TLS image is placed
// at `tcb - module->tls.offset`, i.e. below the TCB.
// Neither the block nor the DTV is released by this function.
void Linker::InitTlsForThread(bool is_primary) {
    static constexpr size_t TcbSize = 0x40;
    static constexpr size_t TlsAllocAlign = 0x20;
    const size_t total_tls_size = Common::AlignUp(static_tls_size, TlsAllocAlign) + TcbSize;

    // The kernel module has a few different paths for TLS allocation.
    // For SDK < 1.7 it allocates both main and secondary thread blocks using libc mspace/malloc.
    // In games compiled with newer SDK, the main thread gets mapped from flexible memory,
    // with addr = 0, so system managed area. Here we will only implement the latter.
    void* addr_out{};
    if (is_primary) {
        const size_t tls_aligned = Common::AlignUp(total_tls_size, 16_KB);
        // NOTE(review): prot value 3 is presumably read|write - confirm against the
        // sceKernelMapNamedFlexibleMemory protection flags.
        const int ret = Libraries::Kernel::sceKernelMapNamedFlexibleMemory(
            &addr_out, tls_aligned, 3, 0, "SceKernelPrimaryTcbTls");
        ASSERT_MSG(ret == 0, "Unable to allocate TLS+TCB for the primary thread");
    } else {
        if (heap_api_func) {
            addr_out = heap_api_func(total_tls_size);
        } else {
            addr_out = std::malloc(total_tls_size);
        }
        // Both allocators may fail; catch that here instead of crashing inside memset.
        ASSERT_MSG(addr_out, "Unable to allocate TLS+TCB for a secondary thread");
    }

    // Initialize allocated memory and allocate DTV table.
    // The table is value-initialized so that slots of modules without a TLS image read as
    // null; TlsGetAddr relies on that to detect unassigned slots.
    const u32 num_dtvs = max_tls_index;
    std::memset(addr_out, 0, total_tls_size);
    DtvEntry* dtv_table = new DtvEntry[num_dtvs + 2]{};

    // Initialize thread control block
    u8* addr = static_cast<u8*>(addr_out);
    Tcb* tcb = reinterpret_cast<Tcb*>(addr + static_tls_size);
    tcb->tcb_self = tcb;
    tcb->tcb_dtv = dtv_table;

    // Dtv[0] is the generation counter. libkernel also stores the number of DTV entries
    // in dtv[1] (reason unknown).
    dtv_table[0].counter = dtv_generation_counter;
    dtv_table[1].counter = num_dtvs;

    // Copy init images to TLS thread blocks and map them to DTV slots.
    for (const auto& module : m_modules) {
        if (module->tls.image_size == 0) {
            continue;
        }
        u8* dest = addr + static_tls_size - module->tls.offset;
        const u8* src = reinterpret_cast<const u8*>(module->tls.image_virtual_addr);
        std::memcpy(dest, src, module->tls.init_image_size);
        tcb->tcb_dtv[module->tls.modid + 1].pointer = dest;
    }

    // Set pointer to FS base
    SetTcbBase(tcb);
}
void Linker::DebugDump() {
const auto& log_dir = Common::FS::GetUserPath(Common::FS::PathType::LogDir);
const std::filesystem::path debug(log_dir / "debugdump");

View File

@ -18,6 +18,8 @@ struct EntryParams {
const char* argv[3];
};
/// Guest allocation routine using the PS4 SysV ABI: Linker::InitTlsForThread calls it with a
/// size in bytes and uses the returned pointer as the thread's TLS+TCB block.
using HeapApiFunc = PS4_SYSV_ABI void* (*)(size_t);
class Linker {
public:
explicit Linker();
@ -35,6 +37,13 @@ public:
return m_modules.at(index).get();
}
/// Registers the heap allocation routine used for non-primary thread TLS blocks.
/// `func` is treated as the address of a HeapApiFunc slot; the function pointer stored in
/// that slot is read and kept.
void SetHeapApiFunc(void* func) {
    auto* const api_slot = reinterpret_cast<HeapApiFunc*>(func);
    heap_api_func = api_slot[0];
}
void* TlsGetAddr(u64 module_index, u64 offset);
void InitTlsForThread(bool is_primary = false);
s32 LoadModule(const std::filesystem::path& elf_name);
void Relocate(Module* module);
@ -50,6 +59,7 @@ private:
u32 dtv_generation_counter{1};
size_t static_tls_size{};
size_t max_tls_index{};
HeapApiFunc heap_api_func{};
std::vector<std::unique_ptr<Module>> m_modules;
Loader::SymbolsResolver m_hle_symbols{};
};

View File

@ -44,13 +44,15 @@ constexpr static TLSPattern TlsPatterns[] = {
#ifdef _WIN32
static DWORD slot = 0;
void SetTLSStorage(u64 image_address) {
// Guest apps will use both positive and negative offsets to the TLS pointer.
// User data is probably at negative offsets, while pthread data is at positive offsets.
const BOOL result = TlsSetValue(slot, reinterpret_cast<LPVOID>(image_address));
// Stores `image_address` in the Win32 TLS slot for the calling thread and asserts that the
// store succeeded.
void SetTcbBase(void* image_address) {
    const BOOL result = ::TlsSetValue(slot, image_address);
    ASSERT(result != 0);
}
// Reads back the value held in the Win32 TLS slot for the calling thread, typed as a TCB
// pointer.
Tcb* GetTcbBase() {
    void* const stored = TlsGetValue(slot);
    return static_cast<Tcb*>(stored);
}
void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
using namespace Xbyak::util;

View File

@ -11,8 +11,22 @@ class CodeGenerator;
namespace Core {
/// Sets the data pointer that contains the TLS image.
void SetTLSStorage(u64 image_address);
/// One slot of a thread's dynamic thread vector (DTV). A slot is read either as a counter
/// or as a pointer, depending on its position in the table.
union DtvEntry {
/// Used by slot 0 (DTV generation counter) and slot 1 (number of DTV entries).
size_t counter;
/// Used by module slots: start of that module's TLS block for the thread.
void* pointer;
};
/// Thread control block. SetTcbBase/GetTcbBase store and retrieve a pointer to it per thread.
struct Tcb {
/// Address of this same structure.
Tcb* tcb_self;
/// Dynamic thread vector belonging to the thread.
DtvEntry* tcb_dtv;
/// NOTE(review): presumably the owning pthread object; no code visible here writes it - confirm.
void* tcb_thread;
};
/// Sets the data pointer to the TCB block.
void SetTcbBase(void* image_address);
/// Retrieves Tcb structure for the calling thread.
Tcb* GetTcbBase();
/// Patches any instructions that access guest TLS to use provided storage.
void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c);