tls: Implement for linux

This commit is contained in:
IndecisiveTurtle 2024-06-13 21:50:08 +03:00
parent e62690759d
commit 77b7f66ee2
9 changed files with 88 additions and 63 deletions

View File

@ -1,10 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <sstream>
#include <unordered_map> #include <unordered_map>
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/path_util.h" #include "common/path_util.h"

View File

@ -1,11 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <fstream>
#include <zlib-ng.h> #include <zlib-ng.h>
#include "common/io_file.h" #include "common/io_file.h"
#include "pkg.h" #include "core/file_format/pkg.h"
#include "pkg_type.h" #include "core/file_format/pkg_type.h"
static void DecompressPFSC(std::span<const char> compressed_data, static void DecompressPFSC(std::span<const char> compressed_data,
std::span<char> decompressed_data) { std::span<char> decompressed_data) {

View File

@ -4,7 +4,6 @@
#pragma once #pragma once
#include <array> #include <array>
#include <cstdio>
#include <filesystem> #include <filesystem>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>

View File

@ -2,11 +2,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring> #include <cstring>
#include <fstream>
#include <iostream>
#include "common/io_file.h" #include "common/io_file.h"
#include "psf.h" #include "core/file_format/psf.h"
PSF::PSF() = default; PSF::PSF() = default;

View File

@ -8,6 +8,8 @@
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> #include <windows.h>
#else
#include <pthread.h>
#endif #endif
namespace Core { namespace Core {
@ -53,9 +55,83 @@ Tcb* GetTcbBase() {
return reinterpret_cast<Tcb*>(TlsGetValue(slot)); return reinterpret_cast<Tcb*>(TlsGetValue(slot));
} }
void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) { static void AllocTcbKey() {
using namespace Xbyak::util; slot = TlsAlloc();
}
/// Redirects the matched TLS access located at `code` to a trampoline emitted through `c`.
/// The patched bytes become a near jump to the trampoline; the trampoline fills the register
/// named by `tls_pattern.target_reg` and then jumps back to the first byte after the patch.
static void PatchFsAccess(u8* code, const TLSPattern& tls_pattern, Xbyak::CodeGenerator& c) {
    using namespace Xbyak::util;

    // Size of a near jump; the rest of the patched region is padded with nops.
    static constexpr u32 NearJmpSize = 5;
    const auto total_size = tls_pattern.pattern_size + tls_pattern.imm_size;

    // Overwrite the original mov in place with a jump to the current trampoline position.
    Xbyak::CodeGenerator patch(total_size, code);
    patch.jmp(c.getCurr(), Xbyak::CodeGenerator::LabelType::T_NEAR);
    patch.nop(total_size - NearJmpSize);

    // Emit the trampoline.
    // The lookup mirrors the wine implementation of TlsGetValue
    // https://github.com/wine-mirror/wine/blob/a27b9551/dlls/kernelbase/thread.c#L719
    static constexpr u32 TlsSlotsOffset = 0x1480;
    static constexpr u32 TlsExpansionSlotsOffset = 0x1780;
    static constexpr u32 TlsMinimumAvailable = 64;

    // Slots below TlsMinimumAvailable use one offset, the remaining ones use the expansion
    // offset and are indexed relative to TlsMinimumAvailable.
    const bool uses_expansion = !(slot < TlsMinimumAvailable);
    const u32 teb_offset = uses_expansion ? TlsExpansionSlotsOffset : TlsSlotsOffset;
    const u32 tls_index = uses_expansion ? slot - TlsMinimumAvailable : slot;

    const auto dest = Xbyak::Reg64(tls_pattern.target_reg);
    c.mov(dest, teb_offset);
    c.putSeg(gs); // The next load is performed relative to the GS segment.
    c.mov(dest, ptr[dest]); // Load the pointer to the table of tls slots.
    c.mov(dest, qword[dest + tls_index * sizeof(LPVOID)]); // Load the pointer to our buffer.
    c.jmp(code + total_size); // Return to the instruction right after the mov.
}
#else
// pthread key under which SetTcbBase stores, and GetTcbBase reads, the calling thread's Tcb
// pointer. PatchTLS compares it against 0 to decide whether AllocTcbKey still has to run.
static pthread_key_t slot = 0;
/// Stores `image_address` as the calling thread's value for the `slot` pthread key.
/// Asserts that pthread_setspecific reported success.
void SetTcbBase(void* image_address) {
    // The call is kept outside of ASSERT so that it is still executed should the macro ever
    // be compiled out; only the result check lives inside the assertion.
    [[maybe_unused]] const int result = pthread_setspecific(slot, image_address);
    ASSERT(result == 0);
}
/// Returns the value the calling thread has stored under the `slot` pthread key,
/// interpreted as a Tcb pointer.
Tcb* GetTcbBase() {
    void* const thread_value = pthread_getspecific(slot);
    return reinterpret_cast<Tcb*>(thread_value);
}
static void AllocTcbKey() {
slot = pthread_key_create(&slot, nullptr);
}
/// Redirects the matched FS-relative TLS access located at `code` to a trampoline emitted
/// through `c`. The patched bytes become a near jump to the trampoline; the trampoline loads
/// the value stored under the `slot` pthread key into the register named by
/// `tls_pattern.target_reg` and then jumps back to the first byte after the patch.
static void PatchFsAccess(u8* code, const TLSPattern& tls_pattern, Xbyak::CodeGenerator& c) {
    using namespace Xbyak::util;
    const auto total_size = tls_pattern.pattern_size + tls_pattern.imm_size;

    // Replace mov instruction with near jump to the trampoline.
    static constexpr u32 NearJmpSize = 5;
    auto patch = Xbyak::CodeGenerator(total_size, code);
    patch.jmp(c.getCurr(), Xbyak::CodeGenerator::LabelType::T_NEAR);
    patch.nop(total_size - NearJmpSize);

    // Write the trampoline.
    // The following logic is based on the glibc implementation of pthread_getspecific
    // https://github.com/bminor/glibc/blob/29807a27/nptl/pthread_getspecific.c#L23
    // NOTE(review): the offsets below describe glibc-internal structure layout and presumably
    // depend on the glibc version/architecture -- confirm against the targeted glibc.
    static constexpr u32 PthreadKeySecondLevelSize = 32;
    static constexpr u32 SpecificFirstBlockOffset = 0x308;
    static constexpr u32 SelfInTcbheadOffset = 16;
    static constexpr u32 PthreadKeyDataSize = 16;
    // Only keys that live in the first block of key data can be reached with a fixed offset.
    ASSERT(slot < PthreadKeySecondLevelSize);

    // Offset, relative to the thread's self pointer, of the data pointer of our key entry.
    // Each entry is PthreadKeyDataSize bytes; the data pointer follows a uintptr_t-sized field.
    const auto data_offset =
        SpecificFirstBlockOffset + slot * PthreadKeyDataSize + sizeof(uintptr_t);

    const auto target_reg = Xbyak::Reg64(tls_pattern.target_reg);
    c.putSeg(fs);
    c.mov(target_reg, qword[SelfInTcbheadOffset]); // Load self member pointer of tcbhead_t.
    // Dereference relative to the self pointer loaded above. An lea of the bare offset would
    // discard that pointer and leave a small constant in the register instead of our data.
    c.mov(target_reg, qword[target_reg + data_offset]); // Load the pointer to our data.
    c.jmp(code + total_size); // Return to the instruction right after the mov.
}
void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
u8* code = reinterpret_cast<u8*>(segment_addr); u8* code = reinterpret_cast<u8*>(segment_addr);
auto remaining_size = segment_size; auto remaining_size = segment_size;
@ -89,7 +165,7 @@ void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
// Allocate slot in the process if not done already. // Allocate slot in the process if not done already.
if (slot == 0) { if (slot == 0) {
slot = TlsAlloc(); AllocTcbKey();
} }
// Replace bogus instruction prefix with nops if it exists. // Replace bogus instruction prefix with nops if it exists.
@ -98,30 +174,8 @@ void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
patch.nop(BadPrefix.size()); patch.nop(BadPrefix.size());
} }
// Replace mov instruction with near jump to the trampoline. // Patch access to FS register to a trampoline.
static constexpr u32 NearJmpSize = 5; PatchFsAccess(code, tls_pattern, c);
auto patch = Xbyak::CodeGenerator(total_size, code);
patch.jmp(c.getCurr(), Xbyak::CodeGenerator::LabelType::T_NEAR);
patch.nop(total_size - NearJmpSize);
// Write the trampoline.
// The following logic is based on the wine implementation of TlsGetValue
// https://github.com/wine-mirror/wine/blob/a27b9551/dlls/kernelbase/thread.c#L719
static constexpr u32 TlsSlotsOffset = 0x1480;
static constexpr u32 TlsExpansionSlotsOffset = 0x1780;
static constexpr u32 TlsMinimumAvailable = 64;
const u32 teb_offset =
slot < TlsMinimumAvailable ? TlsSlotsOffset : TlsExpansionSlotsOffset;
const u32 tls_index = slot < TlsMinimumAvailable ? slot : slot - TlsMinimumAvailable;
const auto target_reg = Xbyak::Reg64(tls_pattern.target_reg);
c.mov(target_reg, teb_offset);
c.putSeg(gs);
c.mov(target_reg, ptr[target_reg]); // Load the pointer to the table of tls slots.
c.mov(
target_reg,
qword[target_reg + tls_index * sizeof(LPVOID)]); // Load the pointer to our buffer.
c.jmp(code + total_size); // Return to the instruction right after the mov.
// Move ahead in module. // Move ahead in module.
code += total_size - 1; code += total_size - 1;
@ -133,20 +187,6 @@ void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
} }
} }
#else
void SetTcbBase(void* image_address) {
UNREACHABLE_MSG("Thread local storage is unimplemented on posix platforms!");
}
Tcb* GetTcbBase() {
UNREACHABLE_MSG("Thread local storage is unimplemented on posix platforms!");
}
void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
UNREACHABLE_MSG("Thread local storage is unimplemented on posix platforms!");
}
#endif #endif
} // namespace Core } // namespace Core

View File

@ -40,10 +40,6 @@ Emulator::Emulator() : window{WindowWidth, WindowHeight, controller} {
Common::Log::Initialize(); Common::Log::Initialize();
Common::Log::Start(); Common::Log::Start();
// Start discord integration
discord_rpc.init();
discord_rpc.update(Discord::RPCStatus::Idling, "");
// Initialize kernel and library facilities. // Initialize kernel and library facilities.
Libraries::Kernel::init_pthreads(); Libraries::Kernel::init_pthreads();
Libraries::InitHLELibs(&linker->GetHLESymbols()); Libraries::InitHLELibs(&linker->GetHLESymbols());
@ -52,7 +48,6 @@ Emulator::Emulator() : window{WindowWidth, WindowHeight, controller} {
Emulator::~Emulator() { Emulator::~Emulator() {
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
Config::save(config_dir / "config.toml"); Config::save(config_dir / "config.toml");
discord_rpc.stop();
} }
void Emulator::Run(const std::filesystem::path& file) { void Emulator::Run(const std::filesystem::path& file) {

View File

@ -7,7 +7,6 @@
#include <thread> #include <thread>
#include <common/singleton.h> #include <common/singleton.h>
#include "common/discord.h"
#include "core/linker.h" #include "core/linker.h"
#include "input/controller.h" #include "input/controller.h"
#include "sdl_window.h" #include "sdl_window.h"
@ -23,7 +22,6 @@ public:
private: private:
void LoadSystemModules(const std::filesystem::path& file); void LoadSystemModules(const std::filesystem::path& file);
Discord::RPC discord_rpc;
Input::GameController* controller = Common::Singleton<Input::GameController>::Instance(); Input::GameController* controller = Common::Singleton<Input::GameController>::Instance();
Core::Linker* linker = Common::Singleton<Core::Linker>::Instance(); Core::Linker* linker = Common::Singleton<Core::Linker>::Instance();
Frontend::WindowSDL window; Frontend::WindowSDL window;

View File

@ -695,7 +695,7 @@ struct Liverpool {
NumberFormat NumFormat() const { NumberFormat NumFormat() const {
// There is a small difference between T# and CB number types, account for it. // There is a small difference between T# and CB number types, account for it.
return info.number_type == AmdGpu::NumberFormat::SnormNz ? AmdGpu::NumberFormat::Srgb return info.number_type == AmdGpu::NumberFormat::SnormNz ? AmdGpu::NumberFormat::Srgb
: info.number_type; : info.number_type.Value();
} }
}; };

View File

@ -32,7 +32,7 @@ struct Buffer {
}; };
u32 GetStride() const noexcept { u32 GetStride() const noexcept {
return stride == 0 ? 1U : stride; return stride == 0 ? 1U : stride.Value();
} }
u32 GetStrideElements(u32 element_size) const noexcept { u32 GetStrideElements(u32 element_size) const noexcept {