From 69b7088120587e3fd6f8bd53767940f0d705beed Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 8 Sep 2024 00:05:03 -0700 Subject: [PATCH] cpu_patches: Patch just-in-time using illegal instruction handler. --- src/core/cpu_patches.cpp | 231 ++++++++++++++++++++++++++++++--------- src/core/cpu_patches.h | 12 +- src/core/module.cpp | 7 +- 3 files changed, 191 insertions(+), 59 deletions(-) diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index 91b3bcd40..48bf66e5e 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -1,10 +1,13 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include #include #include #include #include +#include "common/alignment.h" #include "common/assert.h" #include "common/types.h" #include "core/tls.h" @@ -593,14 +596,7 @@ struct PatchInfo { bool trampoline; }; -static const std::unordered_map Patches = { -#if defined(_WIN32) - // Windows needs a trampoline. - {ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, true}}, -#elif !defined(__APPLE__) - {ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, false}}, -#endif - +static const std::unordered_map OnDemandPatches = { #ifdef __APPLE__ // Patches for instruction sets not supported by Rosetta 2. // BMI1 @@ -615,64 +611,199 @@ static const std::unordered_map Patches = { #endif }; -void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) { - if (Patches.empty()) { - // Nothing to patch on this platform. - return; - } +// TODO: Currently only illegal instruction patches are set up to be caught at runtime. +// TODO: These other patches like TCB access should be moved into the same system in the future. +static const std::unordered_map StartupPatches = { +#if defined(_WIN32) + // Windows needs a trampoline. + {ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, true}}, +#elif !defined(__APPLE__) + {ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, false}}, +#endif +}; + +static std::once_flag init_flag; +static ZydisDecoder instr_decoder; +static ZydisFormatter instr_formatter; + +struct PatchModule { + /// Mutex controlling access to module code regions. + std::mutex mutex{}; + + /// Start of the module. + u8* start; + + /// End of the module. + u8* end; + + /// Code generator for patching the module. + Xbyak::CodeGenerator patch_gen; + + /// Code generator for writing trampoline patches. + Xbyak::CodeGenerator trampoline_gen; + + PatchModule(u8* module_ptr, const u64 module_size, u8* trampoline_ptr, + const u64 trampoline_size) + : start(module_ptr), end(module_ptr + module_size), patch_gen(module_size, module_ptr), + trampoline_gen(trampoline_size, trampoline_ptr) {} +}; +static std::map modules; + +static PatchModule& GetModule(const void* ptr) { + auto upper_bound = modules.upper_bound(reinterpret_cast(ptr)); + ASSERT_MSG(upper_bound != modules.begin(), "Unable to find module for code at: {}", + fmt::ptr(ptr)); + return std::prev(upper_bound)->second; +} + +static u64 TryPatch(u8* code, PatchModule& module, + const std::unordered_map& patches, + bool required = false) { + std::unique_lock lock{module.mutex}; - ZydisDecoder instr_decoder; ZydisDecodedInstruction instruction; ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; - ZydisDecoderInit(&instr_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64); - - u8* code = reinterpret_cast(segment_addr); - u8* end = code + segment_size; - while (code < end) { - ZyanStatus status = - ZydisDecoderDecodeFull(&instr_decoder, code, end - code, &instruction, operands); - if (!ZYAN_SUCCESS(status)) { - code++; - continue; + const auto status = + ZydisDecoderDecodeFull(&instr_decoder, code, module.end - code, &instruction, operands); + if (!ZYAN_SUCCESS(status)) { + if (required) { + UNREACHABLE_MSG("Unable to decode instruction at {}", fmt::ptr(code)); } + return 1; + } - if (Patches.contains(instruction.mnemonic)) { - auto patch_info = Patches.at(instruction.mnemonic); - if (patch_info.filter(operands)) { - auto patch_gen = Xbyak::CodeGenerator(instruction.length, code); + // Assume a jmp is an existing patch, in case multiple threads signaled at the same time. + if (instruction.mnemonic == ZYDIS_MNEMONIC_JMP) { + if (required) { + LOG_INFO(Core, "Skipping already patched instruction at {}", fmt::ptr(code)); + } + return instruction.length; + } - if (patch_info.trampoline) { - const auto trampoline_ptr = c.getCurr(); + if (patches.contains(instruction.mnemonic)) { + const auto& patch_info = patches.at(instruction.mnemonic); + if (patch_info.filter(operands)) { + auto& patch_gen = module.patch_gen; - patch_info.generator(operands, c); + // Reset state and move to current code position. + patch_gen.reset(); + patch_gen.setSize(code - patch_gen.getCode()); - // Return to the following instruction at the end of the trampoline. - c.jmp(code + instruction.length); + if (patch_info.trampoline) { + auto& trampoline_gen = module.trampoline_gen; + const auto trampoline_ptr = trampoline_gen.getCurr(); - // Replace instruction with near jump to the trampoline. - patch_gen.jmp(trampoline_ptr, Xbyak::CodeGenerator::LabelType::T_NEAR); - } else { - patch_info.generator(operands, patch_gen); - } + patch_info.generator(operands, trampoline_gen); - const auto patch_size = patch_gen.getCurr() - code; - if (patch_size > 0) { - ASSERT_MSG(instruction.length >= patch_size, - "Instruction {} with length {} is too short to replace at: {}", - ZydisMnemonicGetString(instruction.mnemonic), instruction.length, - fmt::ptr(code)); + // Return to the following instruction at the end of the trampoline. + trampoline_gen.jmp(code + instruction.length); - // Fill remaining space with nops. - patch_gen.nop(instruction.length - patch_size); + // Replace instruction with near jump to the trampoline. + patch_gen.jmp(trampoline_ptr, Xbyak::CodeGenerator::LabelType::T_NEAR); + } else { + patch_info.generator(operands, patch_gen); + } - LOG_DEBUG(Core, "Patched instruction '{}' at: {}", - ZydisMnemonicGetString(instruction.mnemonic), fmt::ptr(code)); - } + const auto patch_size = patch_gen.getCurr() - code; + if (patch_size > 0) { + ASSERT_MSG(instruction.length >= patch_size, + "Instruction {} with length {} is too short to replace at: {}", + ZydisMnemonicGetString(instruction.mnemonic), instruction.length, + fmt::ptr(code)); + + // Fill remaining space with nops. + patch_gen.nop(instruction.length - patch_size); + + LOG_DEBUG(Core, "Patched instruction '{}' at: {}", + ZydisMnemonicGetString(instruction.mnemonic), fmt::ptr(code)); + return instruction.length; } } + } - code += instruction.length; + if (required) { + char buffer[256]; + ZydisFormatterFormatInstruction(&instr_formatter, &instruction, operands, + instruction.operand_count_visible, buffer, sizeof(buffer), + reinterpret_cast(code), ZYAN_NULL); + UNIMPLEMENTED_MSG("Encountered instruction at {} without patch: {}", fmt::ptr(code), + buffer); + } + + return instruction.length; +} + +#if defined(_WIN32) +static LONG WINAPI SignalHandler(EXCEPTION_POINTERS* pExp) noexcept { + const u32 ec = pExp->ExceptionRecord->ExceptionCode; + if (ec == EXCEPTION_ILLEGAL_INSTRUCTION) { + auto* code = reinterpret_cast(pExp->ExceptionRecord->ExceptionAddress); + auto& module = GetModule(code); + TryPatch(code, module, OnDemandPatches, true); + return EXCEPTION_CONTINUE_EXECUTION; + } + return EXCEPTION_CONTINUE_SEARCH; +} +#else +static void SignalHandler(int sig, siginfo_t* info, void* raw_context) { + auto* code = static_cast(info->si_addr); + auto& module = GetModule(code); + TryPatch(code, module, OnDemandPatches, true); +} +#endif + +static void PatchesInit() { + ZydisDecoderInit(&instr_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64); + ZydisFormatterInit(&instr_formatter, ZYDIS_FORMATTER_STYLE_INTEL); + + if (!OnDemandPatches.empty()) { +#if defined(_WIN32) + ASSERT_MSG(AddVectoredExceptionHandler(0, SignalHandler), + "Failed to register code patching exception handler."); +#else + constexpr struct sigaction action { + .sa_flags = SA_SIGINFO | SA_ONSTACK, .sa_sigaction = SignalHandler, .sa_mask = 0, + }; + ASSERT_MSG(sigaction(SIGILL, &action, nullptr) == 0, + "Failed to register code patching signal handler."); +#endif } } +void RegisterPatchModule(void* module_ptr, u64 module_size, void* trampoline_area_ptr, + u64 trampoline_area_size) { + std::call_once(init_flag, PatchesInit); + + const auto module_addr = reinterpret_cast(module_ptr); + modules.emplace(std::piecewise_construct, std::forward_as_tuple(module_addr), + std::forward_as_tuple(static_cast(module_ptr), module_size, + static_cast(trampoline_area_ptr), + trampoline_area_size)); +} + +void PrePatchInstructions(u64 segment_addr, u64 segment_size) { + auto& module = GetModule(reinterpret_cast(segment_addr)); + + if (!StartupPatches.empty()) { + u8* code = reinterpret_cast(segment_addr); + u8* end = code + segment_size; + while (code < end) { + code += TryPatch(code, module, StartupPatches); + } + } + +#ifdef __APPLE__ + // HACK: For some reason patching in the signal handler at the start of a page does not work + // under Rosetta 2. Patch any instructions at the start of a page ahead of time. + if (!OnDemandPatches.empty()) { + u8* code_page = reinterpret_cast(Common::AlignUp(segment_addr, 0x1000)); + u8* end_page = code_page + Common::AlignUp(segment_size, 0x1000); + while (code_page < end_page) { + TryPatch(code_page, module, OnDemandPatches); + code_page += 0x1000; + } + } +#endif +} + } // namespace Core diff --git a/src/core/cpu_patches.h b/src/core/cpu_patches.h index 9126074ed..f9f7fe646 100644 --- a/src/core/cpu_patches.h +++ b/src/core/cpu_patches.h @@ -3,10 +3,6 @@ #pragma once -namespace Xbyak { -class CodeGenerator; -} - namespace Core { /// Initializes a stack for the current thread for use by patch implementations. @@ -15,7 +11,11 @@ void InitializeThreadPatchStack(); /// Cleans up the patch stack for the current thread. void CleanupThreadPatchStack(); -/// Patches CPU instructions that cannot run as-is on the host. -void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c); +/// Registers a module for patching, providing an area to generate trampoline code. +void RegisterPatchModule(void* module_ptr, u64 module_size, void* trampoline_area_ptr, + u64 trampoline_area_size); + +/// Applies CPU patches that need to be done before beginning executions. +void PrePatchInstructions(u64 segment_addr, u64 segment_size); } // namespace Core diff --git a/src/core/module.cpp b/src/core/module.cpp index c28ac1061..553f050e6 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -1,7 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include #include "common/alignment.h" #include "common/arch.h" #include "common/assert.h" @@ -94,9 +93,11 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { LoadOffset += CODE_BASE_INCR * (1 + aligned_base_size / CODE_BASE_INCR); LOG_INFO(Core_Linker, "Loading module {} to {}", name, fmt::ptr(*out_addr)); +#ifdef ARCH_X86_64 // Initialize trampoline generator. void* trampoline_addr = std::bit_cast(base_virtual_addr + aligned_base_size); - Xbyak::CodeGenerator c(TrampolineSize, trampoline_addr); + RegisterPatchModule(*out_addr, aligned_base_size, trampoline_addr, TrampolineSize); +#endif LOG_INFO(Core_Linker, "======== Load Module to Memory ========"); LOG_INFO(Core_Linker, "base_virtual_addr ......: {:#018x}", base_virtual_addr); @@ -137,7 +138,7 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { add_segment(elf_pheader[i]); #ifdef ARCH_X86_64 if (elf_pheader[i].p_flags & PF_EXEC) { - PatchInstructions(segment_addr, segment_file_size, c); + PrePatchInstructions(segment_addr, segment_file_size); } #endif break;