Patch EXTRQ

This commit is contained in:
offtkp 2024-09-16 20:05:54 +03:00
parent b42034dad1
commit 75ce77468b
7 changed files with 343 additions and 45 deletions

View File

@ -369,6 +369,8 @@ set(COMMON src/common/logging/backend.cpp
src/common/polyfill_thread.h src/common/polyfill_thread.h
src/common/rdtsc.cpp src/common/rdtsc.cpp
src/common/rdtsc.h src/common/rdtsc.h
src/common/signal_context.h
src/common/signal_context.cpp
src/common/singleton.h src/common/singleton.h
src/common/slot_vector.h src/common/slot_vector.h
src/common/string_util.cpp src/common/string_util.cpp

View File

@ -0,0 +1,90 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/arch.h"
#include "common/assert.h"
#include "common/signal_context.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/ucontext.h>
#endif
namespace Common {
// Returns a pointer to the saved low bytes of XMM register `index` inside the
// OS exception/signal context `ctx` (EXCEPTION_POINTERS* on Windows,
// ucontext_t* elsewhere). Only indices 0-15 are valid; anything else trips
// UNREACHABLE_MSG.
void* GetXmmPointer(void* ctx, u8 index) {
// XMM_SLOT(n) names the storage of register n in the platform's context layout.
// Windows and macOS expose one named field per register, so the register
// number has to be token-pasted; that is why a switch is used instead of
// plain array indexing.
#if defined(_WIN32)
#define XMM_SLOT(n) (static_cast<EXCEPTION_POINTERS*>(ctx)->ContextRecord->Xmm##n.Low)
#elif defined(__APPLE__)
#define XMM_SLOT(n) (static_cast<ucontext_t*>(ctx)->uc_mcontext->__fs.__fpu_xmm##n)
#else
#define XMM_SLOT(n) (static_cast<ucontext_t*>(ctx)->uc_mcontext.fpregs->_xmm[n].element[0])
#endif
#define XMM_CASE(n)                                                                                \
    case n:                                                                                        \
        return static_cast<void*>(&XMM_SLOT(n))

    switch (index) {
        XMM_CASE(0);
        XMM_CASE(1);
        XMM_CASE(2);
        XMM_CASE(3);
        XMM_CASE(4);
        XMM_CASE(5);
        XMM_CASE(6);
        XMM_CASE(7);
        XMM_CASE(8);
        XMM_CASE(9);
        XMM_CASE(10);
        XMM_CASE(11);
        XMM_CASE(12);
        XMM_CASE(13);
        XMM_CASE(14);
        XMM_CASE(15);
    default: {
        UNREACHABLE_MSG("Invalid XMM register index: {}", index);
        return nullptr;
    }
    }

#undef XMM_CASE
#undef XMM_SLOT
}
// Returns the saved instruction pointer from the OS exception/signal context
// `ctx` (EXCEPTION_POINTERS* on Windows, ucontext_t* elsewhere), i.e. the
// address of the instruction that was executing when the fault was raised.
//
// The value of RIP is returned, not the address of the slot that stores it:
// callers feed the result straight into code-address consumers (module lookup,
// disassembly, logging).
void* GetRip(void* ctx) {
#if defined(_WIN32)
    return reinterpret_cast<void*>(static_cast<EXCEPTION_POINTERS*>(ctx)->ContextRecord->Rip);
#elif defined(__APPLE__)
    return reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext->__ss.__rip);
#else
    return reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_RIP]);
#endif
}

// Advances the saved instruction pointer in `ctx` by `length` bytes so that
// execution resumes after an instruction that was emulated by the handler.
void IncrementRip(void* ctx, u64 length) {
#if defined(_WIN32)
    static_cast<EXCEPTION_POINTERS*>(ctx)->ContextRecord->Rip += length;
#elif defined(__APPLE__)
    static_cast<ucontext_t*>(ctx)->uc_mcontext->__ss.__rip += length;
#else
    static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_RIP] += length;
#endif
}
// Reports whether the access violation described by the OS exception/signal
// context `ctx` was caused by a write (true) rather than a read (false).
bool IsWriteError(void* ctx) {
#if defined(_WIN32)
    // For access violations, ExceptionInformation[0] holds the access type; 1 means write.
    return static_cast<EXCEPTION_POINTERS*>(ctx)->ExceptionRecord->ExceptionInformation[0] == 1;
#elif defined(__APPLE__)
#if defined(ARCH_X86_64)
    // Bit 1 of the x86 page-fault error code is set for write accesses.
    return (static_cast<ucontext_t*>(ctx)->uc_mcontext->__es.__err & 0x2) != 0;
#elif defined(ARCH_ARM64)
    // Bit 6 (WnR) of the ARM64 exception syndrome is set for write accesses.
    return (static_cast<ucontext_t*>(ctx)->uc_mcontext->__es.__esr & 0x40) != 0;
#else
// Without this the function would compile with no return statement at all.
#error "Unsupported architecture"
#endif
#else
#if defined(ARCH_X86_64)
    // Bit 1 of the x86 page-fault error code is set for write accesses.
    return (static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_ERR] & 0x2) != 0;
#else
#error "Unsupported architecture"
#endif
#endif
}
} // namespace Common

View File

@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
// Helpers for reading and modifying the CPU state captured in an OS
// exception/signal context. `ctx` is the opaque context pointer handed to the
// handler: EXCEPTION_POINTERS* on Windows, ucontext_t* on other platforms.
namespace Common {
/// Returns a pointer to the saved contents of XMM register `index` (0-15) inside `ctx`.
void* GetXmmPointer(void* ctx, u8 index);
/// Looks up the saved instruction pointer (RIP) recorded in `ctx`.
void* GetRip(void* ctx);
/// Advances the saved instruction pointer in `ctx` by `length` bytes.
void IncrementRip(void* ctx, u64 length);
/// Returns true when the fault recorded in `ctx` was caused by a write access.
bool IsWriteError(void* ctx);
} // namespace Common

View File

@ -7,9 +7,12 @@
#include <set> #include <set>
#include <Zydis/Zydis.h> #include <Zydis/Zydis.h>
#include <xbyak/xbyak.h> #include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "common/alignment.h" #include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/decoder.h" #include "common/decoder.h"
#include "common/signal_context.h"
#include "common/types.h" #include "common/types.h"
#include "core/signals.h" #include "core/signals.h"
#include "core/tls.h" #include "core/tls.h"
@ -27,6 +30,16 @@
using namespace Xbyak::util; using namespace Xbyak::util;
// Emits instruction OPCODE with the given operands on the code generator `c`,
// which must be a variable in scope at the point of use. When the host CPU
// reports AVX support the `v`-prefixed emitter (c.vOPCODE) is called, otherwise
// the plain one (c.OPCODE). The CPU feature query runs each time the macro is
// expanded and executed, i.e. while the patch is generated, not inside the
// emitted code.
#define MAYBE_AVX(OPCODE, ...) \
[&] { \
Cpu cpu; \
if (cpu.has(Cpu::tAVX)) { \
c.v##OPCODE(__VA_ARGS__); \
} else { \
c.OPCODE(__VA_ARGS__); \
} \
}()
namespace Core { namespace Core {
static Xbyak::Reg ZydisToXbyakRegister(const ZydisRegister reg) { static Xbyak::Reg ZydisToXbyakRegister(const ZydisRegister reg) {
@ -587,6 +600,114 @@ static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGe
#endif // __APPLE__ #endif // __APPLE__
static bool FilterNoSSE4a(const ZydisDecodedOperand*) {
Cpu cpu;
return !cpu.has(Cpu::tSSE4a);
}
static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER, "operand 0 must be a register");
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
ASSERT_MSG(dst.isXMM(), "operand 0 must be an XMM register");
Xbyak::Xmm xmm_dst = *reinterpret_cast<const Xbyak::Xmm*>(&dst);
if (immediateForm) {
u8 length = operands[1].imm.value.u & 0x3F;
u8 index = operands[2].imm.value.u & 0x3F;
if (length == 0) {
length = 64;
}
LOG_DEBUG(Core, "Patching immediate form EXTRQ, length: {}, index: {}", length, index);
const Xbyak::Reg64 scratch1 = rax;
const Xbyak::Reg64 scratch2 = rcx;
// Set rsp to before red zone and save scratch registers
c.lea(rsp, ptr[rsp - 128]);
c.pushfq();
c.push(scratch1);
c.push(scratch2);
u64 mask = (1ULL << length) - 1;
// Get lower qword from xmm register
MAYBE_AVX(movq, scratch1, xmm_dst);
if (index != 0) {
c.shr(scratch1, index);
}
// We need to move mask to a register because we can't use all the possible
// immediate values with `and reg, imm32`
c.mov(scratch2, mask);
c.and_(scratch1, scratch2);
// Writeback to xmm register, extrq instruction says top 64-bits are undefined so we don't
// care to preserve them
MAYBE_AVX(movq, xmm_dst, scratch1);
c.pop(scratch2);
c.pop(scratch1);
c.popfq();
c.lea(rsp, ptr[rsp + 128]);
} else {
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[0].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[0].reg.value <= ZYDIS_REGISTER_XMM15 &&
operands[1].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[1].reg.value <= ZYDIS_REGISTER_XMM15,
"Unexpected operand types for EXTRQ instruction");
const auto src = ZydisToXbyakRegisterOperand(operands[1]);
ASSERT_MSG(src.isXMM(), "operand 1 must be an XMM register");
Xbyak::Xmm xmm_src = *reinterpret_cast<const Xbyak::Xmm*>(&src);
const Xbyak::Reg64 scratch1 = rax;
const Xbyak::Reg64 scratch2 = rcx;
const Xbyak::Reg64 mask = rdx;
c.lea(rsp, ptr[rsp - 128]);
c.pushfq();
c.push(scratch1);
c.push(scratch2);
c.push(mask);
// Construct the mask out of the length that resides in bottom 6 bits of source xmm
MAYBE_AVX(movq, scratch1, xmm_src);
c.mov(scratch2, scratch1);
c.and_(scratch2, 0x3F);
c.mov(mask, 1);
c.shl(mask, cl);
c.dec(mask);
// Get the shift amount and store it in scratch2
c.shr(scratch1, 8);
c.and_(scratch1, 0x3F);
c.mov(scratch2, scratch1); // cl now contains the shift amount
MAYBE_AVX(movq, scratch1, xmm_dst);
c.shr(scratch1, cl);
c.and_(scratch1, mask);
MAYBE_AVX(movq, xmm_dst, scratch1);
c.pop(mask);
c.pop(scratch2);
c.pop(scratch1);
c.popfq();
c.lea(rsp, ptr[rsp + 128]);
}
}
using PatchFilter = bool (*)(const ZydisDecodedOperand*); using PatchFilter = bool (*)(const ZydisDecodedOperand*);
using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&); using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
struct PatchInfo { struct PatchInfo {
@ -608,6 +729,8 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
{ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, false}}, {ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, false}},
#endif #endif
{ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}},
#ifdef __APPLE__ #ifdef __APPLE__
// Patches for instruction sets not supported by Rosetta 2. // Patches for instruction sets not supported by Rosetta 2.
// BMI1 // BMI1
@ -671,14 +794,23 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
if (Patches.contains(instruction.mnemonic)) { if (Patches.contains(instruction.mnemonic)) {
const auto& patch_info = Patches.at(instruction.mnemonic); const auto& patch_info = Patches.at(instruction.mnemonic);
bool needs_trampoline = patch_info.trampoline;
if (patch_info.filter(operands)) { if (patch_info.filter(operands)) {
auto& patch_gen = module->patch_gen; auto& patch_gen = module->patch_gen;
if (needs_trampoline && instruction.length < 5) {
// Trampoline is needed but instruction is too short to patch.
// Return false and length to fall back to the illegal instruction handler,
// or to signal to AOT compilation that this instruction should be skipped and
// handled at runtime.
return std::make_pair(false, instruction.length);
}
// Reset state and move to current code position. // Reset state and move to current code position.
patch_gen.reset(); patch_gen.reset();
patch_gen.setSize(code - patch_gen.getCode()); patch_gen.setSize(code - patch_gen.getCode());
if (patch_info.trampoline) { if (needs_trampoline) {
auto& trampoline_gen = module->trampoline_gen; auto& trampoline_gen = module->trampoline_gen;
const auto trampoline_ptr = trampoline_gen.getCurr(); const auto trampoline_ptr = trampoline_gen.getCurr();
@ -714,6 +846,78 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
return std::make_pair(false, instruction.length); return std::make_pair(false, instruction.length);
} }
#if defined(ARCH_X86_64)
static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
const auto status =
Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address);
switch (instruction.mnemonic) {
case ZYDIS_MNEMONIC_EXTRQ: {
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
if (immediateForm) {
LOG_ERROR(Core, "EXTRQ immediate form should have been patched at code address: {}",
fmt::ptr(code_address));
return false;
} else {
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[0].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[0].reg.value <= ZYDIS_REGISTER_XMM15 &&
operands[1].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[1].reg.value <= ZYDIS_REGISTER_XMM15,
"Unexpected operand types for EXTRQ instruction");
const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0;
const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0;
const auto dst = Common::GetXmmPointer(ctx, dstIndex);
const auto src = Common::GetXmmPointer(ctx, srcIndex);
u64 lowQWordSrc;
memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc));
u64 lowQWordDst;
memcpy(&lowQWordDst, dst, sizeof(lowQWordDst));
u64 mask = lowQWordSrc & 0x3F;
mask = (1ULL << mask) - 1;
u64 shift = (lowQWordSrc >> 8) & 0x3F;
lowQWordDst >>= shift;
lowQWordDst &= mask;
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
Common::IncrementRip(ctx, instruction.length);
return true;
}
break;
}
default: {
LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}",
fmt::ptr(code_address), ZydisMnemonicGetString(instruction.mnemonic));
return false;
}
}
UNREACHABLE();
}
#elif defined(ARCH_ARM64)
// These functions shouldn't be needed for ARM as it will use a JIT so there's no need to patch
// instructions.
// This stub ignores both arguments and always reports the instruction as unhandled.
static bool TryExecuteIllegalInstruction(void*, void*) {
return false;
}
#else
#error "Unsupported architecture"
#endif
static bool TryPatchJit(void* code_address) { static bool TryPatchJit(void* code_address) {
auto* code = static_cast<u8*>(code_address); auto* code = static_cast<u8*>(code_address);
auto* module = GetModule(code); auto* module = GetModule(code);
@ -746,12 +950,16 @@ static void TryPatchAot(void* code_address, u64 code_size) {
} }
} }
static bool PatchesAccessViolationHandler(void* code_address, void* fault_address, bool is_write) { static bool PatchesAccessViolationHandler(void* context, void* /* fault_address */) {
return TryPatchJit(code_address); return TryPatchJit(Common::GetRip(context));
} }
static bool PatchesIllegalInstructionHandler(void* code_address) { static bool PatchesIllegalInstructionHandler(void* context) {
return TryPatchJit(code_address); void* code_address = Common::GetRip(context);
if (!TryPatchJit(code_address)) {
return TryExecuteIllegalInstruction(context, code_address);
}
return true;
} }
static void PatchesInit() { static void PatchesInit() {

View File

@ -4,6 +4,7 @@
#include "common/arch.h" #include "common/arch.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/decoder.h" #include "common/decoder.h"
#include "common/signal_context.h"
#include "core/signals.h" #include "core/signals.h"
#ifdef _WIN32 #ifdef _WIN32
@ -22,17 +23,14 @@ namespace Core {
static LONG WINAPI SignalHandler(EXCEPTION_POINTERS* pExp) noexcept { static LONG WINAPI SignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
const auto* signals = Signals::Instance(); const auto* signals = Signals::Instance();
auto* code_address = reinterpret_cast<void*>(pExp->ContextRecord->Rip);
bool handled = false; bool handled = false;
switch (pExp->ExceptionRecord->ExceptionCode) { switch (pExp->ExceptionRecord->ExceptionCode) {
case EXCEPTION_ACCESS_VIOLATION: case EXCEPTION_ACCESS_VIOLATION:
handled = signals->DispatchAccessViolation( handled = signals->DispatchAccessViolation(
code_address, reinterpret_cast<void*>(pExp->ExceptionRecord->ExceptionInformation[1]), pExp, reinterpret_cast<void*>(pExp->ExceptionRecord->ExceptionInformation[1]));
pExp->ExceptionRecord->ExceptionInformation[0] == 1);
break; break;
case EXCEPTION_ILLEGAL_INSTRUCTION: case EXCEPTION_ILLEGAL_INSTRUCTION:
handled = signals->DispatchIllegalInstruction(code_address); handled = signals->DispatchIllegalInstruction(pExp);
break; break;
default: default:
break; break;
@ -43,25 +41,6 @@ static LONG WINAPI SignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
#else #else
#ifdef __APPLE__
#if defined(ARCH_X86_64)
#define CODE_ADDRESS(ctx) reinterpret_cast<void*>((ctx)->uc_mcontext->__ss.__rip)
#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext->__es.__err & 0x2)
#elif defined(ARCH_ARM64)
#define CODE_ADDRESS(ctx) reinterpret_cast<void*>((ctx)->uc_mcontext->__ss.__pc)
#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext->__es.__esr & 0x40)
#endif
#else
#if defined(ARCH_X86_64)
#define CODE_ADDRESS(ctx) reinterpret_cast<void*>((ctx)->uc_mcontext.gregs[REG_RIP])
#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext.gregs[REG_ERR] & 0x2)
#endif
#endif
#ifndef IS_WRITE_ERROR
#error "Missing IS_WRITE_ERROR() implementation for target OS and CPU architecture."
#endif
static std::string DisassembleInstruction(void* code_address) { static std::string DisassembleInstruction(void* code_address) {
char buffer[256] = "<unable to decode>"; char buffer[256] = "<unable to decode>";
@ -83,23 +62,23 @@ static std::string DisassembleInstruction(void* code_address) {
} }
static void SignalHandler(int sig, siginfo_t* info, void* raw_context) { static void SignalHandler(int sig, siginfo_t* info, void* raw_context) {
const auto* ctx = static_cast<ucontext_t*>(raw_context);
const auto* signals = Signals::Instance(); const auto* signals = Signals::Instance();
auto* code_address = CODE_ADDRESS(ctx); auto* code_address = Common::GetRip(raw_context);
switch (sig) { switch (sig) {
case SIGSEGV: case SIGSEGV:
case SIGBUS: case SIGBUS: {
if (const bool is_write = IS_WRITE_ERROR(ctx); const bool is_write = Common::IsWriteError(raw_context);
!signals->DispatchAccessViolation(code_address, info->si_addr, is_write)) { if (!signals->DispatchAccessViolation(raw_context, info->si_addr)) {
UNREACHABLE_MSG("Unhandled access violation at code address {}: {} address {}", UNREACHABLE_MSG("Unhandled access violation at code address {}: {} address {}",
fmt::ptr(code_address), is_write ? "Write to" : "Read from", fmt::ptr(code_address), is_write ? "Write to" : "Read from",
fmt::ptr(info->si_addr)); fmt::ptr(info->si_addr));
} }
break; break;
}
case SIGILL: case SIGILL:
if (!signals->DispatchIllegalInstruction(code_address)) { if (!signals->DispatchIllegalInstruction(raw_context)) {
UNREACHABLE_MSG("Unhandled illegal instruction at code address {}: {}", UNREACHABLE_MSG("Unhandled illegal instruction at code address {}: {}",
fmt::ptr(code_address), DisassembleInstruction(code_address)); fmt::ptr(code_address), DisassembleInstruction(code_address));
} }
@ -146,19 +125,18 @@ SignalDispatch::~SignalDispatch() {
#endif #endif
} }
bool SignalDispatch::DispatchAccessViolation(void* code_address, void* fault_address, bool SignalDispatch::DispatchAccessViolation(void* context, void* fault_address) const {
bool is_write) const {
for (const auto& [handler, _] : access_violation_handlers) { for (const auto& [handler, _] : access_violation_handlers) {
if (handler(code_address, fault_address, is_write)) { if (handler(context, fault_address)) {
return true; return true;
} }
} }
return false; return false;
} }
bool SignalDispatch::DispatchIllegalInstruction(void* code_address) const { bool SignalDispatch::DispatchIllegalInstruction(void* context) const {
for (const auto& [handler, _] : illegal_instruction_handlers) { for (const auto& [handler, _] : illegal_instruction_handlers) {
if (handler(code_address)) { if (handler(context)) {
return true; return true;
} }
} }

View File

@ -8,8 +8,8 @@
namespace Core { namespace Core {
using AccessViolationHandler = bool (*)(void* code_address, void* fault_address, bool is_write); using AccessViolationHandler = bool (*)(void* context, void* fault_address);
using IllegalInstructionHandler = bool (*)(void* code_address); using IllegalInstructionHandler = bool (*)(void* context);
/// Receives OS signals and dispatches to the appropriate handlers. /// Receives OS signals and dispatches to the appropriate handlers.
class SignalDispatch { class SignalDispatch {
@ -28,10 +28,10 @@ public:
} }
/// Dispatches an access violation signal, returning whether it was successfully handled. /// Dispatches an access violation signal, returning whether it was successfully handled.
bool DispatchAccessViolation(void* code_address, void* fault_address, bool is_write) const; bool DispatchAccessViolation(void* context, void* fault_address) const;
/// Dispatches an illegal instruction signal, returning whether it was successfully handled. /// Dispatches an illegal instruction signal, returning whether it was successfully handled.
bool DispatchIllegalInstruction(void* code_address) const; bool DispatchIllegalInstruction(void* context) const;
private: private:
template <typename T> template <typename T>

View File

@ -6,6 +6,7 @@
#include "common/alignment.h" #include "common/alignment.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/error.h" #include "common/error.h"
#include "common/signal_context.h"
#include "core/signals.h" #include "core/signals.h"
#include "video_core/page_manager.h" #include "video_core/page_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_rasterizer.h"
@ -152,8 +153,9 @@ struct PageManager::Impl {
#endif #endif
} }
static bool GuestFaultSignalHandler(void* code_address, void* fault_address, bool is_write) { static bool GuestFaultSignalHandler(void* code_address, void* fault_address) {
const auto addr = reinterpret_cast<VAddr>(fault_address); const auto addr = reinterpret_cast<VAddr>(fault_address);
const bool is_write = Common::IsWriteError(code_address);
if (is_write && owned_ranges.find(addr) != owned_ranges.end()) { if (is_write && owned_ranges.find(addr) != owned_ranges.end()) {
const VAddr addr_aligned = Common::AlignDown(addr, PAGESIZE); const VAddr addr_aligned = Common::AlignDown(addr, PAGESIZE);
rasterizer->InvalidateMemory(addr_aligned, PAGESIZE); rasterizer->InvalidateMemory(addr_aligned, PAGESIZE);