Allow our BMI1 emulation to work on non-macOS CPUs

This commit is contained in:
offtkp 2025-02-24 15:27:10 +02:00
parent 76b4da6212
commit 6103d49f98

View File

@ -202,21 +202,19 @@ static void RestoreStack(Xbyak::CodeGenerator& c) {
/// Switches to the patch stack, saves registers, and restores the original stack. /// Switches to the patch stack, saves registers, and restores the original stack.
static void SaveRegisters(Xbyak::CodeGenerator& c, const std::initializer_list<Xbyak::Reg> regs) { static void SaveRegisters(Xbyak::CodeGenerator& c, const std::initializer_list<Xbyak::Reg> regs) {
SaveStack(c); c.lea(rsp, ptr[rsp - 128]); // red zone
for (const auto& reg : regs) { for (const auto& reg : regs) {
c.push(reg.cvt64()); c.push(reg.cvt64());
} }
RestoreStack(c);
} }
/// Switches to the patch stack, restores registers, and restores the original stack. /// Switches to the patch stack, restores registers, and restores the original stack.
static void RestoreRegisters(Xbyak::CodeGenerator& c, static void RestoreRegisters(Xbyak::CodeGenerator& c,
const std::initializer_list<Xbyak::Reg> regs) { const std::initializer_list<Xbyak::Reg> regs) {
SaveStack(c);
for (const auto& reg : regs) { for (const auto& reg : regs) {
c.pop(reg.cvt64()); c.pop(reg.cvt64());
} }
RestoreStack(c); c.lea(rsp, ptr[rsp + 128]);
} }
/// Switches to the patch stack and stores all registers. /// Switches to the patch stack and stores all registers.
@ -257,8 +255,6 @@ static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst,
RestoreStack(c); RestoreStack(c);
} }
#ifdef __APPLE__
static void GenerateANDN(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { static void GenerateANDN(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
const auto dst = ZydisToXbyakRegisterOperand(operands[0]); const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
const auto src1 = ZydisToXbyakRegisterOperand(operands[1]); const auto src1 = ZydisToXbyakRegisterOperand(operands[1]);
@ -426,6 +422,8 @@ static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat
RestoreRegisters(c, {scratch}); RestoreRegisters(c, {scratch});
} }
#ifdef __APPLE__
static __attribute__((sysv_abi)) void PerformVCVTPH2PS(float* out, const half_float::half* in, static __attribute__((sysv_abi)) void PerformVCVTPH2PS(float* out, const half_float::half* in,
const u32 count) { const u32 count) {
for (u32 i = 0; i < count; i++) { for (u32 i = 0; i < count; i++) {
@ -616,6 +614,15 @@ static bool FilterNoSSE4a(const ZydisDecodedOperand*) {
return !cpu.has(Cpu::tSSE4a); return !cpu.has(Cpu::tSSE4a);
} }
static bool FilterNoBMI1(const ZydisDecodedOperand*) {
#ifdef __APPLE__
return FilterRosetta2Only(nullptr);
#else
Cpu cpu;
return !cpu.has(Cpu::tBMI1);
#endif
}
static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) { static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
@ -897,14 +904,15 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
{ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}}, {ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}},
{ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}}, {ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}},
// BMI1
{ZYDIS_MNEMONIC_ANDN, {FilterNoBMI1, GenerateANDN, true}},
{ZYDIS_MNEMONIC_BEXTR, {FilterNoBMI1, GenerateBEXTR, true}},
{ZYDIS_MNEMONIC_BLSI, {FilterNoBMI1, GenerateBLSI, true}},
{ZYDIS_MNEMONIC_BLSMSK, {FilterNoBMI1, GenerateBLSMSK, true}},
{ZYDIS_MNEMONIC_BLSR, {FilterNoBMI1, GenerateBLSR, true}},
#ifdef __APPLE__ #ifdef __APPLE__
// Patches for instruction sets not supported by Rosetta 2. // Patches for instruction sets not supported by Rosetta 2.
// BMI1
{ZYDIS_MNEMONIC_ANDN, {FilterRosetta2Only, GenerateANDN, true}},
{ZYDIS_MNEMONIC_BEXTR, {FilterRosetta2Only, GenerateBEXTR, true}},
{ZYDIS_MNEMONIC_BLSI, {FilterRosetta2Only, GenerateBLSI, true}},
{ZYDIS_MNEMONIC_BLSMSK, {FilterRosetta2Only, GenerateBLSMSK, true}},
{ZYDIS_MNEMONIC_BLSR, {FilterRosetta2Only, GenerateBLSR, true}},
// F16C // F16C
{ZYDIS_MNEMONIC_VCVTPH2PS, {FilterRosetta2Only, GenerateVCVTPH2PS, true}}, {ZYDIS_MNEMONIC_VCVTPH2PS, {FilterRosetta2Only, GenerateVCVTPH2PS, true}},
{ZYDIS_MNEMONIC_VCVTPS2PH, {FilterRosetta2Only, GenerateVCVTPS2PH, true}}, {ZYDIS_MNEMONIC_VCVTPS2PH, {FilterRosetta2Only, GenerateVCVTPS2PH, true}},