Allow our BMI1 emulation to work on non-macOS CPUs

This commit is contained in:
offtkp 2025-02-24 15:27:10 +02:00
parent 76b4da6212
commit 6103d49f98

View File

@ -202,21 +202,19 @@ static void RestoreStack(Xbyak::CodeGenerator& c) {
/// Switches to the patch stack, saves registers, and restores the original stack.
static void SaveRegisters(Xbyak::CodeGenerator& c, const std::initializer_list<Xbyak::Reg> regs) {
SaveStack(c);
c.lea(rsp, ptr[rsp - 128]); // red zone
for (const auto& reg : regs) {
c.push(reg.cvt64());
}
RestoreStack(c);
}
/// Switches to the patch stack, restores registers, and restores the original stack.
static void RestoreRegisters(Xbyak::CodeGenerator& c,
const std::initializer_list<Xbyak::Reg> regs) {
SaveStack(c);
for (const auto& reg : regs) {
c.pop(reg.cvt64());
}
RestoreStack(c);
c.lea(rsp, ptr[rsp + 128]);
}
/// Switches to the patch stack and stores all registers.
@ -257,8 +255,6 @@ static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst,
RestoreStack(c);
}
#ifdef __APPLE__
static void GenerateANDN(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
const auto src1 = ZydisToXbyakRegisterOperand(operands[1]);
@ -426,6 +422,8 @@ static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat
RestoreRegisters(c, {scratch});
}
#ifdef __APPLE__
static __attribute__((sysv_abi)) void PerformVCVTPH2PS(float* out, const half_float::half* in,
const u32 count) {
for (u32 i = 0; i < count; i++) {
@ -616,6 +614,15 @@ static bool FilterNoSSE4a(const ZydisDecodedOperand*) {
return !cpu.has(Cpu::tSSE4a);
}
static bool FilterNoBMI1(const ZydisDecodedOperand*) {
#ifdef __APPLE__
return FilterRosetta2Only(nullptr);
#else
Cpu cpu;
return !cpu.has(Cpu::tBMI1);
#endif
}
static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
@ -897,14 +904,15 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
{ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}},
{ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}},
// BMI1
{ZYDIS_MNEMONIC_ANDN, {FilterNoBMI1, GenerateANDN, true}},
{ZYDIS_MNEMONIC_BEXTR, {FilterNoBMI1, GenerateBEXTR, true}},
{ZYDIS_MNEMONIC_BLSI, {FilterNoBMI1, GenerateBLSI, true}},
{ZYDIS_MNEMONIC_BLSMSK, {FilterNoBMI1, GenerateBLSMSK, true}},
{ZYDIS_MNEMONIC_BLSR, {FilterNoBMI1, GenerateBLSR, true}},
#ifdef __APPLE__
// Patches for instruction sets not supported by Rosetta 2.
// BMI1
{ZYDIS_MNEMONIC_ANDN, {FilterRosetta2Only, GenerateANDN, true}},
{ZYDIS_MNEMONIC_BEXTR, {FilterRosetta2Only, GenerateBEXTR, true}},
{ZYDIS_MNEMONIC_BLSI, {FilterRosetta2Only, GenerateBLSI, true}},
{ZYDIS_MNEMONIC_BLSMSK, {FilterRosetta2Only, GenerateBLSMSK, true}},
{ZYDIS_MNEMONIC_BLSR, {FilterRosetta2Only, GenerateBLSR, true}},
// F16C
{ZYDIS_MNEMONIC_VCVTPH2PS, {FilterRosetta2Only, GenerateVCVTPH2PS, true}},
{ZYDIS_MNEMONIC_VCVTPS2PH, {FilterRosetta2Only, GenerateVCVTPS2PH, true}},