diff --git a/CMakeLists.txt b/CMakeLists.txt index c8596f317..e067040b9 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -905,6 +905,7 @@ if (ARCHITECTURE STREQUAL "x86_64") src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp index def2974e2..14d6d77ac 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp @@ -16,32 +16,24 @@ void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - if (dest[0].isMEM() && src[0].isMEM()) { - Reg32 tmp = ctx.TempGPReg(false).getReg().cvt32(); - ctx.Code().mov(tmp, src[0]); - ctx.Code().mov(dest[0], tmp); - } else if (src[0].isMEM()) { - ctx.Code().mov(dest[0], src[0]); + if (src[0].isMEM()) { + MovGP(ctx, dest[0], src[0]); } else if (dest[0].isMEM()) { ctx.Code().movd(dest[0].getAddress(), src[0].getReg().cvt128()); } else { ctx.Code().movd(dword[rsp - 4], src[0].getReg().cvt128()); - ctx.Code().mov(dest[0], dword[rsp - 4]); + MovGP(ctx, dest[0], dword[rsp - 4]); } } void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - if (dest[0].isMEM() && src[0].isMEM()) { - Reg tmp = ctx.TempGPReg(false); - ctx.Code().mov(tmp, src[0]); - ctx.Code().mov(dest[0], tmp); - } else if (src[0].isMEM()) { - ctx.Code().mov(dest[0], src[0]); + if (src[0].isMEM()) { + MovGP(ctx, dest[0], src[0]); } else if (dest[0].isMEM()) { ctx.Code().movq(dest[0].getAddress(), src[0].getReg().cvt128()); } else { ctx.Code().movq(qword[rsp - 8], src[0].getReg().cvt128()); - ctx.Code().mov(dest[0], qword[rsp - 8]); + MovGP(ctx, dest[0], qword[rsp - 8]); } } @@ -50,31 +42,23 @@ void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) { - if (dest[0].isMEM() && src[0].isMEM()) { - Reg32 tmp = ctx.TempGPReg(false).getReg().cvt32(); - ctx.Code().mov(tmp, src[0]); - ctx.Code().mov(dest[0], tmp); - } else if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], src[0]); + if (dest[0].isMEM()) { + MovGP(ctx, dest[0], src[0]); } else if (src[0].isMEM()) { ctx.Code().movd(dest[0].getReg().cvt128(), src[0].getAddress()); } else { - ctx.Code().mov(dword[rsp - 4], src[0]); + MovGP(ctx, dword[rsp - 4], src[0]); ctx.Code().movd(dest[0].getReg().cvt128(), dword[rsp - 4]); } } void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) { - if (dest[0].isMEM() && src[0].isMEM()) { - Reg tmp = ctx.TempGPReg(false); - ctx.Code().mov(tmp, src[0]); - ctx.Code().mov(dest[0], tmp); - } else if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], src[0]); + if (dest[0].isMEM()) { + MovGP(ctx, dest[0], src[0]); } else if (src[0].isMEM()) { ctx.Code().movq(dest[0].getReg().cvt128(), src[0].getAddress()); } else { - ctx.Code().mov(qword[rsp - 8], src[0].getReg()); + MovGP(ctx, qword[rsp - 8], src[0]); 
ctx.Code().mov(dest[0].getReg().cvt128(), qword[rsp - 8]); } } @@ -82,35 +66,27 @@ void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& s void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) { const bool is_mem = dest[0].isMEM() && (src[0].isMEM() || src[1].isMEM()); Reg tmp = is_mem ? ctx.TempGPReg(false) : dest[0].getReg(); - ctx.Code().mov(tmp, src[0]); + MovGP(ctx, tmp, src[1]); ctx.Code().shl(tmp, 32); ctx.Code().or_(tmp, src[0]); - if (is_mem) { - ctx.Code().mov(dest[0], tmp); - } + MovGP(ctx, dest[0], tmp); } void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg src0 = src[0].isMEM() ? ctx.TempGPReg() : src[0].getReg(); - if (src[0].isMEM()) { - ctx.Code().mov(src0, src[0]); - } + MovGP(ctx, src0, src[0]); Reg dest1 = dest[1].isMEM() ? ctx.TempGPReg(false) : dest[1].getReg().changeBit(64); - ctx.Code().mov(dest1, src0); + MovGP(ctx, dest1, src0); ctx.Code().shr(dest1, 32); - if (dest[1].isMEM()) { - ctx.Code().mov(dest[1], dest1.cvt32()); - } - ctx.Code().mov(dest[0], src0.cvt32()); + MovGP(ctx, dest[1], dest1); + MovGP(ctx, dest[0], src0.cvt32()); } void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, src[0]); ctx.Code().pinsrd(tmp, src[1], 1); - if (dest[0].isMEM()) { - ctx.Code().movss(dest[0].getAddress(), tmp); - } + MovFloat(ctx, dest[0], tmp); } void EmitPackUnorm2x16(EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp index 192570d8f..3669b3708 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp @@ -19,7 +19,7 @@ void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg) void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) { Reg& tmp = ctx.TempGPReg(); - ctx.Code().mov(tmp, offset[0]); + MovGP(ctx, tmp, offset[0]); ctx.Code().shl(tmp, 2); ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]); MovGP(ctx, ptr[tmp], value[0]); @@ -59,7 +59,7 @@ void EmitGetGotoVariable(EmitContext&) { void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset) { Reg& tmp = ctx.TempGPReg(false); - ctx.Code().mov(tmp, base[1]); + MovGP(ctx, tmp, base[1]); ctx.Code().shl(tmp, 32); ctx.Code().or_(tmp, base[0]); if (offset[0].isMEM()) { diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp index 52726342e..f9ca78432 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp @@ -1,7 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "shader_recompiler/exception.h" #include "shader_recompiler/backend/asm_x64/x64_emit_context.h" #include "shader_recompiler/backend/asm_x64/x64_utils.h" @@ -10,169 +9,27 @@ namespace Shader::Backend::X64 { using namespace Xbyak; using namespace Xbyak::util; -namespace { - -static void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) { - CodeGenerator& c = ctx.Code(); - Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done; - Reg sign = ctx.TempGPReg().cvt32(); - Reg 
exponent = ctx.TempGPReg().cvt32(); - Reg mantissa = ctx.TempGPReg().cvt32(); - - c.movzx(mantissa, src); - - // Extract sign, exponent, and mantissa - c.mov(sign, mantissa); - c.and_(sign, 0x8000); - c.shl(sign, 16); - c.mov(exponent, mantissa); - c.and_(exponent, 0x7C00); - c.shr(exponent, 10); - c.and_(mantissa, 0x03FF); - - // Check for zero exponent and mantissa - c.test(exponent, exponent); - c.jnz(nonzero_exp); - c.test(mantissa, mantissa); - c.jz(zero_mantissa); - - // Nromalize subnormal number - c.mov(exponent, 1); - c.L(norm_loop); - c.test(mantissa, 0x400); - c.jnz(norm_done); - c.shl(mantissa, 1); - c.dec(exponent); - c.jmp(norm_loop); - c.L(norm_done); - c.and_(mantissa, 0x03FF); - c.jmp(normal); - - // Zero mantissa - c.L(zero_mantissa); - c.and_(mantissa, sign); - c.jmp(done); - - // Non-zero exponent - c.L(nonzero_exp); - c.cmp(exponent, 0x1F); - c.jne(normal); - - // Infinite or NaN - c.shl(mantissa, 13); - c.or_(mantissa, sign); - c.or_(mantissa, 0x7F800000); - c.jmp(done); - - // Normal number - c.L(normal); - c.add(exponent, 112); - c.shl(exponent, 23); - c.shl(mantissa, 13); - c.or_(mantissa, sign); - c.or_(mantissa, exponent); - - c.L(done); - if (dest.isMEM()) { - c.mov(dest, mantissa); - } else { - c.movd(dest.getReg().cvt128(), mantissa); - } -} - -static void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) { - CodeGenerator& c = ctx.Code(); - Label zero_exp, underflow, overflow, done; - Reg sign = ctx.TempGPReg().cvt32(); - Reg exponent = ctx.TempGPReg().cvt32(); - Reg mantissa = dest.isMEM() ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32(); - - if (src.isMEM()) { - c.mov(mantissa, src); - } else { - c.movd(mantissa, src.getReg().cvt128()); - } - - // Extract sign, exponent, and mantissa - c.mov(exponent, mantissa); - c.mov(sign, mantissa); - c.and_(exponent, 0x7F800000); - c.and_(mantissa, 0x007FFFFF); - c.shr(exponent, 23); - c.shl(mantissa, 3); - c.shr(sign, 16); - c.and_(sign, 0x8000); - - // Subnormal numbers will be zero - c.test(exponent, exponent); - c.jz(zero_exp); - - // Check for overflow and underflow - c.sub(exponent, 112); - c.cmp(exponent, 0); - c.jle(underflow); - c.cmp(exponent, 0x1F); - c.jge(overflow); - - // Normal number - c.shl(exponent, 10); - c.shr(mantissa, 13); - c.or_(mantissa, exponent); - c.or_(mantissa, sign); - c.jmp(done); - - // Undeflow - c.L(underflow); - c.xor_(mantissa, mantissa); - c.jmp(done); - - // Overflow - c.L(overflow); - c.mov(mantissa, 0x7C00); - c.or_(mantissa, sign); - c.jmp(done); - - // Zero value - c.L(zero_exp); - c.and_(mantissa, sign); - - c.L(done); - if (dest.isMEM()) { - c.mov(dest, mantissa); - } else { - c.and_(mantissa, 0xFFFF); - } -} - -} - void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp_xmm = ctx.TempXmmReg(false); Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); ctx.Code().and_(tmp_reg, 0xFFFF); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp_reg.cvt16()); - } + MovGP(ctx, dest[0], tmp_reg); } void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp = dest[0].isMEM() ? 
ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttss2si(tmp, src[0]); ctx.Code().and_(tmp, 0xFFFF); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp.cvt16()); - } + MovGP(ctx, dest[0], tmp); } void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttsd2si(tmp, src[0]); ctx.Code().and_(tmp, 0xFFFF); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp.cvt16()); - } + MovGP(ctx, dest[0], tmp); } void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -180,25 +37,19 @@ void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& s Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp_reg); - } + MovGP(ctx, dest[0], tmp_reg); } void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttss2si(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovGP(ctx, dest[0], tmp); } void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttsd2si(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovGP(ctx, dest[0], tmp); } void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -206,25 +57,19 @@ void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& s Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg(); EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp_reg); - } + MovGP(ctx, dest[0], tmp_reg); } void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); ctx.Code().cvttss2si(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovGP(ctx, dest[0], tmp); } void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); ctx.Code().cvttsd2si(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovGP(ctx, dest[0], tmp); } void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -282,17 +127,13 @@ void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& s void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().cvtsd2ss(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovFloat(ctx, dest[0], tmp); } void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().cvtss2sd(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovDouble(ctx, dest[0], tmp); } void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -344,9 +185,7 @@ void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& sr Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp_xmm); - } + MovFloat(ctx, dest[0], tmp_xmm); } void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -354,25 +193,19 @@ void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& s Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp_xmm); - } + MovFloat(ctx, dest[0], tmp_xmm); } void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().cvtsi2ss(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovFloat(ctx, dest[0], tmp); } void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().cvtsi2ss(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovFloat(ctx, dest[0], tmp); } void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -396,9 +229,7 @@ void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& sr Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp_xmm); - } + MovDouble(ctx, dest[0], tmp_xmm); } void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -406,25 +237,19 @@ void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& s Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp_xmm); - } + MovDouble(ctx, dest[0], tmp_xmm); } void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().cvtsi2sd(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovDouble(ctx, dest[0], tmp); } void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().cvtsi2sd(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovDouble(ctx, dest[0], tmp); } void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src) { diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp new file mode 100644 index 000000000..d209b1e36 --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp @@ -0,0 +1,723 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/backend/asm_x64/emit_x64_instructions.h" +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" +#include "shader_recompiler/backend/asm_x64/x64_utils.h" + +namespace Shader::Backend::X64 { + +using namespace Xbyak; +using namespace Xbyak::util; + +void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt16() : dest[0].getReg().cvt16(); + MovGP(ctx, tmp, src[0]); + ctx.Code().and_(tmp, 0x7FFF); + MovGP(ctx, dest[0], tmp); +} + +void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src) { + Reg reg_tmp = ctx.TempGPReg(false).cvt32(); + Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().mov(reg_tmp, 0x7FFFFFFF); + ctx.Code().movd(xmm_tmp, reg_tmp); + ctx.Code().andps(xmm_tmp, src[0]); + MovFloat(ctx, dest[0], xmm_tmp); +} + +void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src) { + Reg reg_tmp = ctx.TempGPReg(false); + Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().mov(reg_tmp, 0x7FFFFFFFFFFFFFFF); + ctx.Code().movq(xmm_tmp, reg_tmp); + ctx.Code().andpd(xmm_tmp, src[0]); + MovDouble(ctx, dest[0], xmm_tmp); +} + +void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, op1[0]); + EmitInlineF16ToF32(ctx, tmp2, op2[0]); + ctx.Code().addss(tmp1, tmp2); + EmitInlineF32ToF16(ctx, dest[0], tmp1); +} + +void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().addss(tmp, op2[0]); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovDouble(ctx, tmp, op1[0]); + ctx.Code().addsd(tmp, op2[0]); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().subss(tmp, op2[0]); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + Xmm tmp3 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, op1[0]); + EmitInlineF16ToF32(ctx, tmp2, op2[0]); + EmitInlineF16ToF32(ctx, tmp3, op3[0]); + ctx.Code().vfmadd132ss(tmp3, tmp1, tmp2); + EmitInlineF32ToF16(ctx, dest[0], tmp3); +} + +void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) { + Xmm tmp1 = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp2 = op1[0].isMEM() ? ctx.TempXmmReg() : op1[0].getReg().cvt128(); + Xmm tmp3 = op2[0].isMEM() ? ctx.TempXmmReg() : op2[0].getReg().cvt128(); + MovFloat(ctx, tmp1, op3[0]); + MovFloat(ctx, tmp2, op1[0]); + MovFloat(ctx, tmp3, op2[0]); + ctx.Code().vfmadd132ss(tmp3, tmp1, tmp2); + MovFloat(ctx, dest[0], tmp3); +} + +void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) { + Xmm tmp1 = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp2 = op1[0].isMEM() ? ctx.TempXmmReg() : op1[0].getReg().cvt128(); + Xmm tmp3 = op2[0].isMEM() ? ctx.TempXmmReg() : op2[0].getReg().cvt128(); + MovDouble(ctx, tmp1, op3[0]); + MovDouble(ctx, tmp2, op1[0]); + MovDouble(ctx, tmp3, op2[0]); + ctx.Code().vfmadd132sd(tmp3, tmp1, tmp2); + MovDouble(ctx, dest[0], tmp3); +} + +void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) { + if (is_legacy) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + MovFloat(ctx, tmp1, op1[0]); + MovFloat(ctx, tmp2, op1[0]); + ctx.Code().maxss(tmp2, op2[0]); + ctx.Code().cmpunordss(tmp1, tmp1); + ctx.Code().andps(tmp1, op2[0]); + ctx.Code().orps(tmp2, tmp1); + MovFloat(ctx, dest[0], tmp2); + } else { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().maxss(tmp, op2[0]); + MovFloat(ctx, dest[0], tmp); + } +} + +void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovDouble(ctx, tmp, op1[0]); + ctx.Code().maxsd(tmp, op2[0]); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) { + if (is_legacy) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + MovFloat(ctx, tmp1, op1[0]); + MovFloat(ctx, tmp2, op1[0]); + ctx.Code().minss(tmp2, op2[0]); + ctx.Code().cmpunordss(tmp1, tmp1); + ctx.Code().andps(tmp1, op2[0]); + ctx.Code().orps(tmp2, tmp1); + MovFloat(ctx, dest[0], tmp2); + } else { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().minss(tmp, op2[0]); + MovFloat(ctx, dest[0], tmp); + } +} + +void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovDouble(ctx, tmp, op1[0]); + ctx.Code().minsd(tmp, op2[0]); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, op1[0]); + EmitInlineF16ToF32(ctx, tmp2, op2[0]); + ctx.Code().mulss(tmp1, tmp2); + EmitInlineF32ToF16(ctx, dest[0], tmp1); +} + +void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().mulss(tmp, op2[0]); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovDouble(ctx, tmp, op1[0]); + ctx.Code().mulsd(tmp, op2[0]); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().divss(tmp, op2[0]); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovDouble(ctx, tmp, op1[0]); + ctx.Code().divsd(tmp, op2[0]); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt16() : dest[0].getReg().cvt16(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().xor_(tmp, 0x8000); + MovGP(ctx, dest[0], tmp); +} + +void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg(false).cvt32(); + ctx.Code().mov(tmp_reg, 0x80000000); + ctx.Code().movd(tmp_xmm, tmp_reg); + ctx.Code().xorps(tmp_xmm, op1[0]); + MovFloat(ctx, dest[0], tmp_xmm); +} + +void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg(false); + ctx.Code().mov(tmp_reg, 0x8000000000000000); + ctx.Code().movq(tmp_xmm, tmp_reg); + ctx.Code().xorpd(tmp_xmm, op1[0]); + MovDouble(ctx, dest[0], tmp_xmm); +} + +void EmitFPSin(EmitContext& ctx) { + throw NotImplementedException("FPSin"); +} + +void EmitFPCos(EmitContext& ctx) { + throw NotImplementedException("FPCos"); +} + +void EmitFPExp2(EmitContext& ctx) { + throw NotImplementedException("FPExp2"); +} + +void EmitFPLdexp(EmitContext& ctx) { + throw NotImplementedException("FPLdexp"); +} + +void EmitFPLog2(EmitContext& ctx) { + throw NotImplementedException("FPLog2"); +} + +void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().rcpss(tmp, op1[0]); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp_xmm = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg(false); + ctx.Code().mov(tmp_reg, 1); + ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); + ctx.Code().divsd(tmp_xmm, op1[0]); + MovDouble(ctx, dest[0], tmp_xmm); +} + +void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().rsqrtss(tmp, op1[0]); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg(false); + ctx.Code().mov(tmp_reg, 1); + ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); + ctx.Code().divsd(tmp_xmm, op1[0]); + ctx.Code().sqrtsd(tmp_xmm, tmp_xmm); + MovDouble(ctx, dest[0], tmp_xmm); +} + +void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().sqrtss(tmp, op1[0]); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPSaturate16(EmitContext& ctx) { + throw NotImplementedException("FPSaturate16"); +} + +void EmitFPSaturate32(EmitContext& ctx) { + throw NotImplementedException("FPSaturate32"); +} + +void EmitFPSaturate64(EmitContext& ctx) { + throw NotImplementedException("FPSaturate64"); +} + +void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + Xmm tmp3 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, op[0]); + EmitInlineF16ToF32(ctx, tmp2, min[0]); + EmitInlineF16ToF32(ctx, tmp3, max[0]); + ctx.Code().maxss(tmp1, tmp2); + ctx.Code().minss(tmp1, tmp3); + EmitInlineF32ToF16(ctx, dest[0], tmp1); +} + +void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, op[0]); + ctx.Code().maxss(tmp, min[0]); + ctx.Code().minss(tmp, max[0]); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovDouble(ctx, tmp, op[0]); + ctx.Code().maxsd(tmp, min[0]); + ctx.Code().minsd(tmp, max[0]); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp, op1[0]); + ctx.Code().roundss(tmp, tmp, 0x00); + EmitInlineF32ToF16(ctx, dest[0], tmp); +} + +void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().roundss(tmp, op1[0], 0x00); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().roundsd(tmp, op1[0], 0x00); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp, op1[0]); + ctx.Code().roundss(tmp, tmp, 0x01); + EmitInlineF32ToF16(ctx, dest[0], tmp); +} + +void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().roundss(tmp, op1[0], 0x01); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().roundsd(tmp, op1[0], 0x01); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp, op1[0]); + ctx.Code().roundss(tmp, tmp, 0x02); + EmitInlineF32ToF16(ctx, dest[0], tmp); +} + +void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().roundss(tmp, op1[0], 0x02); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().roundsd(tmp, op1[0], 0x02); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPTrunc16(EmitContext& ctx) { + throw NotImplementedException("FPTrunc16"); +} + +void EmitFPTrunc32(EmitContext& ctx) { + throw NotImplementedException("FPTrunc32"); +} + +void EmitFPTrunc64(EmitContext& ctx) { + throw NotImplementedException("FPTrunc64"); +} + +void EmitFPFract32(EmitContext& ctx) { + throw NotImplementedException("FPFract32"); +} + +void EmitFPFract64(EmitContext& ctx) { + throw NotImplementedException("FPFract64"); +} + +void EmitFPFrexpSig32(EmitContext& ctx) { + throw NotImplementedException("FPFrexpSig32"); +} + +void EmitFPFrexpSig64(EmitContext& ctx) { + throw NotImplementedException("FPFrexpSig64"); +} + +void EmitFPFrexpExp32(EmitContext& ctx) { + throw NotImplementedException("FPFrexpExp32"); +} + +void EmitFPFrexpExp64(EmitContext& ctx) { + throw NotImplementedException("FPFrexpExp64"); +} + +void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordEqual16(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordEqual32(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordEqual64(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, lhs[0]); + EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + ctx.Code().ucomiss(tmp1, tmp2); + ctx.Code().sete(dest[0]); +} + +void EmitFPUnordEqual32(EmitContext& ctx, const Operands& 
dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovFloat(ctx, tmp, lhs[0]); + ctx.Code().ucomiss(tmp, rhs[0]); + ctx.Code().sete(dest[0]); +} + +void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovDouble(ctx, tmp, lhs[0]); + ctx.Code().ucomisd(tmp, rhs[0]); + ctx.Code().sete(dest[0]); +} + +void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordNotEqual16(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordNotEqual32(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordNotEqual64(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, lhs[0]); + EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + ctx.Code().ucomiss(tmp1, tmp2); + ctx.Code().setne(dest[0]); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovFloat(ctx, tmp, lhs[0]); + ctx.Code().ucomiss(tmp, rhs[0]); + ctx.Code().setne(dest[0]); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovDouble(ctx, tmp, lhs[0]); + ctx.Code().ucomisd(tmp, rhs[0]); + ctx.Code().setne(dest[0]); +} + +void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordLessThan16(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordLessThan32(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordLessThan64(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, lhs[0]); + EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + ctx.Code().ucomiss(tmp1, tmp2); + ctx.Code().setb(dest[0]); +} + +void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? 
ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovFloat(ctx, tmp, lhs[0]); + ctx.Code().ucomiss(tmp, rhs[0]); + ctx.Code().setb(dest[0]); +} + +void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovDouble(ctx, tmp, lhs[0]); + ctx.Code().ucomisd(tmp, rhs[0]); + ctx.Code().setb(dest[0]); +} + +void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordGreaterThan16(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordGreaterThan32(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordGreaterThan64(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, lhs[0]); + EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + ctx.Code().ucomiss(tmp1, tmp2); + ctx.Code().seta(dest[0]); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovFloat(ctx, tmp, lhs[0]); + ctx.Code().ucomiss(tmp, rhs[0]); + ctx.Code().seta(dest[0]); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovDouble(ctx, tmp, lhs[0]); + ctx.Code().ucomisd(tmp, rhs[0]); + ctx.Code().seta(dest[0]); +} + +void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordLessThanEqual16(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordLessThanEqual32(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordLessThanEqual64(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, lhs[0]); + EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + ctx.Code().ucomiss(tmp1, tmp2); + ctx.Code().setbe(dest[0]); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? 
ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovFloat(ctx, tmp, lhs[0]); + ctx.Code().ucomiss(tmp, rhs[0]); + ctx.Code().setbe(dest[0]); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovDouble(ctx, tmp, lhs[0]); + ctx.Code().ucomisd(tmp, rhs[0]); + ctx.Code().setbe(dest[0]); +} + +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordGreaterThanEqual16(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordGreaterThanEqual32(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordGreaterThanEqual64(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, lhs[0]); + EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + ctx.Code().ucomiss(tmp1, tmp2); + ctx.Code().setae(dest[0]); +} + +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovFloat(ctx, tmp, lhs[0]); + ctx.Code().ucomiss(tmp, rhs[0]); + ctx.Code().setae(dest[0]); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovDouble(ctx, tmp, lhs[0]); + ctx.Code().ucomisd(tmp, rhs[0]); + ctx.Code().setae(dest[0]); +} + +void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& op) { + Xmm tmp = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp, op[0]); + ctx.Code().ucomiss(tmp, tmp); + ctx.Code().setp(dest[0]); +} + +void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& op) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, op[0]); + ctx.Code().ucomiss(tmp, tmp); + ctx.Code().setp(dest[0]); +} + +void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& op) { + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovDouble(ctx, tmp, op[0]); + ctx.Code().ucomisd(tmp, tmp); + ctx.Code().setp(dest[0]); +} + +void EmitFPIsInf32(EmitContext& ctx) { + throw NotImplementedException("FPIsInf32"); +} + +void EmitFPIsInf64(EmitContext& ctx) { + throw NotImplementedException("FPIsInf64"); +} + +void EmitFPCmpClass32(EmitContext&) { + UNREACHABLE(); +} +} \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h index 48f0facd4..9b34ff40b 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h @@ -157,7 +157,7 @@ void EmitCompositeInsertF16x4(EmitContext& ctx, const Operands& dest, const Oper void EmitCompositeShuffleF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2); void EmitCompositeShuffleF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3); void EmitCompositeShuffleF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4); -void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2); +void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2); void EmitCompositeConstructF32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3); void EmitCompositeConstructF32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4); void EmitCompositeConstructF32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2); @@ -227,103 +227,103 @@ void EmitPackUint2_10_10_10(EmitContext& ctx); void EmitUnpackUint2_10_10_10(EmitContext& ctx); void EmitPackSint2_10_10_10(EmitContext& ctx); void EmitUnpackSint2_10_10_10(EmitContext& ctx); -Id EmitFPAbs16(EmitContext& ctx, Id value); -Id EmitFPAbs32(EmitContext& ctx, Id value); -Id EmitFPAbs64(EmitContext& ctx, Id value); -Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false); -Id EmitFPMax64(EmitContext& ctx, Id a, Id b); -Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false); -Id EmitFPMin64(EmitContext& ctx, Id a, Id b); -Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPNeg16(EmitContext& ctx, Id value); -Id EmitFPNeg32(EmitContext& ctx, Id value); -Id EmitFPNeg64(EmitContext& ctx, Id value); -Id EmitFPSin(EmitContext& ctx, Id value); -Id EmitFPCos(EmitContext& ctx, Id value); 
-Id EmitFPExp2(EmitContext& ctx, Id value); -Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp); -Id EmitFPLog2(EmitContext& ctx, Id value); -Id EmitFPRecip32(EmitContext& ctx, Id value); -Id EmitFPRecip64(EmitContext& ctx, Id value); -Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); -Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); -Id EmitFPSqrt(EmitContext& ctx, Id value); -Id EmitFPSaturate16(EmitContext& ctx, Id value); -Id EmitFPSaturate32(EmitContext& ctx, Id value); -Id EmitFPSaturate64(EmitContext& ctx, Id value); -Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPRoundEven16(EmitContext& ctx, Id value); -Id EmitFPRoundEven32(EmitContext& ctx, Id value); -Id EmitFPRoundEven64(EmitContext& ctx, Id value); -Id EmitFPFloor16(EmitContext& ctx, Id value); -Id EmitFPFloor32(EmitContext& ctx, Id value); -Id EmitFPFloor64(EmitContext& ctx, Id value); -Id EmitFPCeil16(EmitContext& ctx, Id value); -Id EmitFPCeil32(EmitContext& ctx, Id value); -Id EmitFPCeil64(EmitContext& ctx, Id value); -Id EmitFPTrunc16(EmitContext& ctx, Id value); -Id EmitFPTrunc32(EmitContext& ctx, Id value); -Id EmitFPTrunc64(EmitContext& ctx, Id value); -Id EmitFPFract32(EmitContext& ctx, Id value); -Id EmitFPFract64(EmitContext& ctx, Id value); -Id EmitFPFrexpSig32(EmitContext& ctx, Id value); -Id EmitFPFrexpSig64(EmitContext& ctx, Id value); -Id EmitFPFrexpExp32(EmitContext& ctx, Id value); -Id EmitFPFrexpExp64(EmitContext& ctx, Id value); -Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id 
EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPIsNan16(EmitContext& ctx, Id value); -Id EmitFPIsNan32(EmitContext& ctx, Id value); -Id EmitFPIsNan64(EmitContext& ctx, Id value); -Id EmitFPIsInf32(EmitContext& ctx, Id value); -Id EmitFPIsInf64(EmitContext& ctx, Id value); +void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); +void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); +void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); +void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false); +void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false); +void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPSin(EmitContext& ctx); +void EmitFPCos(EmitContext& ctx); +void EmitFPExp2(EmitContext& ctx); +void EmitFPLdexp(EmitContext& ctx); +void EmitFPLog2(EmitContext& ctx); +void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPSaturate16(EmitContext& ctx); +void EmitFPSaturate32(EmitContext& ctx); +void EmitFPSaturate64(EmitContext& ctx); +void EmitFPClamp16(EmitContext& ctx, const Operands& 
dest, const Operands& op, const Operands& min, const Operands& max); +void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max); +void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max); +void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPTrunc16(EmitContext& ctx); +void EmitFPTrunc32(EmitContext& ctx); +void EmitFPTrunc64(EmitContext& ctx); +void EmitFPFract32(EmitContext& ctx); +void EmitFPFract64(EmitContext& ctx); +void EmitFPFrexpSig32(EmitContext& ctx); +void EmitFPFrexpSig64(EmitContext& ctx); +void EmitFPFrexpExp32(EmitContext& ctx); +void EmitFPFrexpExp64(EmitContext& ctx); +void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, const 
Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPIsInf32(EmitContext& ctx); +void EmitFPIsInf64(EmitContext& ctx); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitIAdd64(EmitContext& ctx, Id a, Id b); Id EmitIAddCary32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp index 1b706eeeb..5dd7e0b6c 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp @@ -48,6 +48,16 @@ Xmm& EmitContext::TempXmmReg(bool reserve) { return reg; } +void EmitContext::PopTempGPReg() { + ASSERT(temp_gp_reg_index > 0); + temp_gp_reg_index--; +} + +void EmitContext::PopTempXmmReg() { + ASSERT(temp_xmm_reg_index > 0); + temp_xmm_reg_index--; +} + const Operands& EmitContext::Def(IR::Inst* inst) { return inst_to_operands.at(inst); } diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h index 43aebc26a..eab5bad70 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h @@ -40,7 +40,9 @@ public: [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true); [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true); - + void PopTempGPReg(); + void PopTempXmmReg(); + [[nodiscard]] const 
diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
index 7948a41e8..3d327569b 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
+++ b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
@@ -159,6 +159,9 @@ Reg ResizeRegToType(const Reg& reg, IR::Type type) {
 
 void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
     CodeGenerator& c = ctx.Code();
+    if (src == dst) {
+        return;
+    }
     if (src.isMEM() && dst.isMEM()) {
         Reg tmp = ctx.TempGPReg(false).cvt32();
         c.mov(tmp, src);
@@ -176,6 +179,9 @@ void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand&
 
 void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
     CodeGenerator& c = ctx.Code();
+    if (src == dst) {
+        return;
+    }
     if (src.isMEM() && dst.isMEM()) {
         const Reg64& tmp = ctx.TempGPReg(false);
         c.mov(tmp, src);
@@ -193,6 +199,9 @@ void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand
 
 void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
     CodeGenerator& c = ctx.Code();
+    if (src == dst) {
+        return;
+    }
     Reg tmp = (src.isMEM() && dst.isMEM()) ? ctx.TempGPReg(false).changeBit(dst.getBit()) : dst.getReg();
     if (src.getBit() == dst.getBit()) {
         c.mov(tmp, src);
@@ -288,4 +297,147 @@ void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) {
     }
 }
 
+void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) {
+    CodeGenerator& c = ctx.Code();
+    Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done;
+    Reg sign = ctx.TempGPReg().cvt32();
+    Reg exponent = ctx.TempGPReg().cvt32();
+    Reg mantissa = ctx.TempGPReg().cvt32();
+
+    c.movzx(mantissa, src);
+
+    // Extract sign, exponent, and mantissa
+    c.mov(sign, mantissa);
+    c.and_(sign, 0x8000);
+    c.shl(sign, 16);
+    c.mov(exponent, mantissa);
+    c.and_(exponent, 0x7C00);
+    c.shr(exponent, 10);
+    c.and_(mantissa, 0x03FF);
+
+    // Check for zero exponent and mantissa
+    c.test(exponent, exponent);
+    c.jnz(nonzero_exp);
+    c.test(mantissa, mantissa);
+    c.jz(zero_mantissa);
+
+    // Normalize subnormal number
+    c.mov(exponent, 1);
+    c.L(norm_loop);
+    c.test(mantissa, 0x400);
+    c.jnz(norm_done);
+    c.shl(mantissa, 1);
+    c.dec(exponent);
+    c.jmp(norm_loop);
+    c.L(norm_done);
+    c.and_(mantissa, 0x03FF);
+    c.jmp(normal);
+
+    // Zero mantissa: the result is a correctly signed zero
+    c.L(zero_mantissa);
+    c.or_(mantissa, sign);
+    c.jmp(done);
+
+    // Non-zero exponent
+    c.L(nonzero_exp);
+    c.cmp(exponent, 0x1F);
+    c.jne(normal);
+
+    // Infinite or NaN
+    c.shl(mantissa, 13);
+    c.or_(mantissa, sign);
+    c.or_(mantissa, 0x7F800000);
+    c.jmp(done);
+
+    // Normal number
+    c.L(normal);
+    c.add(exponent, 112);
+    c.shl(exponent, 23);
+    c.shl(mantissa, 13);
+    c.or_(mantissa, sign);
+    c.or_(mantissa, exponent);
+
+    c.L(done);
+    if (dest.isMEM()) {
+        c.mov(dest, mantissa);
+    } else {
+        c.movd(dest.getReg().cvt128(), mantissa);
+    }
+
+    ctx.PopTempGPReg();
+    ctx.PopTempGPReg();
+    ctx.PopTempGPReg();
+}
+
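// For reference only, not part of the patch: a portable scalar equivalent of the
// half-to-float expansion emitted by EmitInlineF16ToF32 above, handy for spot-checking
// the constants (bias difference 112, mantissa widened by 13 bits). F16ToF32Bits is a
// hypothetical name used purely for illustration.
#include <cstdint>

static uint32_t F16ToF32Bits(uint16_t h) {
    const uint32_t sign = (static_cast<uint32_t>(h) & 0x8000u) << 16; // sign: bit 15 -> bit 31
    int exponent = (h >> 10) & 0x1F;                                  // 5-bit biased exponent
    uint32_t mantissa = h & 0x03FFu;                                  // 10-bit mantissa
    if (exponent == 0) {
        if (mantissa == 0) {
            return sign; // +/-0.0
        }
        // Subnormal half: shift left until the implicit bit (bit 10) appears, adjusting the exponent.
        exponent = 1;
        while ((mantissa & 0x0400u) == 0) {
            mantissa <<= 1;
            --exponent;
        }
        mantissa &= 0x03FFu;
    } else if (exponent == 0x1F) {
        return sign | 0x7F800000u | (mantissa << 13); // infinity / NaN
    }
    // Re-bias the exponent (15 -> 127, i.e. +112) and widen the mantissa from 10 to 23 bits.
    return sign | (static_cast<uint32_t>(exponent + 112) << 23) | (mantissa << 13);
}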
+void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) {
+    CodeGenerator& c = ctx.Code();
+    Label zero_exp, underflow, overflow, done;
+    Reg sign = ctx.TempGPReg().cvt32();
+    Reg exponent = ctx.TempGPReg().cvt32();
+    Reg mantissa = dest.isMEM() ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32();
+
+    if (src.isMEM()) {
+        c.mov(mantissa, src);
+    } else {
+        c.movd(mantissa, src.getReg().cvt128());
+    }
+
+    // Extract sign, exponent, and mantissa
+    c.mov(exponent, mantissa);
+    c.mov(sign, mantissa);
+    c.and_(exponent, 0x7F800000);
+    c.and_(mantissa, 0x007FFFFF);
+    c.shr(exponent, 23);
+    c.shl(mantissa, 3);
+    c.shr(sign, 16);
+    c.and_(sign, 0x8000);
+
+    // Subnormal numbers will be zero
+    c.test(exponent, exponent);
+    c.jz(zero_exp);
+
+    // Check for overflow and underflow
+    c.sub(exponent, 112);
+    c.cmp(exponent, 0);
+    c.jle(underflow);
+    c.cmp(exponent, 0x1F);
+    c.jge(overflow);
+
+    // Normal number: keep the top 10 mantissa bits (the mantissa was pre-shifted left by 3)
+    c.shl(exponent, 10);
+    c.shr(mantissa, 16);
+    c.or_(mantissa, exponent);
+    c.or_(mantissa, sign);
+    c.jmp(done);
+
+    // Underflow
+    c.L(underflow);
+    c.xor_(mantissa, mantissa);
+    c.jmp(done);
+
+    // Overflow
+    c.L(overflow);
+    c.mov(mantissa, 0x7C00);
+    c.or_(mantissa, sign);
+    c.jmp(done);
+
+    // Zero or subnormal input: flush to a correctly signed zero
+    c.L(zero_exp);
+    c.mov(mantissa, sign);
+
+    c.L(done);
+    if (dest.isMEM()) {
+        c.mov(dest, mantissa);
+    } else {
+        c.and_(mantissa, 0xFFFF);
+    }
+
+    ctx.PopTempGPReg();
+    ctx.PopTempGPReg();
+    // A third temporary was only reserved when dest is in memory (mantissa aliases dest otherwise).
+    if (dest.isMEM()) {
+        ctx.PopTempGPReg();
+    }
+}
+
 } // namespace Shader::Backend::X64
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.h b/src/shader_recompiler/backend/asm_x64/x64_utils.h
index 2d665653d..c22dbfc77 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_utils.h
+++ b/src/shader_recompiler/backend/asm_x64/x64_utils.h
@@ -19,5 +19,7 @@ void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand&
 void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
 void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
 void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src);
+void EmitInlineF16ToF32(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
+void EmitInlineF32ToF16(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
 
 } // namespace Shader::Backend::X64
\ No newline at end of file
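To close the loop, a scalar sketch of the float-to-half path as the emitted code implements it (truncation toward zero, zero-exponent inputs flushed to a signed zero, underflow flushed to +0, and overflow, Inf and NaN collapsing to a signed infinity). F32ToF16Bits is a hypothetical name used only for illustration, not part of the patch:

#include <cstdint>

static uint16_t F32ToF16Bits(uint32_t f) {
    const uint32_t sign = (f >> 16) & 0x8000u;               // sign: bit 31 -> bit 15
    const uint32_t exp_field = (f >> 23) & 0xFFu;            // 8-bit biased exponent
    const uint32_t mantissa = f & 0x007FFFFFu;               // 23-bit mantissa
    if (exp_field == 0) {
        return static_cast<uint16_t>(sign);                  // zero / subnormal float: signed zero
    }
    const int exponent = static_cast<int>(exp_field) - 112;  // re-bias 127 -> 15
    if (exponent <= 0) {
        return 0;                                            // underflow: flushed to +0
    }
    if (exponent >= 0x1F) {
        return static_cast<uint16_t>(sign | 0x7C00u);        // overflow, Inf and NaN -> signed Inf
    }
    // Truncate the mantissa from 23 to 10 bits and repack.
    return static_cast<uint16_t>(sign | (static_cast<uint32_t>(exponent) << 10) | (mantissa >> 13));
}

Round-tripping a handful of values through F16ToF32Bits(F32ToF16Bits(x)) is a quick way to sanity-check both inline emitters against the intended bit patterns.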