From 01239233b2c79a85f42a80ec8ef054e476bcb5f2 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sun, 6 Apr 2025 20:30:38 +0200 Subject: [PATCH] Finish emits --- CMakeLists.txt | 6 + .../backend/asm_x64/emit_x64.cpp | 29 +- .../backend/asm_x64/emit_x64_barrier.cpp | 2 +- .../asm_x64/emit_x64_bitwise_conversion.cpp | 8 +- .../backend/asm_x64/emit_x64_composite.cpp | 2 +- .../asm_x64/emit_x64_context_get_set.cpp | 4 +- .../backend/asm_x64/emit_x64_convert.cpp | 57 +- .../asm_x64/emit_x64_floating_point.cpp | 106 ++-- .../backend/asm_x64/emit_x64_image.cpp | 2 +- .../backend/asm_x64/emit_x64_instructions.h | 207 ++++---- .../backend/asm_x64/emit_x64_integer.cpp | 502 ++++++++++++++++++ .../backend/asm_x64/emit_x64_logical.cpp | 40 ++ .../backend/asm_x64/emit_x64_select.cpp | 71 +++ .../backend/asm_x64/emit_x64_special.cpp | 55 ++ .../backend/asm_x64/emit_x64_undefined.cpp | 28 + .../backend/asm_x64/emit_x64_warp.cpp | 32 ++ .../backend/asm_x64/x64_emit_context.cpp | 10 +- .../backend/asm_x64/x64_emit_context.h | 15 +- .../compute_value/do_integer_operations.cpp | 3 +- .../ir/compute_value/imm_value.cpp | 56 ++ .../ir/compute_value/imm_value.h | 3 + 21 files changed, 1029 insertions(+), 209 deletions(-) create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index cc27e0cf6..c33cf9163 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -908,7 +908,13 @@ if (ARCHITECTURE STREQUAL "x86_64") src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp 
src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h + src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp src/shader_recompiler/backend/asm_x64/emit_x64.cpp src/shader_recompiler/backend/asm_x64/emit_x64.h src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp index e128216fc..9464bd36b 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp @@ -38,7 +38,7 @@ static void EmitCondition(EmitContext& ctx, const IR::Inst* ref, Label& label, b } template -ArgType Arg(EmitContext& ctx, const IR::Value& arg) { +std::remove_reference_t Arg(EmitContext& ctx, const IR::Value& arg) { if constexpr (std::is_same_v) { return ctx.Def(arg); } else if constexpr (std::is_same_v) { @@ -114,9 +114,24 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) { UNREACHABLE_MSG("Invalid opcode {}", inst->GetOpcode()); } +static bool IsLastInst(const IR::AbstractSyntaxList& list, IR::AbstractSyntaxList::const_iterator it) { + for (; it != list.end(); ++it) { + switch (it->type) { + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Loop: + case IR::AbstractSyntaxNode::Type::EndIf: + continue; + default: + return false; + } + } + return true; +} + void Traverse(EmitContext& ctx, const IR::Program& program) { CodeGenerator& c = ctx.Code(); - for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + for (auto it = program.syntax_list.begin(); it != program.syntax_list.end(); ++it) { + const 
IR::AbstractSyntaxNode& node = *it; ctx.ResetTempRegs(); switch (node.type) { case IR::AbstractSyntaxNode::Type::Block: { @@ -130,6 +145,9 @@ void Traverse(EmitContext& ctx, const IR::Program& program) { MovValue(ctx, ctx.Def(phi), value); } } + if (ctx.EndFlag() && IsLastInst(program.syntax_list, it)) { + c.jmp(ctx.EndLabel()); + } break; } case IR::AbstractSyntaxNode::Type::If: { @@ -148,17 +166,14 @@ void Traverse(EmitContext& ctx, const IR::Program& program) { IR::Inst* ref = node.data.break_node.cond.InstRecursive(); Label& merge = ctx.BlockLabel(node.data.break_node.merge); EmitCondition(ctx, ref, merge, true); - +c.jz(merge); - break; - } - case IR::AbstractSyntaxNode::Type::Return: { - c.jmp(ctx.EndLabel()); + c.jz(merge); break; } case IR::AbstractSyntaxNode::Type::Unreachable: { c.int3(); break; } + case IR::AbstractSyntaxNode::Type::Return: case IR::AbstractSyntaxNode::Type::Loop: case IR::AbstractSyntaxNode::Type::EndIf: break; diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp index 62df58ae9..b610b9c8d 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp @@ -17,4 +17,4 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx) { } -} \ No newline at end of file +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp index 14d6d77ac..0a4ecc96b 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp @@ -65,7 +65,7 @@ void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& s void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) { const bool is_mem = dest[0].isMEM() && (src[0].isMEM() || 
src[1].isMEM()); - Reg tmp = is_mem ? ctx.TempGPReg(false) : dest[0].getReg(); + Reg tmp = is_mem ? ctx.TempGPReg() : dest[0].getReg(); MovGP(ctx, tmp, src[1]); ctx.Code().shl(tmp, 32); ctx.Code().or_(tmp, src[0]); @@ -75,7 +75,7 @@ void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& sr void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg src0 = src[0].isMEM() ? ctx.TempGPReg() : src[0].getReg(); MovGP(ctx, src0, src[0]); - Reg dest1 = dest[1].isMEM() ? ctx.TempGPReg(false) : dest[1].getReg().changeBit(64); + Reg dest1 = dest[1].isMEM() ? ctx.TempGPReg() : dest[1].getReg().changeBit(64); MovGP(ctx, dest1, src0); ctx.Code().shr(dest1, 32); MovGP(ctx, dest[1], dest1); @@ -83,7 +83,7 @@ void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& } void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, src[0]); ctx.Code().pinsrd(tmp, src[1], 1); MovFloat(ctx, dest[0], tmp); @@ -201,4 +201,4 @@ void EmitUnpackSint2_10_10_10(EmitContext& ctx) { throw NotImplementedException("UnpackSint2_10_10_10"); } -} \ No newline at end of file +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp index 910fd2cec..2421553bd 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp @@ -347,4 +347,4 @@ void EmitCompositeShuffleF64x4(EmitContext& ctx, const Operands& dest, const Ope MovDouble(ctx, dest[3], GetSuffleOperand<4>(composite1, composite2, idx4)); } -} \ No newline at end of file +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp index 3669b3708..169a8d85a 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp @@ -58,7 +58,7 @@ void EmitGetGotoVariable(EmitContext&) { } void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset) { - Reg& tmp = ctx.TempGPReg(false); + Reg& tmp = ctx.TempGPReg(); MovGP(ctx, tmp, base[1]); ctx.Code().shl(tmp, 32); ctx.Code().or_(tmp, base[0]); @@ -198,4 +198,4 @@ void EmitStoreBufferFormatF32(EmitContext& ctx) { throw NotImplementedException("StoreBufferFormatF32"); } -} \ No newline at end of file +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp index f9ca78432..48ebf4fa5 100644 --- 
a/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp @@ -10,7 +10,7 @@ using namespace Xbyak; using namespace Xbyak::util; void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp_xmm = ctx.TempXmmReg(false); + Xmm tmp_xmm = ctx.TempXmmReg(); Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); @@ -19,21 +19,21 @@ void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttss2si(tmp, src[0]); ctx.Code().and_(tmp, 0xFFFF); MovGP(ctx, dest[0], tmp); } void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttsd2si(tmp, src[0]); ctx.Code().and_(tmp, 0xFFFF); MovGP(ctx, dest[0], tmp); } void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp_xmm = ctx.TempXmmReg(false); + Xmm tmp_xmm = ctx.TempXmmReg(); Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); @@ -41,19 +41,19 @@ void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].isMEM() ? 
ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttss2si(tmp, src[0]); MovGP(ctx, dest[0], tmp); } void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttsd2si(tmp, src[0]); MovGP(ctx, dest[0], tmp); } void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp_xmm = ctx.TempXmmReg(false); + Xmm tmp_xmm = ctx.TempXmmReg(); Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg(); EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); @@ -61,13 +61,13 @@ void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg(); ctx.Code().cvttss2si(tmp, src[0]); MovGP(ctx, dest[0], tmp); } void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg(); ctx.Code().cvttsd2si(tmp, src[0]); MovGP(ctx, dest[0], tmp); } @@ -125,20 +125,20 @@ void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().cvtsd2ss(tmp, src[0]); MovFloat(ctx, dest[0], tmp); } void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().cvtss2sd(tmp, src[0]); MovDouble(ctx, dest[0], tmp); } void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); - Xmm tmp_xmm = ctx.TempXmmReg(false); + Xmm tmp_xmm = ctx.TempXmmReg(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); EmitInlineF32ToF16(ctx, dest[0], tmp_xmm); @@ -146,20 +146,20 @@ void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& sr void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); - Xmm tmp_xmm = ctx.TempXmmReg(false); + Xmm tmp_xmm = ctx.TempXmmReg(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); EmitInlineF32ToF16(ctx, dest[0], tmp_xmm); } void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = ctx.TempXmmReg(false); + Xmm tmp = ctx.TempXmmReg(); ctx.Code().cvtsi2ss(tmp, src[0]); EmitInlineF32ToF16(ctx, dest[0], tmp); } void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = ctx.TempXmmReg(false); + Xmm tmp = ctx.TempXmmReg(); ctx.Code().cvtsi2ss(tmp, src[0]); EmitInlineF32ToF16(ctx, dest[0], tmp); } @@ -181,29 +181,29 @@ void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp_reg = ctx.TempGPReg(false).cvt32(); - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg().cvt32(); + Xmm tmp_xmm = dest[0].isMEM() ? 
ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); MovFloat(ctx, dest[0], tmp_xmm); } void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp_reg = ctx.TempGPReg(false).cvt32(); - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg().cvt32(); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); MovFloat(ctx, dest[0], tmp_xmm); } void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().cvtsi2ss(tmp, src[0]); MovFloat(ctx, dest[0], tmp); } void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().cvtsi2ss(tmp, src[0]); MovFloat(ctx, dest[0], tmp); } @@ -225,29 +225,29 @@ void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp_reg = ctx.TempGPReg(false).cvt32(); - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg().cvt32(); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); MovDouble(ctx, dest[0], tmp_xmm); } void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp_reg = ctx.TempGPReg(false).cvt32(); - Xmm tmp_xmm = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg().cvt32(); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); MovDouble(ctx, dest[0], tmp_xmm); } void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().cvtsi2sd(tmp, src[0]); MovDouble(ctx, dest[0], tmp); } void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().cvtsi2sd(tmp, src[0]); MovDouble(ctx, dest[0], tmp); } @@ -276,5 +276,4 @@ void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& s MovGP(ctx, dest[0], src[0]); } -} - +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp index d209b1e36..588b1ed2d 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp @@ -13,15 +13,15 @@ using namespace Xbyak::util; void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt16() : dest[0].getReg().cvt16(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt16() : dest[0].getReg().cvt16(); MovGP(ctx, tmp, src[0]); ctx.Code().and_(tmp, 0x7FFF); MovGP(ctx, dest[0], tmp); } void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg reg_tmp = ctx.TempXmmReg(false); - Xmm xmm_tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg reg_tmp = ctx.TempXmmReg(); + Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().mov(reg_tmp, 0x7FFFFFFF); ctx.Code().movd(xmm_tmp, reg_tmp); ctx.Code().andps(xmm_tmp, src[0]); @@ -29,8 +29,8 @@ void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src) { } void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg reg_tmp = ctx.TempGPReg(false); - Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg reg_tmp = ctx.TempGPReg(); + Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().mov(reg_tmp, 0x7FFFFFFFFFFFFFFF); ctx.Code().movq(xmm_tmp, reg_tmp); ctx.Code().andpd(xmm_tmp, src[0]); @@ -47,21 +47,21 @@ void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, co } void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op1[0]); ctx.Code().addss(tmp, op2[0]); MovFloat(ctx, dest[0], tmp); } void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op1[0]); ctx.Code().addsd(tmp, op2[0]); MovDouble(ctx, dest[0], tmp); } void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op1[0]); ctx.Code().subss(tmp, op2[0]); MovFloat(ctx, dest[0], tmp); @@ -112,7 +112,7 @@ void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, co ctx.Code().orps(tmp2, tmp1); MovFloat(ctx, dest[0], tmp2); } else { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op1[0]); ctx.Code().maxss(tmp, op2[0]); MovFloat(ctx, dest[0], tmp); @@ -120,7 +120,7 @@ void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, co } void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op1[0]); ctx.Code().maxsd(tmp, op2[0]); MovDouble(ctx, dest[0], tmp); @@ -138,7 +138,7 @@ void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, co ctx.Code().orps(tmp2, tmp1); MovFloat(ctx, dest[0], tmp2); } else { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op1[0]); ctx.Code().minss(tmp, op2[0]); MovFloat(ctx, dest[0], tmp); @@ -146,7 +146,7 @@ void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, co } void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op1[0]); ctx.Code().minsd(tmp, op2[0]); MovDouble(ctx, dest[0], tmp); @@ -162,43 +162,43 @@ void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, co } void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op1[0]); ctx.Code().mulss(tmp, op2[0]); MovFloat(ctx, dest[0], tmp); } void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op1[0]); ctx.Code().mulsd(tmp, op2[0]); MovDouble(ctx, dest[0], tmp); } void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op1[0]); ctx.Code().divss(tmp, op2[0]); MovFloat(ctx, dest[0], tmp); } void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op1[0]); ctx.Code().divsd(tmp, op2[0]); MovDouble(ctx, dest[0], tmp); } void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt16() : dest[0].getReg().cvt16(); + Reg tmp = dest[0].isMEM() ? 
ctx.TempGPReg().cvt16() : dest[0].getReg().cvt16(); MovGP(ctx, tmp, op1[0]); ctx.Code().xor_(tmp, 0x8000); MovGP(ctx, dest[0], tmp); } void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); - Reg tmp_reg = ctx.TempGPReg(false).cvt32(); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg().cvt32(); ctx.Code().mov(tmp_reg, 0x80000000); ctx.Code().movd(tmp_xmm, tmp_reg); ctx.Code().xorps(tmp_xmm, op1[0]); @@ -206,8 +206,8 @@ void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& op1) { } void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); - Reg tmp_reg = ctx.TempXmmReg(false); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempXmmReg(); ctx.Code().mov(tmp_reg, 0x8000000000000000); ctx.Code().movq(tmp_xmm, tmp_reg); ctx.Code().xorpd(tmp_xmm, op1[0]); @@ -236,14 +236,14 @@ void EmitFPLog2(EmitContext& ctx) { } void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().rcpss(tmp, op1[0]); MovFloat(ctx, dest[0], tmp); } void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); - Reg tmp_reg = ctx.TempGPReg(false); + Xmm tmp_xmm = dest[0].isMEM() ? 
ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg(); ctx.Code().mov(tmp_reg, 1); ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); ctx.Code().divsd(tmp_xmm, op1[0]); @@ -251,14 +251,14 @@ void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& op1) } void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().rsqrtss(tmp, op1[0]); MovFloat(ctx, dest[0], tmp); } void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); - Reg tmp_reg = ctx.TempGPReg(false); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg(); ctx.Code().mov(tmp_reg, 1); ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); ctx.Code().divsd(tmp_xmm, op1[0]); @@ -267,7 +267,7 @@ void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& o } void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().sqrtss(tmp, op1[0]); MovFloat(ctx, dest[0], tmp); } @@ -297,7 +297,7 @@ void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, c } void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op[0]); ctx.Code().maxss(tmp, min[0]); ctx.Code().minss(tmp, max[0]); @@ -305,7 +305,7 @@ void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, c } void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op[0]); ctx.Code().maxsd(tmp, min[0]); ctx.Code().minsd(tmp, max[0]); @@ -320,13 +320,13 @@ void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& o } void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().roundss(tmp, op1[0], 0x00); MovFloat(ctx, dest[0], tmp); } void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().roundsd(tmp, op1[0], 0x00); MovDouble(ctx, dest[0], tmp); } @@ -339,13 +339,13 @@ void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1) } void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().roundss(tmp, op1[0], 0x01); MovFloat(ctx, dest[0], tmp); } void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().roundsd(tmp, op1[0], 0x01); MovDouble(ctx, dest[0], tmp); } @@ -358,13 +358,13 @@ void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1) { } void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().roundss(tmp, op1[0], 0x02); MovFloat(ctx, dest[0], tmp); } void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().roundsd(tmp, op1[0], 0x02); MovDouble(ctx, dest[0], tmp); } @@ -439,14 +439,14 @@ void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& } void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovFloat(ctx, tmp, lhs[0]); ctx.Code().ucomiss(tmp, rhs[0]); ctx.Code().sete(dest[0]); } void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovDouble(ctx, tmp, lhs[0]); ctx.Code().ucomisd(tmp, rhs[0]); ctx.Code().sete(dest[0]); @@ -462,7 +462,7 @@ void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { Label not_nan; - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? 
ctx.TempXmmReg() : lhs[0].getReg().cvt128(); ctx.Code().jnp(not_nan); ctx.Code().mov(dest[0], 0); ctx.Code().L(not_nan); @@ -486,14 +486,14 @@ void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operand } void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovFloat(ctx, tmp, lhs[0]); ctx.Code().ucomiss(tmp, rhs[0]); ctx.Code().setne(dest[0]); } void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovDouble(ctx, tmp, lhs[0]); ctx.Code().ucomisd(tmp, rhs[0]); ctx.Code().setne(dest[0]); @@ -533,14 +533,14 @@ void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operand } void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovFloat(ctx, tmp, lhs[0]); ctx.Code().ucomiss(tmp, rhs[0]); ctx.Code().setb(dest[0]); } void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovDouble(ctx, tmp, lhs[0]); ctx.Code().ucomisd(tmp, rhs[0]); ctx.Code().setb(dest[0]); @@ -580,14 +580,14 @@ void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Oper } void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? 
ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovFloat(ctx, tmp, lhs[0]); ctx.Code().ucomiss(tmp, rhs[0]); ctx.Code().seta(dest[0]); } void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovDouble(ctx, tmp, lhs[0]); ctx.Code().ucomisd(tmp, rhs[0]); ctx.Code().seta(dest[0]); @@ -627,14 +627,14 @@ void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Op } void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovFloat(ctx, tmp, lhs[0]); ctx.Code().ucomiss(tmp, rhs[0]); ctx.Code().setbe(dest[0]); } void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovDouble(ctx, tmp, lhs[0]); ctx.Code().ucomisd(tmp, rhs[0]); ctx.Code().setbe(dest[0]); @@ -646,7 +646,6 @@ void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const O ctx.Code().jnp(not_nan); ctx.Code().mov(dest[0], 0); ctx.Code().L(not_nan); - ctx.Code().vfpclassss(tmp1, tmp2); } void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { @@ -675,14 +674,14 @@ void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const } void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? 
ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovFloat(ctx, tmp, lhs[0]); ctx.Code().ucomiss(tmp, rhs[0]); ctx.Code().setae(dest[0]); } void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovDouble(ctx, tmp, lhs[0]); ctx.Code().ucomisd(tmp, rhs[0]); ctx.Code().setae(dest[0]); @@ -696,14 +695,14 @@ void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& op) { } void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& op) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op[0]); ctx.Code().ucomiss(tmp, tmp); ctx.Code().setp(dest[0]); } void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& op) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op[0]); ctx.Code().ucomisd(tmp, tmp); ctx.Code().setp(dest[0]); @@ -720,4 +719,5 @@ void EmitFPIsInf64(EmitContext& ctx) { void EmitFPCmpClass32(EmitContext&) { UNREACHABLE(); } -} \ No newline at end of file + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp index bc0e436e1..33b53e6ce 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp @@ -59,4 +59,4 @@ void EmitCubeFaceIndex(EmitContext& ctx) { throw NotImplementedException("CubeFaceIndex"); } -} \ No newline at end of file +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h index 9b34ff40b..5725bbc56 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h @@ -25,7 +25,7 @@ class EmitContext; void EmitPhi(EmitContext& ctx); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx); -void EmitConditionRef(EmitContext& ctx, const IR::Value& value); +void EmitConditionRef(EmitContext& ctx); void EmitReference(EmitContext&); void EmitPhiMove(EmitContext&); void EmitJoin(EmitContext& ctx); @@ -47,8 +47,8 @@ void EmitFPCmpClass32(EmitContext& ctx); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); void EmitDiscard(EmitContext& ctx); -void EmitDiscardCond(EmitContext& ctx, Id condition); -void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2, Id arg3, Id arg4); +void EmitDiscardCond(EmitContext& ctx, const Operands& condition); +void EmitDebugPrint(EmitContext& ctx); void EmitBarrier(EmitContext& ctx); void 
EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); @@ -98,28 +98,27 @@ void EmitBufferAtomicAnd32(EmitContext& ctx); void EmitBufferAtomicOr32(EmitContext& ctx); void EmitBufferAtomicXor32(EmitContext& ctx); void EmitBufferAtomicSwap32(EmitContext& ctx); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index); -Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); -Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index); -void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index); -Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, - Id comp_index); -Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); -void EmitSetSampleMask(EmitContext& ctx, Id value); -void EmitSetFragDepth(EmitContext& ctx, Id value); -Id EmitWorkgroupId(EmitContext& ctx); -Id EmitLocalInvocationId(EmitContext& ctx); -Id EmitInvocationId(EmitContext& ctx); -Id EmitInvocationInfo(EmitContext& ctx); -Id EmitSampleId(EmitContext& ctx); -Id EmitUndefU1(EmitContext& ctx); -Id EmitUndefU8(EmitContext& ctx); -Id EmitUndefU16(EmitContext& ctx); -Id EmitUndefU32(EmitContext& ctx); -Id EmitUndefU64(EmitContext& ctx); +void EmitGetAttribute(EmitContext& ctx); +void EmitGetAttributeU32(EmitContext& ctx); +void EmitSetAttribute(EmitContext& ctx); +void EmitGetTessGenericAttribute(EmitContext& ctx); +void EmitSetTcsGenericAttribute(EmitContext& ctx); +void EmitReadTcsGenericOuputAttribute(EmitContext& ctx); +void EmitGetPatch(EmitContext& ctx); +void EmitSetPatch(EmitContext& ctx); +void EmitSetFragColor(EmitContext& ctx); +void EmitSetSampleMask(EmitContext& ctx); +void EmitSetFragDepth(EmitContext& 
ctx); +void EmitWorkgroupId(EmitContext& ctx); +void EmitLocalInvocationId(EmitContext& ctx); +void EmitInvocationId(EmitContext& ctx); +void EmitInvocationInfo(EmitContext& ctx); +void EmitSampleId(EmitContext& ctx); +void EmitUndefU1(EmitContext& ctx); +void EmitUndefU8(EmitContext& ctx); +void EmitUndefU16(EmitContext& ctx); +void EmitUndefU32(EmitContext& ctx); +void EmitUndefU64(EmitContext& ctx); void EmitLoadSharedU32(EmitContext& ctx); void EmitLoadSharedU64(EmitContext& ctx); void EmitWriteSharedU32(EmitContext& ctx); @@ -157,7 +156,7 @@ void EmitCompositeInsertF16x4(EmitContext& ctx, const Operands& dest, const Oper void EmitCompositeShuffleF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2); void EmitCompositeShuffleF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3); void EmitCompositeShuffleF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4); -void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const OpEmitFPAbs16erands& src1, const Operands& src2); +void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2); void EmitCompositeConstructF32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3); void EmitCompositeConstructF32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4); void EmitCompositeConstructF32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2); @@ -182,14 +181,14 @@ void EmitCompositeInsertF64x4(EmitContext& ctx, const Operands& dest, const Oper void EmitCompositeShuffleF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& 
composite2, u32 idx1, u32 idx2); void EmitCompositeShuffleF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3); void EmitCompositeShuffleF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4); -Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +void EmitSelectU1(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectU8(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectU16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectU32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectU64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectF16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectF32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectF64(EmitContext& ctx, const 
Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& src); void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src); void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src); @@ -324,68 +323,68 @@ void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& src); void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& src); void EmitFPIsInf32(EmitContext& ctx); void EmitFPIsInf64(EmitContext& ctx); -Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitIAdd64(EmitContext& ctx, Id a, Id b); -Id EmitIAddCary32(EmitContext& ctx, Id a, Id b); -Id EmitISub32(EmitContext& ctx, Id a, Id b); -Id EmitISub64(EmitContext& ctx, Id a, Id b); -Id EmitSMulExt(EmitContext& ctx, Id a, Id b); -Id EmitUMulExt(EmitContext& ctx, Id a, Id b); -Id EmitIMul32(EmitContext& ctx, Id a, Id b); -Id EmitIMul64(EmitContext& ctx, Id a, Id b); -Id EmitSDiv32(EmitContext& ctx, Id a, Id b); -Id EmitUDiv32(EmitContext& ctx, Id a, Id b); -Id EmitSMod32(EmitContext& ctx, Id a, Id b); -Id EmitUMod32(EmitContext& ctx, Id a, Id b); -Id EmitINeg32(EmitContext& ctx, Id value); -Id EmitINeg64(EmitContext& ctx, Id value); -Id EmitIAbs32(EmitContext& ctx, Id value); -Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); -Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseAnd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseOr64(EmitContext& 
ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); -Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); -Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); -Id EmitBitReverse32(EmitContext& ctx, Id value); -Id EmitBitCount32(EmitContext& ctx, Id value); -Id EmitBitCount64(EmitContext& ctx, Id value); -Id EmitBitwiseNot32(EmitContext& ctx, Id value); -Id EmitFindSMsb32(EmitContext& ctx, Id value); -Id EmitFindUMsb32(EmitContext& ctx, Id value); -Id EmitFindILsb32(EmitContext& ctx, Id value); -Id EmitFindILsb64(EmitContext& ctx, Id value); -Id EmitSMin32(EmitContext& ctx, Id a, Id b); -Id EmitUMin32(EmitContext& ctx, Id a, Id b); -Id EmitSMax32(EmitContext& ctx, Id a, Id b); -Id EmitUMax32(EmitContext& ctx, Id a, Id b); -Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitINotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); -Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); -Id 
+void EmitShiftRightLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
EmitShiftRightLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
EmitUMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitSClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max); +void EmitUClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max); +void EmitSLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitSLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitULessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitULessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitIEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitIEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitSLessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitULessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitSGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitUGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitINotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitINotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitLogicalOr(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitLogicalAnd(EmitContext& ctx, const Operands& dest, const 
Operands& op1, const Operands& op2); +void EmitLogicalXor(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitLogicalNot(EmitContext& ctx, const Operands& dest, const Operands& op); void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src); void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src); void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src); @@ -461,17 +460,17 @@ void EmitImageAtomicAnd32(EmitContext& ctx); void EmitImageAtomicOr32(EmitContext& ctx); void EmitImageAtomicXor32(EmitContext& ctx); void EmitImageAtomicExchange32(EmitContext& ctx); -Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords); -Id EmitLaneId(EmitContext& ctx); -Id EmitWarpId(EmitContext& ctx); -Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index); -Id EmitReadFirstLane(EmitContext& ctx, Id value); -Id EmitReadLane(EmitContext& ctx, Id value, u32 lane); -Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane); +void EmitCubeFaceIndex(EmitContext& ctx); +void EmitLaneId(EmitContext& ctx); +void EmitWarpId(EmitContext& ctx); +void EmitQuadShuffle(EmitContext& ctx); +void EmitReadFirstLane(EmitContext& ctx); +void EmitReadLane(EmitContext& ctx); +void EmitWriteLane(EmitContext& ctx); void EmitDataAppend(EmitContext& ctx); void EmitDataConsume(EmitContext& ctx); void EmitEmitVertex(EmitContext& ctx); void EmitEmitPrimitive(EmitContext& ctx); -} \ No newline at end of file +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp new file mode 100644 index 000000000..2cc3b7c7e --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp @@ -0,0 +1,502 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include 
+static bool EmitSaveRegTemp(EmitContext& ctx, const Reg& save, const Operand& dest) {
+    // Only skip the save when the destination really is that register; calling
+    // getIdx() on a memory operand would compare meaninglessly and could skip
+    // a required save. Context is taken by reference, never copied.
+    if (dest.isREG() && dest.getIdx() == save.getIdx()) {
+        // Destination is reg, no need to save
+        return false;
+    }
+    ctx.Code().push(save);
+    return true;
+}
+
+static void EmitRestoreRegTemp(EmitContext& ctx, const Reg& save) {
+    ctx.Code().pop(save);
+}
+
+} // namespace
+
+void EmitIAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    if (dest[0].isREG() && op1[0].isREG() && op2[0].isREG()) {
+        ctx.Code().lea(dest[0].getReg(), ptr[op1[0].getReg() + op2[0].getReg()]);
+    } else {
+        Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+        MovGP(ctx, tmp, op1[0]);
+        ctx.Code().add(tmp, op2[0]);
+        MovGP(ctx, dest[0], tmp);
+    }
+}
+
+void EmitIAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    if (dest[0].isREG() && op1[0].isREG() && op2[0].isREG()) {
+        ctx.Code().lea(dest[0].getReg(), ptr[op1[0].getReg() + op2[0].getReg()]);
+    } else {
+        Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
+        MovGP(ctx, tmp, op1[0]);
+        ctx.Code().add(tmp, op2[0]);
+        MovGP(ctx, dest[0], tmp);
+    }
+}
+
+void EmitIAddCary32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    Operand carry = dest[1];
+    carry.setBit(1);
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().add(tmp, op2[0]);
+    // setc must read the flags produced by the add; mov does not modify flags,
+    // so the result store can safely follow. The sum was previously discarded.
+    ctx.Code().setc(carry);
+    MovGP(ctx, dest[0], tmp);
+}
+// Signatures must match the declarations in emit_x64_instructions.h, which
+// pass dest/op1/op2; a (EmitContext&)-only definition would fail to link.
+void EmitSMulExt(EmitContext&, const Operands&, const Operands&, const Operands&) {
+    throw NotImplementedException("SMulExtended");
+}
+
+void EmitUMulExt(EmitContext&, const Operands&, const Operands&, const Operands&) {
+    throw NotImplementedException("UMulExtended");
+}
+void EmitSDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
+    bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
+    // The divisor must live in a register that is neither rax nor rdx, both of
+    // which are implicit idiv operands; op2 may also be a non-register operand.
+    Reg tmp = op2[0].isREG() ? op2[0].getReg().cvt32() : ctx.TempGPReg().cvt32();
+    while (tmp.getIdx() == rax.getIdx() || tmp.getIdx() == rdx.getIdx()) {
+        tmp = ctx.TempGPReg().cvt32();
+    }
+    MovGP(ctx, tmp, op2[0]);
+    MovGP(ctx, eax, op1[0]);
+    // idiv divides edx:eax; sign-extend eax into edx first.
+    ctx.Code().cdq();
+    ctx.Code().idiv(tmp);
+    MovGP(ctx, dest[0], eax);
+    if (rdx_saved) {
+        EmitRestoreRegTemp(ctx, rdx);
+    }
+    if (rax_saved) {
+        EmitRestoreRegTemp(ctx, rax);
+    }
+}
+
+void EmitUDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
+    bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
+    Reg tmp = op2[0].isREG() ? op2[0].getReg().cvt32() : ctx.TempGPReg().cvt32();
+    while (tmp.getIdx() == rax.getIdx() || tmp.getIdx() == rdx.getIdx()) {
+        tmp = ctx.TempGPReg().cvt32();
+    }
+    MovGP(ctx, tmp, op2[0]);
+    MovGP(ctx, eax, op1[0]);
+    // div divides edx:eax; edx must be zeroed or the quotient is garbage / #DE.
+    ctx.Code().xor_(edx, edx);
+    ctx.Code().div(tmp);
+    MovGP(ctx, dest[0], eax);
+    if (rdx_saved) {
+        EmitRestoreRegTemp(ctx, rdx);
+    }
+    if (rax_saved) {
+        EmitRestoreRegTemp(ctx, rax);
+    }
+}
+
+void EmitSMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
+    bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
+    Reg tmp = op2[0].isREG() ? op2[0].getReg().cvt32() : ctx.TempGPReg().cvt32();
+    while (tmp.getIdx() == rax.getIdx() || tmp.getIdx() == rdx.getIdx()) {
+        tmp = ctx.TempGPReg().cvt32();
+    }
+    MovGP(ctx, tmp, op2[0]);
+    MovGP(ctx, eax, op1[0]);
+    ctx.Code().cdq();
+    ctx.Code().idiv(tmp);
+    // Remainder is left in edx.
+    MovGP(ctx, dest[0], edx);
+    if (rdx_saved) {
+        EmitRestoreRegTemp(ctx, rdx);
+    }
+    if (rax_saved) {
+        EmitRestoreRegTemp(ctx, rax);
+    }
+}
+
+void EmitUMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
+    bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
+    Reg tmp = op2[0].isREG() ? op2[0].getReg().cvt32() : ctx.TempGPReg().cvt32();
+    while (tmp.getIdx() == rax.getIdx() || tmp.getIdx() == rdx.getIdx()) {
+        tmp = ctx.TempGPReg().cvt32();
+    }
+    MovGP(ctx, tmp, op2[0]);
+    MovGP(ctx, eax, op1[0]);
+    ctx.Code().xor_(edx, edx);
+    ctx.Code().div(tmp);
+    MovGP(ctx, dest[0], edx);
+    if (rdx_saved) {
+        EmitRestoreRegTemp(ctx, rdx);
+    }
+    if (rax_saved) {
+        EmitRestoreRegTemp(ctx, rax);
+    }
+}
+
+void EmitINeg32(EmitContext& ctx, const Operands& dest, const Operands& op) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, op[0]);
+    ctx.Code().neg(tmp);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitINeg64(EmitContext& ctx, const Operands& dest, const Operands& op) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
+    MovGP(ctx, tmp, op[0]);
+    ctx.Code().neg(tmp);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitIAbs32(EmitContext& ctx, const Operands& dest, const Operands& op) {
+    Label done;
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, op[0]);
+    ctx.Code().cmp(tmp, 0);
+    ctx.Code().jns(done);
+    ctx.Code().neg(tmp);
+    ctx.Code().L(done);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitShiftLeftLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
+    bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
+    // A temporary is needed both when the destination is in memory (getReg()
+    // would be invalid) and when it aliases rcx, whose low byte (cl) holds the
+    // shift count.
+    Reg tmp = (dest[0].isMEM() || dest[0].getIdx() == rcx.getIdx()) ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, base[0]);
+    MovGP(ctx, cl, shift[0]);
+    ctx.Code().shl(tmp, cl);
+    MovGP(ctx, dest[0], tmp);
+    if (rcx_saved) {
+        EmitRestoreRegTemp(ctx, rcx);
+    }
+}
+
+void EmitShiftLeftLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
+    bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
+    Reg tmp = (dest[0].isMEM() || dest[0].getIdx() == rcx.getIdx()) ?
+void EmitShiftRightLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
+    bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
+    // As with the left shifts: a destination aliasing rcx would be clobbered
+    // when the shift count is loaded into cl, so fall back to a temporary.
+    Reg tmp = (dest[0].isMEM() || dest[0].getIdx() == rcx.getIdx()) ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, base[0]);
+    MovGP(ctx, cl, shift[0]);
+    ctx.Code().shr(tmp, cl);
+    MovGP(ctx, dest[0], tmp);
+    if (rcx_saved) {
+        EmitRestoreRegTemp(ctx, rcx);
+    }
+}
+
+void EmitShiftRightLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
+    bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
+    Reg tmp = (dest[0].isMEM() || dest[0].getIdx() == rcx.getIdx()) ? ctx.TempGPReg(false) : dest[0].getReg();
+    MovGP(ctx, tmp, base[0]);
+    MovGP(ctx, cl, shift[0]);
+    ctx.Code().shr(tmp, cl);
+    MovGP(ctx, dest[0], tmp);
+    if (rcx_saved) {
+        EmitRestoreRegTemp(ctx, rcx);
+    }
+}
+
+void EmitShiftRightArithmetic32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
+    bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
+    Reg tmp = (dest[0].isMEM() || dest[0].getIdx() == rcx.getIdx()) ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, base[0]);
+    MovGP(ctx, cl, shift[0]);
+    ctx.Code().sar(tmp, cl);
+    MovGP(ctx, dest[0], tmp);
+    if (rcx_saved) {
+        EmitRestoreRegTemp(ctx, rcx);
+    }
+}
+
+void EmitShiftRightArithmetic64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
+    bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
+    Reg tmp = (dest[0].isMEM() || dest[0].getIdx() == rcx.getIdx()) ?
ctx.TempGPReg(false) : dest[0].getReg(); + MovGP(ctx, tmp, base[0]); + MovGP(ctx, cl, shift[0]); + ctx.Code().sar(tmp, cl); + MovGP(ctx, dest[0], tmp); + if (rcx_saved) { + EmitRestoreRegTemp(ctx, rcx); + } +} + +void EmitBitwiseAnd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().and_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitBitwiseAnd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().and_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitBitwiseOr32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().or_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitBitwiseOr64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().or_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitBitwiseXor32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? 
ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().xor_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitBitFieldInsert(EmitContext& ctx) { + throw NotImplementedException("BitFieldInsert"); +} + +void EmitBitFieldSExtract(EmitContext& ctx) { + throw NotImplementedException("BitFieldSExtract"); +} + +void EmitBitFieldUExtract(EmitContext& ctx) { + throw NotImplementedException("BitFieldUExtract"); +} + +void EmitBitReverse32(EmitContext& ctx) { + throw NotImplementedException("BitReverse32"); +} + +void EmitBitCount32(EmitContext& ctx) { + throw NotImplementedException("BitCount32"); +} + +void EmitBitCount64(EmitContext& ctx) { + throw NotImplementedException("BitCount64"); +} + +void EmitBitwiseNot32(EmitContext& ctx, const Operands& dest, const Operands& op) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op[0]); + ctx.Code().not_(tmp); + MovGP(ctx, dest[0], tmp); +} + +void EmitFindSMsb32(EmitContext& ctx) { + throw NotImplementedException("FindSMsb32"); +} + +void EmitFindUMsb32(EmitContext& ctx) { + throw NotImplementedException("FindUMsb32"); +} + +void EmitFindILsb32(EmitContext& ctx) { + throw NotImplementedException("FindILsb32"); +} + +void EmitFindILsb64(EmitContext& ctx) { + throw NotImplementedException("FindILsb64"); +} + +void EmitSMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().cmp(tmp, op2[0]); + ctx.Code().cmovg(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitUMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? 
ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().cmp(tmp, op2[0]); + ctx.Code().cmova(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitSMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().cmp(tmp, op2[0]); + ctx.Code().cmovl(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitUMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().cmp(tmp, op2[0]); + ctx.Code().cmovb(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitSClamp32(EmitContext& ctx, const Operands& dest, const Operands& value, const Operands& min, const Operands& max) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, value[0]); + ctx.Code().cmp(tmp, min[0]); + ctx.Code().cmovl(tmp, min[0]); + ctx.Code().cmp(tmp, max[0]); + ctx.Code().cmovg(tmp, max[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitUClamp32(EmitContext& ctx, const Operands& dest, const Operands& value, const Operands& min, const Operands& max) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, value[0]); + ctx.Code().cmp(tmp, min[0]); + ctx.Code().cmovb(tmp, min[0]); + ctx.Code().cmp(tmp, max[0]); + ctx.Code().cmova(tmp, max[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitSLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? 
ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().setl(dest[0]); +} + +void EmitSLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false) : lhs[0].getReg(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().setl(dest[0]); +} + +void EmitULessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().setb(dest[0]); +} + +void EmitULessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false) : lhs[0].getReg(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().setb(dest[0]); +} + +void EmitIEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().sete(dest[0]); +} + +void EmitIEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false) : lhs[0].getReg(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().sete(dest[0]); +} + +void EmitSLessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? 
ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().setle(dest[0]); +} + +void EmitULessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().setbe(dest[0]); +} + +void EmitSGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().setg(dest[0]); +} + +void EmitUGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().seta(dest[0]); +} + +void EmitINotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().setne(dest[0]); +} + +void EmitINotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false) : lhs[0].getReg(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().setne(dest[0]); +} + +void EmitSGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? 
ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().setge(dest[0]); +} + +void EmitUGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + MovGP(ctx, tmp, lhs[0]); + ctx.Code().cmp(tmp, rhs[0]); + ctx.Code().setae(dest[0]); +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp new file mode 100644 index 000000000..30ec2eeeb --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp @@ -0,0 +1,40 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" +#include "shader_recompiler/backend/asm_x64/x64_utils.h" + +namespace Shader::Backend::X64 { + +using namespace Xbyak; +using namespace Xbyak::util; + +void EmitLogicalOr(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt8() : dest[0].getReg().cvt8(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().or_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitLogicalAnd(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt8() : dest[0].getReg().cvt8(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().and_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitLogicalXor(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? 
ctx.TempGPReg().cvt8() : dest[0].getReg().cvt8(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().xor_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitLogicalNot(EmitContext& ctx, const Operands& dest, const Operands& op) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt8() : dest[0].getReg().cvt8(); + MovGP(ctx, tmp, op[0]); + ctx.Code().not_(tmp); + MovGP(ctx, dest[0], tmp); +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp new file mode 100644 index 000000000..56ecaee03 --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" +#include "shader_recompiler/backend/asm_x64/x64_utils.h" + +namespace Shader::Backend::X64 { + +using namespace Xbyak; +using namespace Xbyak::util; + +void EmitSelectU1(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + Label false_label, end_label; + Reg tmp = cond[0].isMEM() ? 
ctx.TempGPReg().cvt8() : cond[0].getReg().cvt8(); + MovGP(ctx, tmp, cond[0]); + ctx.Code().test(tmp, tmp); + ctx.Code().jz(false_label); + MovGP(ctx, dest[0], true_value[0]); + ctx.Code().jmp(end_label); + ctx.Code().L(false_label); + MovGP(ctx, dest[0], false_value[0]); + ctx.Code().L(end_label); +} + +void EmitSelectU8(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + EmitSelectU1(ctx, dest, cond, true_value, false_value); +} + +void EmitSelectU16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + EmitSelectU1(ctx, dest, cond, true_value, false_value); +} + +void EmitSelectU32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + EmitSelectU1(ctx, dest, cond, true_value, false_value); +} + +void EmitSelectU64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + EmitSelectU1(ctx, dest, cond, true_value, false_value); +} + +void EmitSelectF16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + EmitSelectU1(ctx, dest, cond, true_value, false_value); +} + +void EmitSelectF32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + Label false_label, end_label; + Reg tmp = cond[0].isMEM() ? 
ctx.TempGPReg().cvt8() : cond[0].getReg().cvt8(); + MovGP(ctx, tmp, cond[0]); + ctx.Code().test(tmp, tmp); + ctx.Code().jz(false_label); + MovFloat(ctx, dest[0], true_value[0]); + ctx.Code().jmp(end_label); + ctx.Code().L(false_label); + MovFloat(ctx, dest[0], false_value[0]); + ctx.Code().L(end_label); +} + +void EmitSelectF64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + Label false_label, end_label; + Reg tmp = cond[0].isMEM() ? ctx.TempGPReg().cvt8() : cond[0].getReg().cvt8(); + MovGP(ctx, tmp, cond[0]); + ctx.Code().test(tmp, tmp); + ctx.Code().jz(false_label); + MovDouble(ctx, dest[0], true_value[0]); + ctx.Code().jmp(end_label); + ctx.Code().L(false_label); + MovDouble(ctx, dest[0], false_value[0]); + ctx.Code().L(end_label); +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp new file mode 100644 index 000000000..acae51f66 --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" +#include "shader_recompiler/backend/asm_x64/x64_utils.h" + +namespace Shader::Backend::X64 { + +using namespace Xbyak; +using namespace Xbyak::util; + +void EmitPrologue(EmitContext& ctx) { + ctx.Prologue(); +} + +void ConvertDepthMode(EmitContext& ctx) { + +} + +void ConvertPositionToClipSpace(EmitContext& ctx) { + +} + +void EmitEpilogue(EmitContext& ctx) { + ctx.SetEndFlag(); +} + +void EmitDiscard(EmitContext& ctx) { + ctx.SetEndFlag(); +} + +void EmitDiscardCond(EmitContext& ctx, const Operands& condition) { + Reg tmp = condition[0].isMEM() ? 
ctx.TempGPReg().cvt8() : condition[0].getReg().cvt8(); + MovGP(ctx, tmp, condition[0]); + ctx.Code().test(tmp, tmp); + ctx.Code().jnz(ctx.EndLabel()); +} + +void EmitEmitVertex(EmitContext& ctx) { + +} + +void EmitEmitPrimitive(EmitContext& ctx) { + +} + +void EmitEndPrimitive(EmitContext& ctx) { + +} + +void EmitDebugPrint(EmitContext& ctx) { + +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp new file mode 100644 index 000000000..b1f87d61f --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" + +namespace Shader::Backend::X64 { + +void EmitUndefU1(EmitContext& ctx) { + UNREACHABLE_MSG("x64 Instruction"); +} + +void EmitUndefU8(EmitContext&) { + UNREACHABLE_MSG("x64 Instruction"); +} + +void EmitUndefU16(EmitContext&) { + UNREACHABLE_MSG("x64 Instruction"); +} + +void EmitUndefU32(EmitContext& ctx) { + UNREACHABLE_MSG("x64 Instruction"); +} + +void EmitUndefU64(EmitContext&) { + UNREACHABLE_MSG("x64 Instruction"); +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp new file mode 100644 index 000000000..1498345de --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" + +namespace Shader::Backend::X64 { + +void EmitWarpId(EmitContext& ctx) { + +} + +void EmitLaneId(EmitContext& ctx) { + +} + +void EmitQuadShuffle(EmitContext& ctx) { + 
+} + +void EmitReadFirstLane(EmitContext& ctx) { + +} + +void EmitReadLane(EmitContext& ctx) { + +} + +void EmitWriteLane(EmitContext& ctx) { + +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp index 5dd7e0b6c..4aaea8cd4 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp @@ -58,6 +58,11 @@ void EmitContext::PopTempXmmReg() { temp_xmm_reg_index--; } +void EmitContext::ResetTempRegs() { + temp_gp_reg_index = 0; + temp_xmm_reg_index = 0; +} + const Operands& EmitContext::Def(IR::Inst* inst) { return inst_to_operands.at(inst); } @@ -135,11 +140,6 @@ EmitContext::PhiAssignments(IR::Block* block) const { return std::nullopt; } -void EmitContext::ResetTempRegs() { - temp_gp_reg_index = 0; - temp_xmm_reg_index = 0; -} - void EmitContext::Prologue() { if (inst_stack_space > 0) { code.sub(rsp, inst_stack_space); diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h index eab5bad70..5c907f0ca 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h @@ -38,10 +38,21 @@ public: return block_labels.at(block); } + void SetEndFlag() { + end_flag = true; + } + + [[nodiscard]] bool EndFlag() { + bool flag = end_flag; + end_flag = false; + return flag; + } + [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true); [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true); void PopTempGPReg(); void PopTempXmmReg(); + void ResetTempRegs(); [[nodiscard]] const Xbyak::Reg64& UserData() const {return Xbyak::util::r11;} @@ -50,7 +61,6 @@ public: [[nodiscard]] std::optional> PhiAssignments(IR::Block* block) const; - void ResetTempRegs(); void Prologue(); void Epilogue(); @@ -108,6 +118,9 @@ private: 
boost::container::small_flat_map block_labels; Xbyak::Label end_label; + // End flag, used to defer jump to end label + bool end_flag = false; + void SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval, ActiveIntervalList& active_intervals); void AdjustInstInterval(InstInterval& interval, const FlatInstList& insts); diff --git a/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp b/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp index 4e5f29e73..dacdaae14 100644 --- a/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp +++ b/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp @@ -17,7 +17,8 @@ void DoIAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmVal } void DoIAddCary32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { - UNREACHABLE_MSG("IAddCary32 not implemented"); + Common::CartesianInvoke(ImmValue::AddCarry, + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoISub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { diff --git a/src/shader_recompiler/ir/compute_value/imm_value.cpp b/src/shader_recompiler/ir/compute_value/imm_value.cpp index c9ebf1519..2000bdfba 100644 --- a/src/shader_recompiler/ir/compute_value/imm_value.cpp +++ b/src/shader_recompiler/ir/compute_value/imm_value.cpp @@ -385,6 +385,62 @@ ImmValue ImmValue::Add(const ImmValue& a, const ImmValue& b) no a.imm_values[3].imm_f64 + b.imm_values[3].imm_f64); } +template <> +ImmValue ImmValue::AddCarry(const ImmValue& a, const ImmValue& b) noexcept { + u8 result = a.imm_values[0].imm_u8 + b.imm_values[0].imm_u8; + u8 carry = (result < a.imm_values[0].imm_u8) ? 1 : 0; + return ImmValue(result, carry); +} + +template <> +ImmValue ImmValue::AddCarry(const ImmValue& a, const ImmValue& b) noexcept { + u8 result = a.imm_values[0].imm_u8 + b.imm_values[0].imm_u8; + u8 carry = (result < a.imm_values[0].imm_u8) ? 
1 : 0; + return ImmValue(result, carry); +} + +template <> +ImmValue ImmValue::AddCarry(const ImmValue& a, const ImmValue& b) noexcept { + u16 result = a.imm_values[0].imm_u16 + b.imm_values[0].imm_u16; + u16 carry = (result < a.imm_values[0].imm_u16) ? 1 : 0; + return ImmValue(result, carry); +} + +template <> +ImmValue ImmValue::AddCarry(const ImmValue& a, const ImmValue& b) noexcept { + u16 result = static_cast<u16>(static_cast<u16>(a.imm_values[0].imm_s16) + static_cast<u16>(b.imm_values[0].imm_s16)); + s16 carry = (result < static_cast<u16>(a.imm_values[0].imm_s16)) ? 1 : 0; + return ImmValue(static_cast<s16>(result), carry); +} + +template <> +ImmValue ImmValue::AddCarry(const ImmValue& a, const ImmValue& b) noexcept { + u32 result = a.imm_values[0].imm_u32 + b.imm_values[0].imm_u32; + u32 carry = (result < a.imm_values[0].imm_u32) ? 1 : 0; + return ImmValue(result, carry); +} + +template <> +ImmValue ImmValue::AddCarry(const ImmValue& a, const ImmValue& b) noexcept { + u32 result = static_cast<u32>(a.imm_values[0].imm_s32) + static_cast<u32>(b.imm_values[0].imm_s32); + s32 carry = (result < static_cast<u32>(a.imm_values[0].imm_s32)) ? 1 : 0; + return ImmValue(static_cast<s32>(result), carry); +} + +template <> +ImmValue ImmValue::AddCarry(const ImmValue& a, const ImmValue& b) noexcept { + u64 result = a.imm_values[0].imm_u64 + b.imm_values[0].imm_u64; + u64 carry = (result < a.imm_values[0].imm_u64) ? 1 : 0; + return ImmValue(result, carry); +} + +template <> +ImmValue ImmValue::AddCarry(const ImmValue& a, const ImmValue& b) noexcept { + u64 result = static_cast<u64>(a.imm_values[0].imm_s64) + static_cast<u64>(b.imm_values[0].imm_s64); + s64 carry = (result < static_cast<u64>(a.imm_values[0].imm_s64)) ? 
1 : 0; + return ImmValue(static_cast<s64>(result), carry); +} + template <> ImmValue ImmValue::Sub(const ImmValue& a, const ImmValue& b) noexcept { return ImmValue(a.imm_values[0].imm_u8 - b.imm_values[0].imm_u8, diff --git a/src/shader_recompiler/ir/compute_value/imm_value.h b/src/shader_recompiler/ir/compute_value/imm_value.h index a46712ce5..800ee4b16 100644 --- a/src/shader_recompiler/ir/compute_value/imm_value.h +++ b/src/shader_recompiler/ir/compute_value/imm_value.h @@ -97,6 +97,9 @@ public: template [[nodiscard]] static ImmValue Add(const ImmValue& a, const ImmValue& b) noexcept; + template + [[nodiscard]] static ImmValue AddCarry(const ImmValue& a, const ImmValue& b) noexcept; + template [[nodiscard]] static ImmValue Sub(const ImmValue& a, const ImmValue& b) noexcept;