From 86f403aaf92b01356e08f7c6f1c77ae49aa58a4a Mon Sep 17 00:00:00 2001 From: Nokk Date: Mon, 8 Jul 2024 10:08:42 +1000 Subject: [PATCH] Implemented Legacy Max/Min using NMax/NMin --- .../backend/spirv/emit_spirv_floating_point.cpp | 13 +++++++++++-- .../backend/spirv/emit_spirv_instructions.h | 4 ++-- .../frontend/translate/translate.cpp | 5 ++++- .../frontend/translate/translate.h | 2 ++ .../frontend/translate/vector_alu.cpp | 12 ++++++++++++ src/shader_recompiler/ir/ir_emitter.cpp | 15 +++++++++++---- src/shader_recompiler/ir/ir_emitter.h | 4 ++-- 7 files changed, 44 insertions(+), 11 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 04b0b96e1..39313ce52 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -51,7 +51,11 @@ Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); } -Id EmitFPMax32(EmitContext& ctx, Id a, Id b) { +Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy) { + if (is_legacy) { + return ctx.OpNMax(ctx.F32[1], a, b); + } + return ctx.OpFMax(ctx.F32[1], a, b); } @@ -59,7 +63,12 @@ Id EmitFPMax64(EmitContext& ctx, Id a, Id b) { return ctx.OpFMax(ctx.F64[1], a, b); } -Id EmitFPMin32(EmitContext& ctx, Id a, Id b) { +Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy) { + if (is_legacy) + { + return ctx.OpNMin(ctx.F32[1], a, b); + } + return ctx.OpFMin(ctx.F32[1], a, b); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index acbaf9969..495ada5de 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -165,9 +165,9 @@ Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPMax32(EmitContext& ctx, Id a, Id b); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false); Id EmitFPMax64(EmitContext& ctx, Id a, Id b); -Id EmitFPMin32(EmitContext& ctx, Id a, Id b); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false); Id EmitFPMin64(EmitContext& ctx, Id a, Id b); Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index d5c3ac8a9..223723e0c 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -625,6 +625,9 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l case Opcode::V_MIN3_F32: translator.V_MIN3_F32(inst); break; + case Opcode::V_MIN_LEGACY_F32: + translator.V_MIN_LEGACY_F32(inst); + break; case Opcode::V_MADMK_F32: translator.V_MADMK_F32(inst); break; @@ -876,7 +879,7 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l translator.V_MAD_F32(inst); break; case Opcode::V_MAX_LEGACY_F32: - translator.V_MAX_F32(inst); + translator.V_MAX_LEGACY_F32(inst); break; case Opcode::V_RSQ_LEGACY_F32: case Opcode::V_RSQ_CLAMP_F32: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index e1f72e5f8..ab656988c 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -113,6 +113,7 @@ public: void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst); void V_MAX_F32(const GcnInst& inst); void V_MAX_U32(bool is_signed, const GcnInst& inst); + void V_MAX_LEGACY_F32(const GcnInst& inst); void V_RSQ_F32(const GcnInst& inst); void V_SIN_F32(const GcnInst& inst); void V_LOG_F32(const GcnInst& inst); @@ -120,6 +121,7 @@ public: void V_SQRT_F32(const GcnInst& inst); void V_MIN_F32(const GcnInst& inst); void V_MIN3_F32(const GcnInst& inst); + void V_MIN_LEGACY_F32(const GcnInst& inst); void V_MADMK_F32(const GcnInst& inst); void V_CUBEMA_F32(const GcnInst& inst); void V_CUBESC_F32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index a434567a0..3393b81f1 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -215,6 +215,12 @@ void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) { SetDst(inst.dst[0], ir.IMax(src0, src1, is_signed)); } +void Translator::V_MAX_LEGACY_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src1{GetSrc(inst.src[1], true)}; + SetDst(inst.dst[0], ir.FPMax(src0, src1, true)); +} + void Translator::V_RSQ_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0], true)}; SetDst(inst.dst[0], ir.FPRecipSqrt(src0)); @@ -253,6 +259,12 @@ void Translator::V_MIN3_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2))); } +void Translator::V_MIN_LEGACY_F32(const GcnInst& inst) { + const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src1{GetSrc(inst.src[1], true)}; + SetDst(inst.dst[0], ir.FPMin(src0, src1, true)); +} + void Translator::V_MADMK_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0], true)}; const IR::F32 src1{GetSrc(inst.src[1], true)}; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 7f0fa741e..44128f236 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -865,28 +865,35 @@ U1 IREmitter::FPUnordered(const F32F64& lhs, const F32F64& rhs) { return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); } -F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs) { +F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy) { if (lhs.Type() != rhs.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } + switch (lhs.Type()) { case Type::F32: - return Inst(Opcode::FPMax32, lhs, rhs); + return Inst(Opcode::FPMax32, lhs, rhs, is_legacy); case Type::F64: + if (is_legacy) { + UNREACHABLE_MSG("F64 cannot be used with LEGACY ops"); + } return Inst(Opcode::FPMax64, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs) { +F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, bool is_legacy) { if (lhs.Type() != rhs.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F32: - return Inst(Opcode::FPMin32, lhs, rhs); + return Inst(Opcode::FPMin32, lhs, rhs, is_legacy); case Type::F64: + if (is_legacy) { + UNREACHABLE_MSG("F64 cannot be used with LEGACY ops"); + } return Inst(Opcode::FPMin64, lhs, rhs); default: ThrowInvalidType(lhs.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index c3342530b..51ab9d030 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -149,8 +149,8 @@ public: [[nodiscard]] U1 FPIsInf(const F32F64& value); [[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs); [[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs); - [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs); - [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs); + [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false); + [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] Value IAddCary(const U32& a, const U32& b);