From 8a84f1b7787ac9237626cbe9eaf1c9d1f3aee488 Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Thu, 21 Aug 2025 04:53:54 +0200 Subject: [PATCH] Implement V_CMP_GT_U64 (#3352) * Implement V_CMP_GT_U64 * Add GroupAny * Use GroupAny * Add assert * clang --- .../backend/spirv/emit_spirv_instructions.h | 1 + src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | 4 ++++ src/shader_recompiler/frontend/translate/vector_alu.cpp | 8 ++++++++ src/shader_recompiler/ir/ir_emitter.cpp | 5 +++++ src/shader_recompiler/ir/ir_emitter.h | 1 + src/shader_recompiler/ir/opcodes.inc | 1 + 6 files changed, 20 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 319f2d5ba..55e7536d2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -536,6 +536,7 @@ Id EmitReadLane(EmitContext& ctx, Id value, Id lane); Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane); Id EmitBallot(EmitContext& ctx, Id bit); Id EmitBallotFindLsb(EmitContext& ctx, Id mask); +Id EmitGroupAny(EmitContext& ctx, Id bit); Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding); Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 951c76001..cd28ee5c5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -42,4 +42,8 @@ Id EmitBallotFindLsb(EmitContext& ctx, Id mask) { return ctx.OpGroupNonUniformBallotFindLSB(ctx.U32[1], SubgroupScope(ctx), mask); } +Id EmitGroupAny(EmitContext& ctx, Id bit) { + return ctx.OpGroupNonUniformAny(ctx.U1[1], SubgroupScope(ctx), bit); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 20e49708f..f8be69523 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -331,6 +331,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_CMP_U64(ConditionOp::EQ, false, false, inst); case Opcode::V_CMP_NE_U64: return V_CMP_U64(ConditionOp::LG, false, false, inst); + case Opcode::V_CMP_GT_U64: + return V_CMP_U64(ConditionOp::GT, false, false, inst); case Opcode::V_CMP_CLASS_F32: return V_CMP_CLASS_F32(inst); @@ -1020,6 +1022,12 @@ void Translator::V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const return ir.IEqual(src0, src1); case ConditionOp::LG: // NE return ir.INotEqual(src0, src1); + case ConditionOp::GT: + if (src1.IsImmediate() && src1.U64() == 0) { + ASSERT(inst.src[0].field == OperandField::ScalarGPR); + return ir.GroupAny(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))); + } + return ir.IGreaterThan(src0, src1, is_signed); default: UNREACHABLE_MSG("Unsupported V_CMP_U64 condition operation: {}", u32(op)); } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 6d22ff582..cbe7fc16f 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -5,6 +5,7 @@ #include #include #include "common/assert.h" +#include "ir_emitter.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/ir/debug_print.h" #include "shader_recompiler/ir/ir_emitter.h" @@ -668,6 +669,10 @@ U32 IREmitter::BallotFindLsb(const Value& mask) { return Inst(Opcode::BallotFindLsb, mask); } +U1 IREmitter::GroupAny(const U1& bit) { + return Inst(Opcode::GroupAny, bit); +} + F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) { if (a.Type() != b.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 7fbc3cc90..6f20d5780 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -177,6 +177,7 @@ public: [[nodiscard]] U32 WriteLane(const U32& value, const U32& write_value, const U32& lane); [[nodiscard]] Value Ballot(const U1& bit); [[nodiscard]] U32 BallotFindLsb(const Value& mask); + [[nodiscard]] U1 GroupAny(const U1& bit); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index a7b29132c..34bddb015 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -479,3 +479,4 @@ OPCODE(Ballot, U32x4, U1, OPCODE(BallotFindLsb, U32, U32x4, ) OPCODE(DataAppend, U32, U32, U32 ) OPCODE(DataConsume, U32, U32, U32 ) +OPCODE(GroupAny, U1, U1, )