From bbf89cd3db3a6c75697739e0d6d434f1cdfd6224 Mon Sep 17 00:00:00 2001 From: Dmugetsu Date: Wed, 16 Apr 2025 18:17:48 -0600 Subject: [PATCH] Added ir instructions for new opcodes. Removing Write implementations. Maping operation S_BFE_I32 as it was added in translate but wasnt pointing to anything. --- .../backend/spirv/emit_spirv_atomic.cpp | 21 +++++++++++++++ .../backend/spirv/emit_spirv_instructions.h | 4 +++ .../frontend/translate/data_share.cpp | 27 +++---------------- .../frontend/translate/scalar_alu.cpp | 15 +++++++++++ .../frontend/translate/translate.h | 2 -- src/shader_recompiler/ir/ir_emitter.cpp | 12 +++++++++ src/shader_recompiler/ir/ir_emitter.h | 4 +++ src/shader_recompiler/ir/opcodes.inc | 3 +++ 8 files changed, 62 insertions(+), 26 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 4faa99fe8..c6ec65606 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -21,6 +21,15 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); } +Id SharedAtomicU32_IncDec(EmitContext& ctx, Id offset, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) { + const Id shift_id{ctx.ConstU32(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics); +} + Id BufferAtomicU32BoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) { if (Sirit::ValidId(buffer_size)) { // Bounds checking enabled, wrap in a conditional branch to make sure that @@ -99,6 +108,18 @@ Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) { return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor); } +Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicISub); +} + +Id EmitSharedAtomicIIncrement32(EmitContext& ctx, Id offset) { + return SharedAtomicU32_IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement); +} + +Id EmitSharedAtomicIDecrement32(EmitContext& ctx, Id offset) { + return SharedAtomicU32_IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement); +} + Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 68438fbba..9b7528be8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -130,6 +130,10 @@ Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicIIncrement32(EmitContext& ctx, Id offset); +Id EmitSharedAtomicIDecrement32(EmitContext& ctx, Id offset); +Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value); + Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 846a26692..14dc23a03 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -19,10 +19,6 @@ void Translator::EmitDataShare(const GcnInst& inst) { return DS_INC_U32(inst, false); case Opcode::DS_DEC_U32: return DS_DEC_U32(inst, false); - case Opcode::DS_WRITE_SRC2_B32: - return DS_WRITE_SRC2_B32(inst, true); - case Opcode::DS_WRITE_SRC2_B64: - return DS_WRITE_SRC2_B64(inst, true); case Opcode::DS_SUB_RTN_U32: return DS_SUB_U32(inst, true); case Opcode::DS_MIN_I32: @@ -245,7 +241,7 @@ void Translator::DS_INC_U32(const GcnInst& inst, bool rtn) { const IR::U32 offset = ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, ir.Imm32(1)); + const IR::Value original_val = ir.SharedAtomicIIncrement(addr_offset); if (rtn) { SetDst(inst.dst[0], IR::U32{original_val}); } @@ -256,36 +252,19 @@ void Translator::DS_DEC_U32(const GcnInst& inst, bool rtn) { const IR::U32 offset = ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, ir.Imm32(-1)); + const IR::Value original_val = ir.SharedAtomicIDecrement(addr_offset); if (rtn) { SetDst(inst.dst[0], IR::U32{original_val}); } } -void Translator::DS_WRITE_SRC2_B32(const GcnInst& inst, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 data{GetSrc(inst.src[1])}; - const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0; - const IR::U32 addr_offset = ir.IAdd(addr, ir.Imm32(offset)); - ir.WriteShared(32, addr_offset, data); -} - -void Translator::DS_WRITE_SRC2_B64(const GcnInst& inst, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 data0{GetSrc(inst.src[1])}; - const IR::U32 data1{GetSrc(inst.src[2])}; - const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0; - const IR::U32 addr_offset = ir.IAdd(addr, ir.Imm32(offset)); - ir.WriteShared(64, ir.CompositeConstruct(data0, data1), addr_offset); -} - void Translator::DS_SUB_U32(const GcnInst& inst, bool rtn) { const IR::U32 addr{GetSrc(inst.src[0])}; const IR::U32 data{GetSrc(inst.src[1])}; const IR::U32 offset = ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data); + const IR::Value original_val = ir.SharedAtomicISub(addr_offset, data); if (rtn) { SetDst(inst.dst[0], IR::U32{original_val}); } diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 3a8e894ae..cf541f396 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -521,6 +521,21 @@ void Translator::S_BFE(const GcnInst& inst, bool is_signed) { ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } +void Translator::S_BFE_I32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + + const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))}; + + const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))}; + + const IR::U32 result{ir.BitFieldExtract(src0, offset, count, false)}; + + SetDst(inst.dst[0], result); + + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + void Translator::S_ABSDIFF_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 3c910e95a..52d91c963 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -278,8 +278,6 @@ public: void DS_SUB_U32(const GcnInst& inst, bool); void DS_INC_U32(const GcnInst& inst, bool rtn); void DS_DEC_U32(const GcnInst& inst, bool rtn); - void DS_WRITE_SRC2_B32(const GcnInst& inst, bool rtn); - void DS_WRITE_SRC2_B64(const GcnInst& inst, bool rtn); // Buffer Memory // MUBUF / MTBUF diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index a51d126c7..e8836bb4c 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -357,6 +357,18 @@ U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) { return Inst(Opcode::SharedAtomicXor32, address, data); } +U32 IREmitter::SharedAtomicIIncrement(const U32& address) { + return Inst(Opcode::SharedAtomicIIncrement32, address); +} + +U32 IREmitter::SharedAtomicIDecrement(const U32& address) { + return Inst(Opcode::SharedAtomicIDecrement32, address); +} + +U32 IREmitter::SharedAtomicISub(const U32& address, const U32& data) { + return Inst(Opcode::SharedAtomicISub32, address, data); +} + U32 IREmitter::ReadConst(const Value& base, const U32& offset) { return Inst(Opcode::ReadConst, base, offset); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index f1d564b80..186d83a07 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -106,6 +106,10 @@ public: [[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data); [[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data); + [[nodiscard]] U32 SharedAtomicIIncrement(const U32& address); + [[nodiscard]] U32 SharedAtomicIDecrement(const U32& address); + [[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data); + [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 10819f898..4932ff9a0 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -44,6 +44,9 @@ OPCODE(SharedAtomicUMax32, U32, U32, OPCODE(SharedAtomicAnd32, U32, U32, U32, ) OPCODE(SharedAtomicOr32, U32, U32, U32, ) OPCODE(SharedAtomicXor32, U32, U32, U32, ) +OPCODE(SharedAtomicISub32, U32, U32, U32, ) +OPCODE(SharedAtomicIIncrement32, U32, U32, ) +OPCODE(SharedAtomicIDecrement32, U32, U32, ) // Context getters/setters OPCODE(GetUserData, U32, ScalarReg, )