From 6f9787e73393bca1fbab96c74b20f531a188ab94 Mon Sep 17 00:00:00 2001 From: microsoftv <6063922+microsoftv@users.noreply.github.com> Date: Sat, 17 Aug 2024 12:33:08 -0400 Subject: [PATCH] fixed BUFFER_ATOMIC_ADD, DS_ADD_U32 fails --- .../frontend/translate/data_share.cpp | 18 ++++++++++----- .../frontend/translate/vector_memory.cpp | 13 ++++------- src/shader_recompiler/ir/ir_emitter.cpp | 23 ++++++++----------- src/shader_recompiler/ir/ir_emitter.h | 6 ++--- src/shader_recompiler/ir/opcodes.inc | 10 ++++---- .../ir/passes/resource_tracking_pass.cpp | 12 ++++++++++ 6 files changed, 46 insertions(+), 36 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 7b1c2a708..3b0ccfcaa 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -121,10 +121,12 @@ void Translator::DS_ADD_U32(const GcnInst& inst) { const IR::U32 data{GetSrc(inst.src[1])}; const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); + IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::U32 value = ir.SharedAtomicIAdd(addr_offset, data); + const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data); + // const IR::Value original_val = ir.LoadShared(32, false, addr_offset); - SetDst(inst.dst[0], value); + ir.SetVectorReg(dst_reg, IR::U32{original_val}); } void Translator::DS_MIN_U32(const GcnInst& inst) { @@ -132,10 +134,12 @@ void Translator::DS_MIN_U32(const GcnInst& inst) { const IR::U32 data{GetSrc(inst.src[1])}; const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); + IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::U32 value = ir.SharedAtomicIMax(addr_offset, data, false); + const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false); + // const IR::Value original_val = ir.LoadShared(32, false, addr_offset); - SetDst(inst.dst[0], value); + ir.SetVectorReg(dst_reg, IR::U32{original_val}); } void Translator::DS_MAX_U32(const GcnInst& inst) { @@ -143,10 +147,12 @@ void Translator::DS_MAX_U32(const GcnInst& inst) { const IR::U32 data{GetSrc(inst.src[1])}; const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); + IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::U32 value = ir.SharedAtomicIMax(addr_offset, data, false); + const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false); + // const IR::Value original_val = ir.LoadShared(32, false, addr_offset); - SetDst(inst.dst[0], value); + ir.SetVectorReg(dst_reg, IR::U32{original_val}); } void Translator::S_BARRIER() { diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 4295c1fee..6bdb9596f 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -458,7 +458,7 @@ void Translator::BUFFER_ATOMIC(u32 num_dwords, AtomicOp op, const GcnInst& inst) info.inst_offset.Assign(mubuf.offset); info.offset_enable.Assign(mubuf.offen); - // Get vdata value(s) + // Get vdata value IR::Value vdata_val = ir.GetVectorReg(vdata); // Get address of vdata @@ -469,18 +469,13 @@ void Translator::BUFFER_ATOMIC(u32 num_dwords, AtomicOp op, const GcnInst& inst) ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1), ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3)); - // Get current srsrc value (incorrect) - IR::U32 prev_val = ir.GetScalarReg(srsrc); - // Apply atomic op - // derefs srsrc buffer and adds vdata value to it - const IR::U32 new_vdata = IR::U32{ir.BufferAtomicIAdd(handle, address, vdata_val, info)}; + // derefs srsrc buffer and adds vdata value to it, then returns + const IR::Value original_val = ir.BufferAtomicIAdd(handle, address, vdata_val, info); if (mubuf.glc) { - ir.SetVectorReg(vdata, prev_val); + ir.SetVectorReg(vdata, IR::U32{original_val}); } - - return; } void Translator::IMAGE_GET_LOD(const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index e13f0f439..3ae068072 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -286,26 +286,23 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) } } -U32U64 IREmitter::SharedAtomicIAdd(const U32U64& a, const U32U64& b) { - if (a.Type() != b.Type()) { - UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); - } - switch (a.Type()) { +U32F32 IREmitter::SharedAtomicIAdd(const U32& address, const U32F32& data) { + switch (data.Type()) { case Type::U32: - return Inst(Opcode::SharedAtomicIAdd32, a, b); + return Inst(Opcode::SharedAtomicIAdd32, address, data); default: - ThrowInvalidType(a.Type()); + ThrowInvalidType(data.Type()); } } -U32 IREmitter::SharedAtomicIMin(const U32& a, const U32& b, bool is_signed) { - return is_signed ? Inst(Opcode::SharedAtomicSMin32, a, b) - : Inst(Opcode::SharedAtomicUMin32, a, b); +U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) { + return is_signed ? Inst(Opcode::SharedAtomicSMin32, address, data) + : Inst(Opcode::SharedAtomicUMin32, address, data); } -U32 IREmitter::SharedAtomicIMax(const U32& a, const U32& b, bool is_signed) { - return is_signed ? Inst(Opcode::SharedAtomicSMax32, a, b) - : Inst(Opcode::SharedAtomicUMax32, a, b); +U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) { + return is_signed ? Inst(Opcode::SharedAtomicSMax32, address, data) + : Inst(Opcode::SharedAtomicUMax32, address, data); } U32 IREmitter::ReadConst(const Value& base, const U32& offset) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 210b43f9c..ff9dc98b1 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -84,9 +84,9 @@ public: [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset); - [[nodiscard]] U32U64 SharedAtomicIAdd(const U32U64& a, const U32U64& b); - [[nodiscard]] U32 SharedAtomicIMin(const U32& a, const U32& b, bool is_signed); - [[nodiscard]] U32 SharedAtomicIMax(const U32& a, const U32& b, bool is_signed); + [[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data); + [[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed); + [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed); [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); [[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 14bc28856..e9ecd4350 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -96,11 +96,11 @@ OPCODE(StoreBufferFormatF32x4, Void, Opaq OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) // Buffer atomic operations -OPCODE(BufferAtomicIAdd32, Opaque, Opaque, Opaque, Opaque ) -OPCODE(BufferAtomicSMin32, U32, U32, U32, ) -OPCODE(BufferAtomicUMin32, U32, U32, U32, ) -OPCODE(BufferAtomicSMax32, U32, U32, U32, ) -OPCODE(BufferAtomicUMax32, U32, U32, U32, ) +OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 97fc5b999..529e8dc8d 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -42,6 +42,17 @@ bool IsBufferInstruction(const IR::Inst& inst) { case IR::Opcode::StoreBufferFormatF32x3: case IR::Opcode::StoreBufferFormatF32x4: case IR::Opcode::StoreBufferU32: + case IR::Opcode::BufferAtomicIAdd32: + case IR::Opcode::BufferAtomicSMin32: + case IR::Opcode::BufferAtomicUMin32: + case IR::Opcode::BufferAtomicSMax32: + case IR::Opcode::BufferAtomicUMax32: + case IR::Opcode::BufferAtomicInc32: + case IR::Opcode::BufferAtomicDec32: + case IR::Opcode::BufferAtomicAnd32: + case IR::Opcode::BufferAtomicOr32: + case IR::Opcode::BufferAtomicXor32: + case IR::Opcode::BufferAtomicExchange32: return true; default: return false; @@ -108,6 +119,7 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { case IR::Opcode::LoadBufferU32: case IR::Opcode::ReadConstBufferU32: case IR::Opcode::StoreBufferU32: + case IR::Opcode::BufferAtomicIAdd32: return IR::Type::U32; default: UNREACHABLE();