From 5fe7b4b8ae208c707ae3ffbb93ee1defc7d32127 Mon Sep 17 00:00:00 2001 From: microsoftv <6063922+microsoftv@users.noreply.github.com> Date: Sat, 17 Aug 2024 13:58:25 -0400 Subject: [PATCH] resource tracking for buffer atomic --- .../backend/spirv/emit_spirv_atomic.cpp | 4 -- .../frontend/translate/translate.h | 2 +- .../frontend/translate/vector_memory.cpp | 50 +++++++++----- src/shader_recompiler/ir/ir_emitter.h | 10 +-- .../ir/passes/resource_tracking_pass.cpp | 66 +++++++++++-------- 5 files changed, 76 insertions(+), 56 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index f6a1a829e..37e91d3b1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -23,13 +23,9 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { - // Get srsrc buffer auto& buffer = ctx.buffers[handle]; - // Get address of vdata by vaddr + buffer offset address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); - // Get first index of data (4-aligned indices, addr >> 2) const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); - // Get pointer to first data value in buffer using index const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index); const auto [scope, semantics]{AtomicArgs(ctx)}; return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 85060225a..5c7b3b714 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -187,7 +187,7 @@ public: // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); - void BUFFER_ATOMIC(u32 num_dwords, AtomicOp op, const GcnInst& inst); + void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst); // Vector interpolation void V_INTERP_P2_F32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index d8969b225..08674fa2d 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -107,7 +107,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { // Buffer atomic operations case Opcode::BUFFER_ATOMIC_ADD: - return BUFFER_ATOMIC(1, AtomicOp::Add, inst); + return BUFFER_ATOMIC(AtomicOp::Add, inst); default: LogMissingOpcode(inst); } @@ -439,40 +439,54 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_form } } -void Translator::BUFFER_ATOMIC(u32 num_dwords, AtomicOp op, const GcnInst& inst) { - // Get controls for mubuf-specific instructions +// TODO: U64 +void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { const auto& mubuf = inst.control.mubuf; - // Get vaddr register const IR::VectorReg vaddr{inst.src[0].code}; - // Get vdata register const IR::VectorReg vdata{inst.src[1].code}; - // Get srsrc register const IR::ScalarReg srsrc{inst.src[2].code * 4}; - // Get offset value from soffset register - const IR::U32 soffset{GetSrc(inst.src[3])}; // TODO: Use this maybe? + const IR::U32 soffset{GetSrc(inst.src[3])}; ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); - // Setup instruction info from controls IR::BufferInstInfo info{}; info.index_enable.Assign(mubuf.idxen); info.inst_offset.Assign(mubuf.offset); info.offset_enable.Assign(mubuf.offen); - // Get vdata value IR::Value vdata_val = ir.GetVectorReg(vdata); - - // Get address of vdata const IR::U32 address = ir.GetVectorReg(vaddr); - - // Construct srsrc SGPRs const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1), ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3)); - // Apply atomic op - // derefs srsrc buffer and adds vdata value to it - // then returns original srsrc buffer value - const IR::Value original_val = ir.BufferAtomicIAdd(handle, address, vdata_val, info); + const IR::Value original_val = [&] { + switch (op) { + case AtomicOp::Swap: + return ir.BufferAtomicExchange(handle, address, vdata_val, info); + case AtomicOp::Add: + return ir.BufferAtomicIAdd(handle, address, vdata_val, info); + case AtomicOp::Smin: + return ir.BufferAtomicIMin(handle, address, vdata_val, true, info); + case AtomicOp::Umin: + return ir.BufferAtomicIMin(handle, address, vdata_val, false, info); + case AtomicOp::Smax: + return ir.BufferAtomicIMax(handle, address, vdata_val, true, info); + case AtomicOp::Umax: + return ir.BufferAtomicIMax(handle, address, vdata_val, false, info); + case AtomicOp::And: + return ir.BufferAtomicAnd(handle, address, vdata_val, info); + case AtomicOp::Or: + return ir.BufferAtomicOr(handle, address, vdata_val, info); + case AtomicOp::Xor: + return ir.BufferAtomicXor(handle, address, vdata_val, info); + case AtomicOp::Inc: + return ir.BufferAtomicInc(handle, address, vdata_val, info); + case AtomicOp::Dec: + return ir.BufferAtomicDec(handle, address, vdata_val, info); + default: + UNREACHABLE(); + } + }(); if (mubuf.glc) { ir.SetVectorReg(vdata, IR::U32{original_val}); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index ff9dc98b1..e485dea04 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -102,10 +102,12 @@ public: [[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address, const Value& value, BufferInstInfo info); - [[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address, const Value& a, - bool is_signed, BufferInstInfo info); - [[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address, const Value& a, - bool is_signed, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address, + const Value& value, bool is_signed, + BufferInstInfo info); + [[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address, + const Value& value, bool is_signed, + BufferInstInfo info); [[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address, const Value& value, BufferInstInfo info); [[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address, diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 529e8dc8d..3fd59e0e4 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -20,6 +20,42 @@ struct SharpLocation { auto operator<=>(const SharpLocation&) const = default; }; +bool IsBufferAtomic(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::BufferAtomicIAdd32: + case IR::Opcode::BufferAtomicSMin32: + case IR::Opcode::BufferAtomicUMin32: + case IR::Opcode::BufferAtomicSMax32: + case IR::Opcode::BufferAtomicUMax32: + case IR::Opcode::BufferAtomicInc32: + case IR::Opcode::BufferAtomicDec32: + case IR::Opcode::BufferAtomicAnd32: + case IR::Opcode::BufferAtomicOr32: + case IR::Opcode::BufferAtomicXor32: + case IR::Opcode::BufferAtomicExchange32: + return true; + default: + return false; + } +} + +bool IsBufferStore(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::StoreBufferF32: + case IR::Opcode::StoreBufferF32x2: + case IR::Opcode::StoreBufferF32x3: + case IR::Opcode::StoreBufferF32x4: + case IR::Opcode::StoreBufferFormatF32: + case IR::Opcode::StoreBufferFormatF32x2: + case IR::Opcode::StoreBufferFormatF32x3: + case IR::Opcode::StoreBufferFormatF32x4: + case IR::Opcode::StoreBufferU32: + return true; + default: + return IsBufferAtomic(inst); + } +} + bool IsBufferInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::LoadBufferF32: @@ -42,20 +78,9 @@ bool IsBufferInstruction(const IR::Inst& inst) { case IR::Opcode::StoreBufferFormatF32x3: case IR::Opcode::StoreBufferFormatF32x4: case IR::Opcode::StoreBufferU32: - case IR::Opcode::BufferAtomicIAdd32: - case IR::Opcode::BufferAtomicSMin32: - case IR::Opcode::BufferAtomicUMin32: - case IR::Opcode::BufferAtomicSMax32: - case IR::Opcode::BufferAtomicUMax32: - case IR::Opcode::BufferAtomicInc32: - case IR::Opcode::BufferAtomicDec32: - case IR::Opcode::BufferAtomicAnd32: - case IR::Opcode::BufferAtomicOr32: - case IR::Opcode::BufferAtomicXor32: - case IR::Opcode::BufferAtomicExchange32: return true; default: - return false; + return IsBufferStore(inst); } } @@ -126,23 +151,6 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { } } -bool IsBufferStore(const IR::Inst& inst) { - switch (inst.GetOpcode()) { - case IR::Opcode::StoreBufferF32: - case IR::Opcode::StoreBufferF32x2: - case IR::Opcode::StoreBufferF32x3: - case IR::Opcode::StoreBufferF32x4: - case IR::Opcode::StoreBufferFormatF32: - case IR::Opcode::StoreBufferFormatF32x2: - case IR::Opcode::StoreBufferFormatF32x3: - case IR::Opcode::StoreBufferFormatF32x4: - case IR::Opcode::StoreBufferU32: - return true; - default: - return false; - } -} - bool IsImageInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::ImageSampleExplicitLod: