From 9de11cd6bb22a2fa7552ad69216817beac77636d Mon Sep 17 00:00:00 2001
From: microsoftv <6063922+microsoftv@users.noreply.github.com>
Date: Wed, 14 Aug 2024 19:02:25 -0400
Subject: [PATCH] Shared Atomics

Add SharedAtomic{IAdd,SMin,UMin,SMax,UMax}32 IR opcodes together with their
SPIR-V emitters, and translate DS_ADD_U32 / DS_MIN_U32 / DS_MAX_U32 to a
single shared-memory atomic instead of a LoadShared + WriteShared pair.

---
Review notes (ignored by git-am):
 - DS_MIN_U32 now emits SharedAtomicUMin; the submitted version emitted
   SharedAtomicUMax.
 - The WriteShared that followed each atomic was removed: it stored the
   atomic's result back to the same address, overwriting what the atomic had
   just written.
 - The `inst.dst[0].type != ScalarType::Undefined` guard around SetDst, which
   the replaced code had, is kept.
 - The BufferAtomic* parameter rename (address/value -> a/b) in ir_emitter.h
   was dropped as unrelated; the new SharedAtomic* functions use
   address/data instead of a/b.
 - .vscode/settings.json is carried over unchanged; it looks like a local
   editor file - confirm it is meant to be committed.
 - SharedAtomicU32 keeps OpShiftRightArithmetic as submitted; confirm that
   matches the other shared-memory emitters.
 - This copy was rebuilt from a whitespace-collapsed paste: indentation is
   reconstructed, angle-bracketed template arguments appear to have been
   stripped (e.g. `std::pair AtomicArgs`, `Inst(Opcode::...)`) and are
   reproduced as received, and post-image blob hashes on the `index` lines
   are stale for the edited files. Verify against the tree before applying.

 .vscode/settings.json                         |  6 +++
 .../backend/spirv/emit_spirv_atomic.cpp       | 31 ++++++++++++++++
 .../backend/spirv/emit_spirv_instructions.h   |  5 +++
 .../frontend/translate/data_share.cpp         | 32 +++++++---------
 src/shader_recompiler/ir/ir_emitter.cpp       | 37 +++++++++++++++++++
 src/shader_recompiler/ir/ir_emitter.h         |  8 ++++
 src/shader_recompiler/ir/opcodes.inc          |  7 ++++
 7 files changed, 109 insertions(+), 17 deletions(-)
 create mode 100644 .vscode/settings.json

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 000000000..5cf571563
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,6 @@
+{
+    "files.associations": {
+        "*.mod": "go",
+        "*.inc": "cpp"
+    }
+}
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 4232cfd4b..964306d49 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -12,6 +12,17 @@ std::pair AtomicArgs(EmitContext& ctx) {
     return {scope, semantics};
 }
 
+// Emits a 32-bit atomic on ctx.shared_memory_u32 through `atomic_func`. `offset` is shifted
+// right by 2 to form the element index handed to OpAccessChain.
+Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
+                   Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+    const Id shift_id{ctx.ConstU32(2U)};
+    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+    const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+    const auto [scope, semantics]{AtomicArgs(ctx)};
+    return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+}
+
 Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
                    Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
     auto& buffer = ctx.buffers[handle];
@@ -31,6 +42,26 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va
 }
 } // Anonymous namespace
 
+Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
+}
+
+Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
+}
+
+Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
+}
+
+Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
+}
+
+Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
+}
+
 Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 27d7924bc..7d20a093b 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -110,6 +110,11 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
 void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
 void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
 void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value);
 Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
 Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
 Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp
index 30b8a2daa..e9680bfe1 100644
--- a/src/shader_recompiler/frontend/translate/data_share.cpp
+++ b/src/shader_recompiler/frontend/translate/data_share.cpp
@@ -25,12 +25,12 @@ void Translator::EmitDataShare(const GcnInst& inst) {
         return DS_WRITE(32, false, true, inst);
     case Opcode::DS_WRITE2_B64:
         return DS_WRITE(64, false, true, inst);
-    case Opcode::DS_MAX_U32:
-        return DS_MAX_U32(inst);
-    case Opcode::DS_MIN_U32:
-        return DS_MIN_U32(inst);
     case Opcode::DS_ADD_U32:
         return DS_ADD_U32(inst);
+    case Opcode::DS_MIN_U32:
+        return DS_MIN_U32(inst);
+    case Opcode::DS_MAX_U32:
+        return DS_MAX_U32(inst);
     default:
         LogMissingOpcode(inst);
     }
@@ -116,15 +116,14 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
     }
 }
 
-void Translator::DS_MAX_U32(const GcnInst& inst) {
+void Translator::DS_ADD_U32(const GcnInst& inst) {
     const IR::U32 addr{GetSrc(inst.src[0])};
     const IR::U32 data{GetSrc(inst.src[1])};
     const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
     const IR::U32 addr_offset = ir.IAdd(addr, offset);
 
-    const IR::U32 old_value = IR::U32(ir.LoadShared(32, false, addr_offset));
-    const IR::U32 new_value = ir.UMax(old_value, data);
-    ir.WriteShared(32, new_value, addr_offset);
+    // Single atomic read-modify-write; the result is the value held before the operation.
+    const IR::U32 old_value = ir.SharedAtomicIAdd(addr_offset, data);
 
     if (inst.dst[0].type != ScalarType::Undefined) {
         SetDst(inst.dst[0], old_value);
@@ -137,26 +136,25 @@ void Translator::DS_MIN_U32(const GcnInst& inst) {
     const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
     const IR::U32 addr_offset = ir.IAdd(addr, offset);
 
-    const IR::U32 old_value = IR::U32(ir.LoadShared(32, false, addr_offset));
-    const IR::U32 new_value = ir.UMin(old_value, data);
-    ir.WriteShared(32, new_value, addr_offset);
+    // Single atomic read-modify-write; the result is the value held before the operation.
+    const IR::U32 old_value = ir.SharedAtomicUMin(addr_offset, data);
 
     if (inst.dst[0].type != ScalarType::Undefined) {
         SetDst(inst.dst[0], old_value);
     }
 }
 
-void Translator::DS_ADD_U32(const GcnInst& inst) {
+void Translator::DS_MAX_U32(const GcnInst& inst) {
     const IR::U32 addr{GetSrc(inst.src[0])};
     const IR::U32 data{GetSrc(inst.src[1])};
     const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
     const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::U32 aligned_addr = ir.BitwiseAnd(addr_offset, ir.Imm32(~3));
-    const IR::U32 old_value = IR::U32(ir.LoadShared(32, false, aligned_addr));
-    const IR::U32 new_value = ir.IAdd(old_value, data);
-    ir.WriteShared(32, new_value, aligned_addr);
+
+    // Single atomic read-modify-write; the result is the value held before the operation.
+    const IR::U32 old_value = ir.SharedAtomicUMax(addr_offset, data);
+
     if (inst.dst[0].type != ScalarType::Undefined) {
-        SetDst(inst.dst[0], new_value);
+        SetDst(inst.dst[0], old_value);
     }
 }
 
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index c4c067ad5..56225d8f9 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -286,6 +286,43 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
     }
 }
 
+// Only U32 operands have a shared-atomic opcode; every other type is rejected below.
+U32U64 IREmitter::SharedAtomicIAdd(const U32U64& address, const U32U64& data) {
+    if (address.Type() != data.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", address.Type(), data.Type());
+    }
+    switch (address.Type()) {
+    case Type::U32:
+        return Inst(Opcode::SharedAtomicIAdd32, address, data);
+    default:
+        ThrowInvalidType(address.Type());
+    }
+}
+
+U32 IREmitter::SharedAtomicSMin(const U32& address, const U32& data) {
+    return Inst(Opcode::SharedAtomicSMin32, address, data);
+}
+
+U32 IREmitter::SharedAtomicUMin(const U32& address, const U32& data) {
+    return Inst(Opcode::SharedAtomicUMin32, address, data);
+}
+
+U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) {
+    return is_signed ? SharedAtomicSMin(address, data) : SharedAtomicUMin(address, data);
+}
+
+U32 IREmitter::SharedAtomicSMax(const U32& address, const U32& data) {
+    return Inst(Opcode::SharedAtomicSMax32, address, data);
+}
+
+U32 IREmitter::SharedAtomicUMax(const U32& address, const U32& data) {
+    return Inst(Opcode::SharedAtomicUMax32, address, data);
+}
+
+U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) {
+    return is_signed ? SharedAtomicSMax(address, data) : SharedAtomicUMax(address, data);
+}
+
 U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
     return Inst(Opcode::ReadConst, base, offset);
 }
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 5aa80ae30..85fbf4a41 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -84,6 +84,14 @@ public:
     [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
     void WriteShared(int bit_size, const Value& value, const U32& offset);
 
+    [[nodiscard]] U32U64 SharedAtomicIAdd(const U32U64& address, const U32U64& data);
+    [[nodiscard]] U32 SharedAtomicSMin(const U32& address, const U32& data);
+    [[nodiscard]] U32 SharedAtomicUMin(const U32& address, const U32& data);
+    [[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
+    [[nodiscard]] U32 SharedAtomicSMax(const U32& address, const U32& data);
+    [[nodiscard]] U32 SharedAtomicUMax(const U32& address, const U32& data);
+    [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
+
     [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
     [[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index);
 
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 14921b16f..8898490f3 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -33,6 +33,13 @@ OPCODE(WriteSharedU32, Void, U32,
 OPCODE(WriteSharedU64,                                      Void,           U32,            U32x2,                                                          )
 OPCODE(WriteSharedU128,                                     Void,           U32,            U32x4,                                                          )
 
+// Shared atomic operations
+OPCODE(SharedAtomicIAdd32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicSMin32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicUMin32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicSMax32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicUMax32,                                  U32,            U32,            U32,                                                            )
+
 // Context getters/setters
 OPCODE(GetUserData,                                         U32,            ScalarReg,                                                                      )
 OPCODE(GetThreadBitScalarReg,                               U1,             ScalarReg,                                                                      )