diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index e0bc4b77d..18230413f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -12,6 +12,19 @@ std::pair AtomicArgs(EmitContext& ctx) { return {scope, semantics}; } + +Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + auto& buffer = ctx.buffers[handle]; + address = + ctx.OpIAdd(ctx.U32[1], address, buffer.offset); + const Id index = + ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); + const Id pointer{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} + Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value, Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { const auto& texture = ctx.images[handle & 0xFFFF]; @@ -21,6 +34,52 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va } } // Anonymous namespace +Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitBufferAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) { + // TODO: This is not yet implemented + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBufferAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) { + // TODO: This is not yet implemented + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicXor); +} + +Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange); +} + Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 85c6eaac5..3d8cfcfc1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -77,6 +77,17 @@ void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 532e024e2..497658ccd 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -25,6 +25,10 @@ void Translator::EmitDataShare(const GcnInst& inst) { return DS_WRITE(32, false, true, inst); case Opcode::DS_WRITE2_B64: return DS_WRITE(64, false, true, inst); + case Opcode::DS_MAX_U32: + return DS_MAX_U32(inst); + case Opcode::DS_MIN_U32: + return DS_MIN_U32(inst); default: LogMissingOpcode(inst); } @@ -110,6 +114,36 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI } } +void Translator::DS_MAX_U32(const GcnInst& inst) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + + const IR::U32 old_value = IR::U32(ir.LoadShared(32, false, addr_offset)); + const IR::U32 new_value = ir.UMax(old_value, data); + ir.WriteShared(32, new_value, addr_offset); + + if (inst.dst[0].type != ScalarType::Undefined) { + SetDst(inst.dst[0], old_value); + } +} + +void Translator::DS_MIN_U32(const GcnInst& inst) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + + const IR::U32 old_value = IR::U32(ir.LoadShared(32, false, addr_offset)); + const IR::U32 new_value = ir.UMin(old_value, data); + ir.WriteShared(32, new_value, addr_offset); + + if (inst.dst[0].type != ScalarType::Undefined) { + SetDst(inst.dst[0], old_value); + } +} + void Translator::S_BARRIER() { ir.Barrier(); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 2e12209dc..08e7835a1 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -187,6 +187,7 @@ public: // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); + void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst); // Vector interpolation void V_INTERP_P2_F32(const GcnInst& inst); @@ -196,6 +197,8 @@ public: void DS_SWIZZLE_B32(const GcnInst& inst); void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); + void DS_MAX_U32(const GcnInst& inst); + void DS_MIN_U32(const GcnInst& inst); void V_READFIRSTLANE_B32(const GcnInst& inst); void V_READLANE_B32(const GcnInst& inst); void V_WRITELANE_B32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index bb202e426..b2fa86590 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -80,6 +80,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::BUFFER_STORE_FORMAT_X: case Opcode::BUFFER_STORE_DWORD: return BUFFER_STORE_FORMAT(1, false, inst); + case Opcode::BUFFER_STORE_FORMAT_XY: case Opcode::BUFFER_STORE_DWORDX2: return BUFFER_STORE_FORMAT(2, false, inst); case Opcode::BUFFER_STORE_DWORDX3: @@ -87,6 +88,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::BUFFER_STORE_FORMAT_XYZW: case Opcode::BUFFER_STORE_DWORDX4: return BUFFER_STORE_FORMAT(4, false, inst); + case Opcode::BUFFER_ATOMIC_ADD: + return BUFFER_ATOMIC(AtomicOp::Add, inst); default: LogMissingOpcode(inst); } @@ -413,6 +416,64 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns ir.StoreBuffer(num_dwords, handle, address, value, info); } +void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { + const auto& mtbuf = inst.control.mtbuf; + IR::VectorReg src_reg{inst.src[1].code}; + IR::VectorReg addr_reg{inst.src[0].code}; + const IR::ScalarReg sharp{inst.src[2].code * 4}; + + const IR::Value address = [&]() -> IR::Value { + if (mtbuf.idxen && mtbuf.offen) { + return ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1)); + } + if (mtbuf.idxen || mtbuf.offen) { + return ir.GetVectorReg(addr_reg); + } + return IR::Value{}; + }(); + + IR::BufferInstInfo info{}; + info.index_enable.Assign(mtbuf.idxen); + info.offset_enable.Assign(mtbuf.offen); + info.inst_offset.Assign(mtbuf.offset); + + const IR::Value handle = + ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), + ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); + const IR::Value value = ir.GetVectorReg(src_reg); + + const IR::Value result = [&] { + switch (op) { + case AtomicOp::Swap: + return ir.BufferAtomicExchange(handle, address, value, info); + case AtomicOp::Add: + return ir.BufferAtomicIAdd(handle, address, value, info); + case AtomicOp::Smin: + return ir.BufferAtomicIMin(handle, address, value, true, info); + case AtomicOp::Umin: + return ir.BufferAtomicUMin(handle, address, value, info); + case AtomicOp::Smax: + return ir.BufferAtomicIMax(handle, address, value, true, info); + case AtomicOp::Umax: + return ir.BufferAtomicUMax(handle, address, value, info); + case AtomicOp::And: + return ir.BufferAtomicAnd(handle, address, value, info); + case AtomicOp::Or: + return ir.BufferAtomicOr(handle, address, value, info); + case AtomicOp::Xor: + return ir.BufferAtomicXor(handle, address, value, info); + case AtomicOp::Inc: + return ir.BufferAtomicInc(handle, address, value, info); + case AtomicOp::Dec: + return ir.BufferAtomicDec(handle, address, value, info); + default: + UNREACHABLE(); + } + }(); + const IR::U32F32 c_result = static_cast(result); + ir.SetVectorReg(src_reg, c_result); +} + void Translator::IMAGE_GET_LOD(const GcnInst& inst) { const auto& mimg = inst.control.mimg; IR::VectorReg dst_reg{inst.dst[0].code}; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 08b7fbbc0..628946dd6 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -357,6 +357,73 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad } } +Value IREmitter::BufferAtomicIAdd(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicIAdd32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicSMin(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicUMin(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value, + bool is_signed, BufferInstInfo info) { + return is_signed ? BufferAtomicSMin(handle, address, value, info) + : BufferAtomicUMin(handle, address, value, info); +} + +Value IREmitter::BufferAtomicSMax(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicSMax32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicUMax(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, const Value& value, + bool is_signed, BufferInstInfo info) { + return is_signed ? BufferAtomicSMax(handle, address, value, info) + : BufferAtomicUMax(handle, address, value, info); +} + +Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicDec(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicDec32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicAnd(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicAnd32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicOr(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicOr32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicXor(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicXor32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicExchange(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicExchange32, Flags{info}, handle, address, value); +} + U32 IREmitter::LaneId() { return Inst(Opcode::LaneId); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index fda206394..cc1022fbf 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -94,6 +94,34 @@ public: void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicSMin(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicUMin(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address, + const Value& value, bool is_signed, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicSMax(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicUMax(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address, + const Value& value, bool is_signed, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicAnd(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicOr(const Value& handle, const Value& address, + const Value& value, + BufferInstInfo info); + [[nodiscard]] Value BufferAtomicXor(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicExchange(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] U32 LaneId(); [[nodiscard]] U32 WarpId(); [[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 46918bc39..b7c9d66ad 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -90,6 +90,19 @@ OPCODE(StoreBufferF32x3, Void, Opaq OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, ) OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) +// Buffer atomic operations +OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicExchange32, U32, Opaque, Opaque, U32, ) + // Vector utility OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )