diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 13fd8e180..47290e7e8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -27,6 +27,19 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
     });
 }
 
+Id SharedAtomicU32IncDec(EmitContext& ctx, Id offset,
+                         Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
+    const Id shift_id{ctx.ConstU32(2U)};
+    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
+    const Id pointer{
+        ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
+    const auto [scope, semantics]{AtomicArgs(ctx)};
+    return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
+        return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
+    });
+}
+
 Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
                    Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
     const Id shift_id{ctx.ConstU32(3U)};
@@ -40,19 +53,6 @@ Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
     });
 }
 
-Id SharedAtomicU32_IncDec(EmitContext& ctx, Id offset,
-                          Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
-    const Id shift_id{ctx.ConstU32(2U)};
-    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
-    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
-    const Id pointer{
-        ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
-    const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
-    });
-}
-
 Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
                    Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
     const auto& buffer = ctx.buffers[handle];
@@ -68,6 +68,21 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
     });
 }
 
+Id BufferAtomicU32IncDec(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
+                         Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
+    const auto& buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+    }
+    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
+    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+    const auto [scope, semantics]{AtomicArgs(ctx)};
+    return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
+        return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics);
+    });
+}
+
 Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
                           Id cmp_value,
                           Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
@@ -156,12 +171,12 @@ Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicISub);
 }
 
-Id EmitSharedAtomicIIncrement32(EmitContext& ctx, Id offset) {
-    return SharedAtomicU32_IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement);
+Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset) {
+    return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement);
 }
 
-Id EmitSharedAtomicIDecrement32(EmitContext& ctx, Id offset) {
-    return SharedAtomicU32_IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement);
+Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset) {
+    return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement);
 }
 
 Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
 }
 
@@ -172,6 +187,10 @@ Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
     return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
 }
 
+Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicISub);
+}
+
 Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
 }
@@ -188,14 +207,12 @@ Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax);
 }
 
-Id EmitBufferAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
-    // TODO
-    UNREACHABLE_MSG("Unsupported BUFFER_ATOMIC opcode: ", IR::Opcode::BufferAtomicInc32);
+Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return BufferAtomicU32IncDec(ctx, inst, handle, address, &Sirit::Module::OpAtomicIIncrement);
 }
 
-Id EmitBufferAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) {
-    // TODO
-    UNREACHABLE_MSG("Unsupported BUFFER_ATOMIC opcode: ", IR::Opcode::BufferAtomicDec32);
+Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return BufferAtomicU32IncDec(ctx, inst, handle, address, &Sirit::Module::OpAtomicIDecrement);
 }
 
 Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
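NOTE: SharedAtomicU32IncDec and BufferAtomicU32IncDec reuse the dispatch trick of the existing helpers: the SPIR-V instruction to emit is passed as a pointer to a Sirit::Module member function, so one bounds-checked body serves both OpAtomicIIncrement and OpAtomicIDecrement. A minimal standalone sketch of the pattern (Module, inc, and dec here are stand-ins, not the real sirit API):

    #include <cstdio>

    // Stand-in for Sirit::Module: two operations sharing one signature.
    struct Module {
        int inc(int v) { return v + 1; }
        int dec(int v) { return v - 1; }
    };

    // One helper body serves both ops via a pointer-to-member-function,
    // mirroring (ctx.*atomic_func)(...) in the helpers above.
    int Emit(Module& m, int v, int (Module::*op)(int)) {
        return (m.*op)(v);
    }

    int main() {
        Module m;
        std::printf("%d %d\n", Emit(m, 41, &Module::inc), Emit(m, 41, &Module::dec));
    }
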
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 3441c5a23..d0060a40d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -87,12 +87,13 @@ void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
 Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
@@ -136,8 +137,8 @@ Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value);
-Id EmitSharedAtomicIIncrement32(EmitContext& ctx, Id offset);
-Id EmitSharedAtomicIDecrement32(EmitContext& ctx, Id offset);
+Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset);
+Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset);
 Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value);
 
 Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
index c59406499..9b005bc7e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -13,10 +13,10 @@ Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
     const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
 
-    return AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
+    return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
         const Id pointer =
             ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
-        return ctx.OpLoad(ctx.U16, pointer);
+        return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
     });
 }
 
@@ -40,7 +40,7 @@ Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
     return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
-        return ctx.OpLoad(ctx.U64, pointer);
+        return ctx.OpBitcast(ctx.U32[2], ctx.OpLoad(ctx.U64, pointer));
     });
 }
 
@@ -52,7 +52,7 @@ void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
     AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
         const Id pointer =
             ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
-        ctx.OpStore(pointer, value);
+        ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
         return Id{0};
     });
 }
@@ -78,7 +78,7 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
     AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
-        ctx.OpStore(pointer, value);
+        ctx.OpStore(pointer, ctx.OpBitcast(ctx.U64, value));
         return Id{0};
     });
 }
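NOTE: After this change the shared-memory IR traffics only in 32-bit-backed values: 16-bit loads are zero-extended to a U32 with OpUConvert (and truncated again on store), while 64-bit values are reinterpreted as a two-component U32 vector with OpBitcast. A hedged C++ sketch of the value-level effect, assuming little-endian word order:

    #include <array>
    #include <bit>
    #include <cstdint>
    #include <cstdio>

    int main() {
        // 16-bit load: zero-extend to 32 bits (what the OpUConvert does).
        const uint16_t half = 0xBEEF;
        const uint32_t widened = half; // 0x0000BEEF

        // 64-bit load: reinterpret as two 32-bit words (the OpBitcast to U32[2]).
        const uint64_t wide = 0x0123456789ABCDEFull;
        const auto words = std::bit_cast<std::array<uint32_t, 2>>(wide);

        // Little-endian: words[0] == 0x89ABCDEF, words[1] == 0x01234567.
        std::printf("%08X | %08X %08X\n", widened, words[0], words[1]);
    }
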
diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp
index 4b6a58fd0..c6eea48a0 100644
--- a/src/shader_recompiler/frontend/translate/data_share.cpp
+++ b/src/shader_recompiler/frontend/translate/data_share.cpp
@@ -216,31 +216,26 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
     if (is_pair) {
         const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
-        if (bit_size == 32) {
-            ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
+        if (bit_size == 64) {
+            ir.WriteShared(
+                64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)),
+                addr0);
         } else {
-            ir.WriteShared(64,
-                           ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0),
-                                                                 ir.GetVectorReg(data0 + 1))),
-                           addr0);
+            ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
         }
         const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
-        if (bit_size == 32) {
-            ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
+        if (bit_size == 64) {
+            ir.WriteShared(
+                64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)),
+                addr1);
         } else {
-            ir.WriteShared(64,
-                           ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1),
-                                                                 ir.GetVectorReg(data1 + 1))),
-                           addr1);
+            ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
         }
     } else if (bit_size == 64) {
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
         const IR::Value data =
             ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
-        ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0);
-    } else if (bit_size == 16) {
-        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
-        ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
+        ir.WriteShared(bit_size, data, addr0);
     } else {
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
         ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
@@ -264,7 +259,7 @@ void Translator::DS_INC_U32(const GcnInst& inst, bool rtn) {
     const IR::U32 offset =
         ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
     const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicIIncrement(addr_offset);
+    const IR::Value original_val = ir.SharedAtomicInc(addr_offset);
     if (rtn) {
         SetDst(inst.dst[0], IR::U32{original_val});
     }
@@ -275,7 +270,7 @@ void Translator::DS_DEC_U32(const GcnInst& inst, bool rtn) {
     const IR::U32 offset =
         ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
     const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicIDecrement(addr_offset);
+    const IR::Value original_val = ir.SharedAtomicDec(addr_offset);
     if (rtn) {
         SetDst(inst.dst[0], IR::U32{original_val});
     }
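NOTE: DS_INC_U32 and DS_DEC_U32 assemble their immediate byte offset from the instruction's two 8-bit fields as (offset1 << 8) + offset0, i.e. a 16-bit offset. A worked example of the arithmetic (DsOffset is a hypothetical helper, shown only for illustration):

    #include <cstdint>
    #include <cstdio>

    // Combine the two 8-bit DS immediates into one 16-bit byte offset,
    // exactly as the translator does above.
    constexpr uint32_t DsOffset(uint8_t offset1, uint8_t offset0) {
        return (static_cast<uint32_t>(offset1) << 8u) + offset0;
    }

    int main() {
        static_assert(DsOffset(0x01, 0x10) == 0x0110); // 272-byte offset
        std::printf("0x%04X\n", DsOffset(0x01, 0x10));
    }
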
@@ -309,32 +304,25 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
         const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
         const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0);
-        if (bit_size == 32) {
-            ir.SetVectorReg(dst_reg++, IR::U32{data0});
+        if (bit_size == 64) {
+            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)});
+            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)});
         } else {
-            const auto vector = ir.UnpackUint2x32(IR::U64{data0});
-            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
-            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
+            ir.SetVectorReg(dst_reg++, IR::U32{data0});
         }
         const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
         const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
-        if (bit_size == 32) {
-            ir.SetVectorReg(dst_reg++, IR::U32{data1});
+        if (bit_size == 64) {
+            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 0)});
+            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)});
         } else {
-            const auto vector = ir.UnpackUint2x32(IR::U64{data1});
-            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
-            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
+            ir.SetVectorReg(dst_reg++, IR::U32{data1});
         }
     } else if (bit_size == 64) {
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
         const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
-        const auto vector = ir.UnpackUint2x32(IR::U64{data});
-        ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)});
-        ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(vector, 1)});
-    } else if (bit_size == 16) {
-        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
-        const IR::U16 data = IR::U16{ir.LoadShared(bit_size, is_signed, addr0)};
-        ir.SetVectorReg(dst_reg, ir.UConvert(32, data));
+        ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
+        ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
     } else {
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
         const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)};
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index 5eb2079a4..54e8b8ee8 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -354,9 +354,9 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
     case AtomicOp::Xor:
         return ir.BufferAtomicXor(handle, address, vdata_val, buffer_info);
     case AtomicOp::Inc:
-        return ir.BufferAtomicInc(handle, address, vdata_val, buffer_info);
+        return ir.BufferAtomicInc(handle, address, buffer_info);
     case AtomicOp::Dec:
-        return ir.BufferAtomicDec(handle, address, vdata_val, buffer_info);
+        return ir.BufferAtomicDec(handle, address, buffer_info);
     default:
         UNREACHABLE();
     }
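NOTE: The inc/dec paths lose their value operand because SPIR-V's OpAtomicIIncrement and OpAtomicIDecrement take only a pointer plus scope and semantics; they behave like an atomic fetch-add/fetch-sub of 1 that returns the previous value. A small illustration of those semantics in plain C++:

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    int main() {
        std::atomic<uint32_t> counter{7};
        const uint32_t before_inc = counter.fetch_add(1); // "inc": returns 7, counter -> 8
        const uint32_t before_dec = counter.fetch_sub(1); // "dec": returns 8, counter -> 7
        std::printf("%u %u %u\n", before_inc, before_dec, counter.load());
    }
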
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 2c37c8099..3d9b62e43 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -294,11 +294,11 @@ void IREmitter::SetPatch(Patch patch, const F32& value) {
 Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
     switch (bit_size) {
     case 16:
-        return Inst<U16>(Opcode::LoadSharedU16, offset);
+        return Inst(Opcode::LoadSharedU16, offset);
     case 32:
         return Inst(Opcode::LoadSharedU32, offset);
     case 64:
-        return Inst<U64>(Opcode::LoadSharedU64, offset);
+        return Inst(Opcode::LoadSharedU64, offset);
     default:
         UNREACHABLE_MSG("Invalid bit size {}", bit_size);
     }
@@ -353,12 +353,12 @@ U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) {
     return Inst<U32>(Opcode::SharedAtomicXor32, address, data);
 }
 
-U32 IREmitter::SharedAtomicIIncrement(const U32& address) {
-    return Inst<U32>(Opcode::SharedAtomicIIncrement32, address);
+U32 IREmitter::SharedAtomicInc(const U32& address) {
+    return Inst<U32>(Opcode::SharedAtomicInc32, address);
 }
 
-U32 IREmitter::SharedAtomicIDecrement(const U32& address) {
-    return Inst<U32>(Opcode::SharedAtomicIDecrement32, address);
+U32 IREmitter::SharedAtomicDec(const U32& address) {
+    return Inst<U32>(Opcode::SharedAtomicDec32, address);
 }
 
 U32 IREmitter::SharedAtomicISub(const U32& address, const U32& data) {
@@ -474,7 +474,19 @@ void IREmitter::StoreBufferFormat(const Value& handle, const Value& address, con
 Value IREmitter::BufferAtomicIAdd(const Value& handle, const Value& address, const Value& value,
                                   BufferInstInfo info) {
-    return Inst(Opcode::BufferAtomicIAdd32, Flags{info}, handle, address, value);
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst(Opcode::BufferAtomicIAdd32, Flags{info}, handle, address, value);
+    case Type::U64:
+        return Inst(Opcode::BufferAtomicIAdd64, Flags{info}, handle, address, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+Value IREmitter::BufferAtomicISub(const Value& handle, const Value& address, const Value& value,
+                                  BufferInstInfo info) {
+    return Inst(Opcode::BufferAtomicISub32, Flags{info}, handle, address, value);
 }
 
 Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value,
@@ -489,14 +501,12 @@ Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, con
                : Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value);
 }
 
-Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, const Value& value,
-                                 BufferInstInfo info) {
-    return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address, value);
+Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, BufferInstInfo info) {
+    return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address);
 }
 
-Value IREmitter::BufferAtomicDec(const Value& handle, const Value& address, const Value& value,
-                                 BufferInstInfo info) {
-    return Inst(Opcode::BufferAtomicDec32, Flags{info}, handle, address, value);
+Value IREmitter::BufferAtomicDec(const Value& handle, const Value& address, BufferInstInfo info) {
+    return Inst(Opcode::BufferAtomicDec32, Flags{info}, handle, address);
 }
 
 Value IREmitter::BufferAtomicAnd(const Value& handle, const Value& address, const Value& value,
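NOTE: BufferAtomicIAdd now selects the 32- or 64-bit opcode from the runtime type of its value operand instead of growing a second entry point. A standalone sketch of that dispatch shape (Type and PickOpcode are stand-ins for IR::Type and the opcode selection, not the real API):

    #include <cstdio>
    #include <stdexcept>

    enum class Type { U32, U64 }; // stand-in for IR::Type

    // One emitter entry point picks the width-specific opcode, as
    // BufferAtomicIAdd does in the hunk above.
    const char* PickOpcode(Type t) {
        switch (t) {
        case Type::U32:
            return "BufferAtomicIAdd32";
        case Type::U64:
            return "BufferAtomicIAdd64";
        }
        throw std::invalid_argument{"invalid type"}; // mirrors ThrowInvalidType
    }

    int main() {
        std::printf("%s %s\n", PickOpcode(Type::U32), PickOpcode(Type::U64));
    }
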
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index eae44ed04..b8853c065 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -100,16 +100,15 @@ public:
     void WriteShared(int bit_size, const Value& value, const U32& offset);
 
     [[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data);
+    [[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data);
     [[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
     [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
+    [[nodiscard]] U32 SharedAtomicInc(const U32& address);
+    [[nodiscard]] U32 SharedAtomicDec(const U32& address);
     [[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
     [[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data);
     [[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data);
-    [[nodiscard]] U32 SharedAtomicIIncrement(const U32& address);
-    [[nodiscard]] U32 SharedAtomicIDecrement(const U32& address);
-    [[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data);
-
     [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
     [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);
 
@@ -134,14 +133,16 @@ public:
     [[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address,
                                          const Value& value, BufferInstInfo info);
+    [[nodiscard]] Value BufferAtomicISub(const Value& handle, const Value& address,
+                                         const Value& value, BufferInstInfo info);
     [[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address,
                                          const Value& value, bool is_signed, BufferInstInfo info);
     [[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address,
                                          const Value& value, bool is_signed, BufferInstInfo info);
     [[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address,
-                                        const Value& value, BufferInstInfo info);
+                                        BufferInstInfo info);
     [[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address,
-                                        const Value& value, BufferInstInfo info);
+                                        BufferInstInfo info);
     [[nodiscard]] Value BufferAtomicAnd(const Value& handle, const Value& address,
                                         const Value& value, BufferInstInfo info);
     [[nodiscard]] Value BufferAtomicOr(const Value& handle, const Value& address,
diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp
index a57310fb9..f55c2b378 100644
--- a/src/shader_recompiler/ir/microinstruction.cpp
+++ b/src/shader_recompiler/ir/microinstruction.cpp
@@ -66,6 +66,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::StoreBufferF32x4:
     case Opcode::StoreBufferFormatF32:
     case Opcode::BufferAtomicIAdd32:
+    case Opcode::BufferAtomicIAdd64:
+    case Opcode::BufferAtomicISub32:
     case Opcode::BufferAtomicSMin32:
    case Opcode::BufferAtomicUMin32:
     case Opcode::BufferAtomicSMax32:
@@ -76,15 +78,21 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::BufferAtomicOr32:
     case Opcode::BufferAtomicXor32:
     case Opcode::BufferAtomicSwap32:
+    case Opcode::BufferAtomicCmpSwap32:
     case Opcode::DataAppend:
     case Opcode::DataConsume:
-    case Opcode::WriteSharedU64:
+    case Opcode::WriteSharedU16:
     case Opcode::WriteSharedU32:
+    case Opcode::WriteSharedU64:
     case Opcode::SharedAtomicIAdd32:
+    case Opcode::SharedAtomicIAdd64:
+    case Opcode::SharedAtomicISub32:
     case Opcode::SharedAtomicSMin32:
     case Opcode::SharedAtomicUMin32:
     case Opcode::SharedAtomicSMax32:
     case Opcode::SharedAtomicUMax32:
+    case Opcode::SharedAtomicInc32:
+    case Opcode::SharedAtomicDec32:
     case Opcode::SharedAtomicAnd32:
     case Opcode::SharedAtomicOr32:
     case Opcode::SharedAtomicXor32:
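NOTE: The new opcodes must be listed in Inst::MayHaveSideEffects so dead-code elimination keeps an atomic whose returned pre-op value is never read; the memory update itself is the side effect. A toy version of the usual removal rule, assuming a simplified Inst:

    #include <cstdio>

    struct Inst {
        bool has_uses;
        bool may_have_side_effects;
    };

    // An instruction is removable only if its result is unread AND it has no
    // side effects; an unused SharedAtomicInc32 must therefore be kept.
    bool CanRemove(const Inst& inst) {
        return !inst.has_uses && !inst.may_have_side_effects;
    }

    int main() {
        const Inst unused_atomic{false, true};
        std::printf("%s\n", CanRemove(unused_atomic) ? "removed" : "kept");
    }
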
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index e96e32297..79c0a9431 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -30,26 +30,26 @@ OPCODE(EmitVertex,                      Void,
 OPCODE(EmitPrimitive,                   Void,                                           )
 
 // Shared memory operations
-OPCODE(LoadSharedU16,                   U16,            U32,                            )
+OPCODE(LoadSharedU16,                   U32,            U32,                            )
 OPCODE(LoadSharedU32,                   U32,            U32,                            )
-OPCODE(LoadSharedU64,                   U64,            U32,                            )
-OPCODE(WriteSharedU16,                  Void,           U32,            U16,            )
+OPCODE(LoadSharedU64,                   U32x2,          U32,                            )
+OPCODE(WriteSharedU16,                  Void,           U32,            U32,            )
 OPCODE(WriteSharedU32,                  Void,           U32,            U32,            )
-OPCODE(WriteSharedU64,                  Void,           U32,            U64,            )
+OPCODE(WriteSharedU64,                  Void,           U32,            U32x2,          )
 
 // Shared atomic operations
 OPCODE(SharedAtomicIAdd32,              U32,            U32,            U32,            )
 OPCODE(SharedAtomicIAdd64,              U64,            U32,            U64,            )
+OPCODE(SharedAtomicISub32,              U32,            U32,            U32,            )
 OPCODE(SharedAtomicSMin32,              U32,            U32,            U32,            )
 OPCODE(SharedAtomicUMin32,              U32,            U32,            U32,            )
 OPCODE(SharedAtomicSMax32,              U32,            U32,            U32,            )
 OPCODE(SharedAtomicUMax32,              U32,            U32,            U32,            )
+OPCODE(SharedAtomicInc32,               U32,            U32,                            )
+OPCODE(SharedAtomicDec32,               U32,            U32,                            )
 OPCODE(SharedAtomicAnd32,               U32,            U32,            U32,            )
 OPCODE(SharedAtomicOr32,                U32,            U32,            U32,            )
 OPCODE(SharedAtomicXor32,               U32,            U32,            U32,            )
-OPCODE(SharedAtomicISub32,              U32,            U32,            U32,            )
-OPCODE(SharedAtomicIIncrement32,        U32,            U32,                            )
-OPCODE(SharedAtomicIDecrement32,        U32,            U32,                            )
 
 // Context getters/setters
 OPCODE(GetUserData,                     U32,            ScalarReg,                      )
@@ -120,12 +120,13 @@ OPCODE(StoreBufferFormatF32,            Void,           Opaq
 // Buffer atomic operations
 OPCODE(BufferAtomicIAdd32,              U32,            Opaque,         Opaque,         U32 )
 OPCODE(BufferAtomicIAdd64,              U64,            Opaque,         Opaque,         U64 )
+OPCODE(BufferAtomicISub32,              U32,            Opaque,         Opaque,         U32 )
 OPCODE(BufferAtomicSMin32,              U32,            Opaque,         Opaque,         U32 )
 OPCODE(BufferAtomicUMin32,              U32,            Opaque,         Opaque,         U32 )
 OPCODE(BufferAtomicSMax32,              U32,            Opaque,         Opaque,         U32 )
 OPCODE(BufferAtomicUMax32,              U32,            Opaque,         Opaque,         U32 )
-OPCODE(BufferAtomicInc32,               U32,            Opaque,         Opaque,         U32,            )
-OPCODE(BufferAtomicDec32,               U32,            Opaque,         Opaque,         U32,            )
+OPCODE(BufferAtomicInc32,               U32,            Opaque,         Opaque,         )
+OPCODE(BufferAtomicDec32,               U32,            Opaque,         Opaque,         )
 OPCODE(BufferAtomicAnd32,               U32,            Opaque,         Opaque,         U32,            )
 OPCODE(BufferAtomicOr32,                U32,            Opaque,         Opaque,         U32,            )
 OPCODE(BufferAtomicXor32,               U32,            Opaque,         Opaque,         U32,            )
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 18c77e600..bc6ef3711 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -17,6 +17,8 @@ using SharpLocation = u32;
 bool IsBufferAtomic(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::BufferAtomicIAdd32:
+    case IR::Opcode::BufferAtomicIAdd64:
+    case IR::Opcode::BufferAtomicISub32:
     case IR::Opcode::BufferAtomicSMin32:
     case IR::Opcode::BufferAtomicUMin32:
     case IR::Opcode::BufferAtomicSMax32:
@@ -27,6 +29,7 @@ bool IsBufferAtomic(const IR::Inst& inst) {
     case IR::Opcode::BufferAtomicOr32:
     case IR::Opcode::BufferAtomicXor32:
     case IR::Opcode::BufferAtomicSwap32:
+    case IR::Opcode::BufferAtomicCmpSwap32:
         return true;
     default:
         return false;
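NOTE: Each opcode is defined in one place, the opcodes.inc X-macro table (OPCODE(name, result type, argument types...)), but every switch that classifies instructions — IsBufferAtomic here, MayHaveSideEffects above, and the passes' IsSharedAccess below — must be updated by hand. A sketch of how such a table expands (the two rows are illustrative, not the full table):

    #include <cstdio>

    // X-macro: every OPCODE(name, ret, arg) row expands against whatever
    // definition of OPCODE is in scope at the point of use.
    #define FOR_EACH_OPCODE(OPCODE)                                            \
        OPCODE(LoadSharedU16, "U32", "U32")                                    \
        OPCODE(SharedAtomicInc32, "U32", "U32")

    int main() {
    #define OPCODE(name, ret, arg0) std::printf(#name ": %s <- %s\n", ret, arg0);
        FOR_EACH_OPCODE(OPCODE)
    #undef OPCODE
    }
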
diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp
index b292b41b9..02745bf9a 100644
--- a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp
+++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp
@@ -39,13 +39,11 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
                 ASSERT(addr->Arg(1).IsImmediate());
                 offset = addr->Arg(1).U32();
             }
-            IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()})
-                                          : inst.Arg(1).Resolve();
+            IR::Value data = inst.Arg(1).Resolve();
             for (s32 i = 0; i < num_components; i++) {
                 const auto attrib = IR::Attribute::Param0 + (offset / 16);
                 const auto comp = (offset / 4) % 4;
-                const IR::U32 value =
-                    IR::U32{is_composite ? ir.CompositeExtract(data, i) : data};
+                const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data};
                 ir.SetAttribute(attrib, ir.BitCast<IR::F32>(value), comp);
                 offset += 4;
             }
diff --git a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
index baf6ad0d1..10d6a285c 100644
--- a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
@@ -9,12 +9,14 @@
 namespace Shader::Optimization {
 
 static bool IsLoadShared(const IR::Inst& inst) {
-    return inst.GetOpcode() == IR::Opcode::LoadSharedU32 ||
+    return inst.GetOpcode() == IR::Opcode::LoadSharedU16 ||
+           inst.GetOpcode() == IR::Opcode::LoadSharedU32 ||
           inst.GetOpcode() == IR::Opcode::LoadSharedU64;
 }
 
 static bool IsWriteShared(const IR::Inst& inst) {
-    return inst.GetOpcode() == IR::Opcode::WriteSharedU32 ||
+    return inst.GetOpcode() == IR::Opcode::WriteSharedU16 ||
+           inst.GetOpcode() == IR::Opcode::WriteSharedU32 ||
            inst.GetOpcode() == IR::Opcode::WriteSharedU64;
 }
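NOTE: When shared memory is lowered to a storage buffer in the pass that follows, every workgroup needs a private slice of the buffer, so each address is rebased as WorkgroupIndex * shared_memory_size + offset. Hoisting that computation above the atomic cases means the atomic lowerings now use the rebased address too, instead of the raw shared-memory offset. The addressing in isolation, with hypothetical numbers:

    #include <cstdint>
    #include <cstdio>

    // Each workgroup owns shared_memory_size bytes of the backing buffer.
    constexpr uint32_t BufferAddress(uint32_t workgroup_index, uint32_t shared_memory_size,
                                     uint32_t shared_offset) {
        return workgroup_index * shared_memory_size + shared_offset;
    }

    int main() {
        static_assert(BufferAddress(0, 16384, 0x40) == 0x40);           // 16 KiB LDS, group 0
        static_assert(BufferAddress(2, 16384, 0x40) == 2 * 16384 + 0x40);
        std::printf("%u\n", BufferAddress(3, 16384, 0)); // 49152
    }
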
diff --git a/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp
index 12d4d0659..f1d4cbc63 100644
--- a/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp
@@ -10,18 +10,23 @@ namespace Shader::Optimization {
 static bool IsSharedAccess(const IR::Inst& inst) {
     const auto opcode = inst.GetOpcode();
     switch (opcode) {
+    case IR::Opcode::LoadSharedU16:
     case IR::Opcode::LoadSharedU32:
     case IR::Opcode::LoadSharedU64:
+    case IR::Opcode::WriteSharedU16:
     case IR::Opcode::WriteSharedU32:
     case IR::Opcode::WriteSharedU64:
-    case IR::Opcode::SharedAtomicAnd32:
     case IR::Opcode::SharedAtomicIAdd32:
     case IR::Opcode::SharedAtomicIAdd64:
-    case IR::Opcode::SharedAtomicOr32:
-    case IR::Opcode::SharedAtomicSMax32:
-    case IR::Opcode::SharedAtomicUMax32:
+    case IR::Opcode::SharedAtomicISub32:
     case IR::Opcode::SharedAtomicSMin32:
     case IR::Opcode::SharedAtomicUMin32:
+    case IR::Opcode::SharedAtomicSMax32:
+    case IR::Opcode::SharedAtomicUMax32:
+    case IR::Opcode::SharedAtomicInc32:
+    case IR::Opcode::SharedAtomicDec32:
+    case IR::Opcode::SharedAtomicAnd32:
+    case IR::Opcode::SharedAtomicOr32:
     case IR::Opcode::SharedAtomicXor32:
         return true;
     default:
@@ -56,46 +61,53 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
             }
             IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
             const IR::U32 handle = ir.Imm32(binding);
+            const IR::U32 offset = ir.IMul(ir.GetAttributeU32(IR::Attribute::WorkgroupIndex),
+                                           ir.Imm32(shared_memory_size));
+            const IR::U32 address = ir.IAdd(IR::U32{inst.Arg(0)}, offset);
             // Replace shared atomics first
             switch (inst.GetOpcode()) {
-            case IR::Opcode::SharedAtomicAnd32:
-                inst.ReplaceUsesWithAndRemove(
-                    ir.BufferAtomicAnd(handle, inst.Arg(0), inst.Arg(1), {}));
-                continue;
             case IR::Opcode::SharedAtomicIAdd32:
             case IR::Opcode::SharedAtomicIAdd64:
                 inst.ReplaceUsesWithAndRemove(
-                    ir.BufferAtomicIAdd(handle, inst.Arg(0), inst.Arg(1), {}));
+                    ir.BufferAtomicIAdd(handle, address, inst.Arg(1), {}));
                 continue;
-            case IR::Opcode::SharedAtomicOr32:
+            case IR::Opcode::SharedAtomicISub32:
                 inst.ReplaceUsesWithAndRemove(
-                    ir.BufferAtomicOr(handle, inst.Arg(0), inst.Arg(1), {}));
+                    ir.BufferAtomicISub(handle, address, inst.Arg(1), {}));
                 continue;
-            case IR::Opcode::SharedAtomicSMax32:
-            case IR::Opcode::SharedAtomicUMax32: {
-                const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
-                inst.ReplaceUsesWithAndRemove(
-                    ir.BufferAtomicIMax(handle, inst.Arg(0), inst.Arg(1), is_signed, {}));
-                continue;
-            }
             case IR::Opcode::SharedAtomicSMin32:
             case IR::Opcode::SharedAtomicUMin32: {
                 const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32;
                 inst.ReplaceUsesWithAndRemove(
-                    ir.BufferAtomicIMin(handle, inst.Arg(0), inst.Arg(1), is_signed, {}));
+                    ir.BufferAtomicIMin(handle, address, inst.Arg(1), is_signed, {}));
                 continue;
             }
-            case IR::Opcode::SharedAtomicXor32:
+            case IR::Opcode::SharedAtomicSMax32:
+            case IR::Opcode::SharedAtomicUMax32: {
+                const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
                 inst.ReplaceUsesWithAndRemove(
-                    ir.BufferAtomicXor(handle, inst.Arg(0), inst.Arg(1), {}));
+                    ir.BufferAtomicIMax(handle, address, inst.Arg(1), is_signed, {}));
+                continue;
+            }
+            case IR::Opcode::SharedAtomicInc32:
+                inst.ReplaceUsesWithAndRemove(ir.BufferAtomicInc(handle, address, {}));
+                continue;
+            case IR::Opcode::SharedAtomicDec32:
+                inst.ReplaceUsesWithAndRemove(ir.BufferAtomicDec(handle, address, {}));
+                continue;
+            case IR::Opcode::SharedAtomicAnd32:
+                inst.ReplaceUsesWithAndRemove(ir.BufferAtomicAnd(handle, address, inst.Arg(1), {}));
+                continue;
+            case IR::Opcode::SharedAtomicOr32:
+                inst.ReplaceUsesWithAndRemove(ir.BufferAtomicOr(handle, address, inst.Arg(1), {}));
+                continue;
+            case IR::Opcode::SharedAtomicXor32:
+                inst.ReplaceUsesWithAndRemove(ir.BufferAtomicXor(handle, address, inst.Arg(1), {}));
                 continue;
             default:
                 break;
             }
             // Replace shared operations.
-            const IR::U32 offset = ir.IMul(ir.GetAttributeU32(IR::Attribute::WorkgroupIndex),
-                                           ir.Imm32(shared_memory_size));
-            const IR::U32 address = ir.IAdd(IR::U32{inst.Arg(0)}, offset);
             switch (inst.GetOpcode()) {
             case IR::Opcode::LoadSharedU16:
                 inst.ReplaceUsesWithAndRemove(ir.LoadBufferU16(handle, address, {}));