shader_recompiler: Various fixes to shared memory and atomics.

This commit is contained in:
squidbus 2025-06-09 21:43:48 -07:00
parent e2b726382e
commit 70613dd0df
13 changed files with 169 additions and 128 deletions

View File

@ -27,6 +27,19 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
}); });
} }
// Emits a 32-bit shared-memory atomic for operations that take no value operand.
// `offset` is shifted right by 2 to index ctx.shared_memory_u32, and `atomic_func` is the
// Sirit::Module member that is invoked with (result type, pointer, scope, semantics).
// Returns whatever AccessBoundsCheck<32> yields for the wrapped atomic.
Id SharedAtomicU32IncDec(EmitContext& ctx, Id offset,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
const Id shift_id{ctx.ConstU32(2U)};
// offset >> 2; presumably converts a byte offset to a dword index -- confirm against callers.
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
// Upper bound handed to the bounds check: shared_memory_size divided by 4 via Common::DivCeil.
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
// The access chain is built unconditionally, before the bounds check below.
const Id pointer{
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
const auto [scope, semantics]{AtomicArgs(ctx)};
// Only the atomic itself is emitted inside the AccessBoundsCheck callback.
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
});
}
Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value, Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const Id shift_id{ctx.ConstU32(3U)}; const Id shift_id{ctx.ConstU32(3U)};
@ -40,19 +53,6 @@ Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
}); });
} }
// Emits a 32-bit shared-memory atomic for operations that take no value operand.
// `offset` is shifted right by 2 to index ctx.shared_memory_u32, and `atomic_func` is the
// Sirit::Module member that is invoked with (result type, pointer, scope, semantics).
Id SharedAtomicU32_IncDec(EmitContext& ctx, Id offset,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
const Id shift_id{ctx.ConstU32(2U)};
// offset >> 2; presumably converts a byte offset to a dword index -- confirm against callers.
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
// Upper bound handed to the bounds check: shared_memory_size divided by 4 via Common::DivCeil.
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
// The access chain is built unconditionally, before the bounds check below.
const Id pointer{
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
const auto [scope, semantics]{AtomicArgs(ctx)};
// Only the atomic itself is emitted inside the AccessBoundsCheck callback.
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
});
}
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle]; const auto& buffer = ctx.buffers[handle];
@ -68,6 +68,21 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
}); });
} }
// Emits a 32-bit buffer atomic for operations that take no value operand.
// `handle` selects the entry in ctx.buffers, `address` is shifted right by 2 to form the element
// index, and `atomic_func` is the Sirit::Module member invoked with
// (result type, pointer, scope, semantics). `inst` is not referenced in this body.
// NOTE(review): no data operand is passed to atomic_func; confirm that an unconditional
// increment/decrement is the intended semantics for the instructions routed here.
Id BufferAtomicU32IncDec(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle];
// Add the buffer's own offset to the address only when that offset is a valid Id.
if (Sirit::ValidId(buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
}
// address >> 2; presumably converts a byte address to a dword index -- confirm against callers.
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
// Look up the buffer's U32 view: the variable id and the pointer type used for the access chain.
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
// The access chain is built unconditionally, before the bounds check below.
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)};
// The index is checked against buffer.size_dwords; only the atomic is emitted in the callback.
return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics);
});
}
Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id cmp_value, Id cmp_value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) { Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
@ -156,12 +171,12 @@ Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicISub); return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicISub);
} }
Id EmitSharedAtomicIIncrement32(EmitContext& ctx, Id offset) { Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset) {
return SharedAtomicU32_IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement); return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement);
} }
Id EmitSharedAtomicIDecrement32(EmitContext& ctx, Id offset) { Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset) {
return SharedAtomicU32_IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement); return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement);
} }
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
@ -172,6 +187,10 @@ Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd); return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
} }
// Emits a 32-bit buffer atomic subtract by forwarding all arguments to BufferAtomicU32 with
// Sirit::Module::OpAtomicISub as the atomic operation.
Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicISub);
}
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin); return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
} }
@ -188,14 +207,12 @@ Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax); return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax);
} }
Id EmitBufferAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) { Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
// TODO return BufferAtomicU32IncDec(ctx, inst, handle, address, &Sirit::Module::OpAtomicIIncrement);
UNREACHABLE_MSG("Unsupported BUFFER_ATOMIC opcode: ", IR::Opcode::BufferAtomicInc32);
} }
Id EmitBufferAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) { Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
// TODO return BufferAtomicU32IncDec(ctx, inst, handle, address, &Sirit::Module::OpAtomicIDecrement);
UNREACHABLE_MSG("Unsupported BUFFER_ATOMIC opcode: ", IR::Opcode::BufferAtomicDec32);
} }
Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {

View File

@ -87,12 +87,13 @@ void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
@ -136,8 +137,8 @@ Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicIIncrement32(EmitContext& ctx, Id offset); Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset);
Id EmitSharedAtomicIDecrement32(EmitContext& ctx, Id offset); Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset);
Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value);
Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);

View File

@ -13,10 +13,10 @@ Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)}; const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)}; const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
return AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] { return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer = const Id pointer =
ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index); ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
return ctx.OpLoad(ctx.U16, pointer); return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
}); });
} }
@ -40,7 +40,7 @@ Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] { return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer{ const Id pointer{
ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)}; ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
return ctx.OpLoad(ctx.U64, pointer); return ctx.OpBitcast(ctx.U32[2], ctx.OpLoad(ctx.U64, pointer));
}); });
} }
@ -52,7 +52,7 @@ void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] { AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer = const Id pointer =
ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index); ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
ctx.OpStore(pointer, value); ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
return Id{0}; return Id{0};
}); });
} }
@ -78,7 +78,7 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] { AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer{ const Id pointer{
ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)}; ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
ctx.OpStore(pointer, value); ctx.OpStore(pointer, ctx.OpBitcast(ctx.U64, value));
return Id{0}; return Id{0};
}); });
} }

View File

@ -216,31 +216,26 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
if (is_pair) { if (is_pair) {
const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1); const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj))); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
if (bit_size == 32) { if (bit_size == 64) {
ir.WriteShared(32, ir.GetVectorReg(data0), addr0); ir.WriteShared(
64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)),
addr0);
} else { } else {
ir.WriteShared(64, ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0),
ir.GetVectorReg(data0 + 1))),
addr0);
} }
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
if (bit_size == 32) { if (bit_size == 64) {
ir.WriteShared(32, ir.GetVectorReg(data1), addr1); ir.WriteShared(
64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)),
addr1);
} else { } else {
ir.WriteShared(64, ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1),
ir.GetVectorReg(data1 + 1))),
addr1);
} }
} else if (bit_size == 64) { } else if (bit_size == 64) {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
const IR::Value data = const IR::Value data =
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)); ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0); ir.WriteShared(bit_size, data, addr0);
} else if (bit_size == 16) {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
} else { } else {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
@ -264,7 +259,7 @@ void Translator::DS_INC_U32(const GcnInst& inst, bool rtn) {
const IR::U32 offset = const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset); const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicIIncrement(addr_offset); const IR::Value original_val = ir.SharedAtomicInc(addr_offset);
if (rtn) { if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val}); SetDst(inst.dst[0], IR::U32{original_val});
} }
@ -275,7 +270,7 @@ void Translator::DS_DEC_U32(const GcnInst& inst, bool rtn) {
const IR::U32 offset = const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset); const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicIDecrement(addr_offset); const IR::Value original_val = ir.SharedAtomicDec(addr_offset);
if (rtn) { if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val}); SetDst(inst.dst[0], IR::U32{original_val});
} }
@ -309,32 +304,25 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride
const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1); const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj))); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0); const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0);
if (bit_size == 32) { if (bit_size == 64) {
ir.SetVectorReg(dst_reg++, IR::U32{data0}); ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)});
} else { } else {
const auto vector = ir.UnpackUint2x32(IR::U64{data0}); ir.SetVectorReg(dst_reg++, IR::U32{data0});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
} }
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1); const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
if (bit_size == 32) { if (bit_size == 64) {
ir.SetVectorReg(dst_reg++, IR::U32{data1}); ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 0)});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)});
} else { } else {
const auto vector = ir.UnpackUint2x32(IR::U64{data1}); ir.SetVectorReg(dst_reg++, IR::U32{data1});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
} }
} else if (bit_size == 64) { } else if (bit_size == 64) {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0); const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
const auto vector = ir.UnpackUint2x32(IR::U64{data}); ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)}); ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(vector, 1)});
} else if (bit_size == 16) {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
const IR::U16 data = IR::U16{ir.LoadShared(bit_size, is_signed, addr0)};
ir.SetVectorReg(dst_reg, ir.UConvert(32, data));
} else { } else {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)}; const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)};

View File

@ -354,9 +354,9 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
case AtomicOp::Xor: case AtomicOp::Xor:
return ir.BufferAtomicXor(handle, address, vdata_val, buffer_info); return ir.BufferAtomicXor(handle, address, vdata_val, buffer_info);
case AtomicOp::Inc: case AtomicOp::Inc:
return ir.BufferAtomicInc(handle, address, vdata_val, buffer_info); return ir.BufferAtomicInc(handle, address, buffer_info);
case AtomicOp::Dec: case AtomicOp::Dec:
return ir.BufferAtomicDec(handle, address, vdata_val, buffer_info); return ir.BufferAtomicDec(handle, address, buffer_info);
default: default:
UNREACHABLE(); UNREACHABLE();
} }

View File

@ -294,11 +294,11 @@ void IREmitter::SetPatch(Patch patch, const F32& value) {
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
switch (bit_size) { switch (bit_size) {
case 16: case 16:
return Inst<U16>(Opcode::LoadSharedU16, offset); return Inst<U32>(Opcode::LoadSharedU16, offset);
case 32: case 32:
return Inst<U32>(Opcode::LoadSharedU32, offset); return Inst<U32>(Opcode::LoadSharedU32, offset);
case 64: case 64:
return Inst<U64>(Opcode::LoadSharedU64, offset); return Inst(Opcode::LoadSharedU64, offset);
default: default:
UNREACHABLE_MSG("Invalid bit size {}", bit_size); UNREACHABLE_MSG("Invalid bit size {}", bit_size);
} }
@ -353,12 +353,12 @@ U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) {
return Inst<U32>(Opcode::SharedAtomicXor32, address, data); return Inst<U32>(Opcode::SharedAtomicXor32, address, data);
} }
U32 IREmitter::SharedAtomicIIncrement(const U32& address) { U32 IREmitter::SharedAtomicInc(const U32& address) {
return Inst<U32>(Opcode::SharedAtomicIIncrement32, address); return Inst<U32>(Opcode::SharedAtomicInc32, address);
} }
U32 IREmitter::SharedAtomicIDecrement(const U32& address) { U32 IREmitter::SharedAtomicDec(const U32& address) {
return Inst<U32>(Opcode::SharedAtomicIDecrement32, address); return Inst<U32>(Opcode::SharedAtomicDec32, address);
} }
U32 IREmitter::SharedAtomicISub(const U32& address, const U32& data) { U32 IREmitter::SharedAtomicISub(const U32& address, const U32& data) {
@ -474,7 +474,19 @@ void IREmitter::StoreBufferFormat(const Value& handle, const Value& address, con
Value IREmitter::BufferAtomicIAdd(const Value& handle, const Value& address, const Value& value, Value IREmitter::BufferAtomicIAdd(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) { BufferInstInfo info) {
return Inst(Opcode::BufferAtomicIAdd32, Flags{info}, handle, address, value); switch (value.Type()) {
case Type::U32:
return Inst(Opcode::BufferAtomicIAdd32, Flags{info}, handle, address, value);
case Type::U64:
return Inst(Opcode::BufferAtomicIAdd64, Flags{info}, handle, address, value);
default:
ThrowInvalidType(value.Type());
}
}
Value IREmitter::BufferAtomicISub(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicISub32, Flags{info}, handle, address, value);
} }
Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value, Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value,
@ -489,14 +501,12 @@ Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, con
: Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value); : Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value);
} }
Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, const Value& value, Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, BufferInstInfo info) {
BufferInstInfo info) { return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address);
return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address, value);
} }
Value IREmitter::BufferAtomicDec(const Value& handle, const Value& address, const Value& value, Value IREmitter::BufferAtomicDec(const Value& handle, const Value& address, BufferInstInfo info) {
BufferInstInfo info) { return Inst(Opcode::BufferAtomicDec32, Flags{info}, handle, address);
return Inst(Opcode::BufferAtomicDec32, Flags{info}, handle, address, value);
} }
Value IREmitter::BufferAtomicAnd(const Value& handle, const Value& address, const Value& value, Value IREmitter::BufferAtomicAnd(const Value& handle, const Value& address, const Value& value,

View File

@ -100,16 +100,15 @@ public:
void WriteShared(int bit_size, const Value& value, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset);
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data); [[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data);
[[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data);
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed); [[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed); [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 SharedAtomicInc(const U32& address);
[[nodiscard]] U32 SharedAtomicDec(const U32& address);
[[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data); [[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
[[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data); [[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data);
[[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data); [[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data);
[[nodiscard]] U32 SharedAtomicIIncrement(const U32& address);
[[nodiscard]] U32 SharedAtomicIDecrement(const U32& address);
[[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data);
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
[[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index); [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);
@ -134,14 +133,16 @@ public:
[[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info); const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicISub(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address,
const Value& value, bool is_signed, BufferInstInfo info); const Value& value, bool is_signed, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address,
const Value& value, bool is_signed, BufferInstInfo info); const Value& value, bool is_signed, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info); BufferInstInfo info);
[[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info); BufferInstInfo info);
[[nodiscard]] Value BufferAtomicAnd(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicAnd(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info); const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicOr(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicOr(const Value& handle, const Value& address,

View File

@ -66,6 +66,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::StoreBufferF32x4: case Opcode::StoreBufferF32x4:
case Opcode::StoreBufferFormatF32: case Opcode::StoreBufferFormatF32:
case Opcode::BufferAtomicIAdd32: case Opcode::BufferAtomicIAdd32:
case Opcode::BufferAtomicIAdd64:
case Opcode::BufferAtomicISub32:
case Opcode::BufferAtomicSMin32: case Opcode::BufferAtomicSMin32:
case Opcode::BufferAtomicUMin32: case Opcode::BufferAtomicUMin32:
case Opcode::BufferAtomicSMax32: case Opcode::BufferAtomicSMax32:
@ -76,15 +78,21 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::BufferAtomicOr32: case Opcode::BufferAtomicOr32:
case Opcode::BufferAtomicXor32: case Opcode::BufferAtomicXor32:
case Opcode::BufferAtomicSwap32: case Opcode::BufferAtomicSwap32:
case Opcode::BufferAtomicCmpSwap32:
case Opcode::DataAppend: case Opcode::DataAppend:
case Opcode::DataConsume: case Opcode::DataConsume:
case Opcode::WriteSharedU64: case Opcode::WriteSharedU16:
case Opcode::WriteSharedU32: case Opcode::WriteSharedU32:
case Opcode::WriteSharedU64:
case Opcode::SharedAtomicIAdd32: case Opcode::SharedAtomicIAdd32:
case Opcode::SharedAtomicIAdd64:
case Opcode::SharedAtomicISub32:
case Opcode::SharedAtomicSMin32: case Opcode::SharedAtomicSMin32:
case Opcode::SharedAtomicUMin32: case Opcode::SharedAtomicUMin32:
case Opcode::SharedAtomicSMax32: case Opcode::SharedAtomicSMax32:
case Opcode::SharedAtomicUMax32: case Opcode::SharedAtomicUMax32:
case Opcode::SharedAtomicInc32:
case Opcode::SharedAtomicDec32:
case Opcode::SharedAtomicAnd32: case Opcode::SharedAtomicAnd32:
case Opcode::SharedAtomicOr32: case Opcode::SharedAtomicOr32:
case Opcode::SharedAtomicXor32: case Opcode::SharedAtomicXor32:

View File

@ -30,26 +30,26 @@ OPCODE(EmitVertex, Void,
OPCODE(EmitPrimitive, Void, ) OPCODE(EmitPrimitive, Void, )
// Shared memory operations // Shared memory operations
OPCODE(LoadSharedU16, U16, U32, ) OPCODE(LoadSharedU16, U32, U32, )
OPCODE(LoadSharedU32, U32, U32, ) OPCODE(LoadSharedU32, U32, U32, )
OPCODE(LoadSharedU64, U64, U32, ) OPCODE(LoadSharedU64, U32x2, U32, )
OPCODE(WriteSharedU16, Void, U32, U16, ) OPCODE(WriteSharedU16, Void, U32, U32, )
OPCODE(WriteSharedU32, Void, U32, U32, ) OPCODE(WriteSharedU32, Void, U32, U32, )
OPCODE(WriteSharedU64, Void, U32, U64, ) OPCODE(WriteSharedU64, Void, U32, U32x2, )
// Shared atomic operations // Shared atomic operations
OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
OPCODE(SharedAtomicIAdd64, U64, U32, U64, ) OPCODE(SharedAtomicIAdd64, U64, U32, U64, )
OPCODE(SharedAtomicISub32, U32, U32, U32, )
OPCODE(SharedAtomicSMin32, U32, U32, U32, ) OPCODE(SharedAtomicSMin32, U32, U32, U32, )
OPCODE(SharedAtomicUMin32, U32, U32, U32, ) OPCODE(SharedAtomicUMin32, U32, U32, U32, )
OPCODE(SharedAtomicSMax32, U32, U32, U32, ) OPCODE(SharedAtomicSMax32, U32, U32, U32, )
OPCODE(SharedAtomicUMax32, U32, U32, U32, ) OPCODE(SharedAtomicUMax32, U32, U32, U32, )
OPCODE(SharedAtomicInc32, U32, U32, )
OPCODE(SharedAtomicDec32, U32, U32, )
OPCODE(SharedAtomicAnd32, U32, U32, U32, ) OPCODE(SharedAtomicAnd32, U32, U32, U32, )
OPCODE(SharedAtomicOr32, U32, U32, U32, ) OPCODE(SharedAtomicOr32, U32, U32, U32, )
OPCODE(SharedAtomicXor32, U32, U32, U32, ) OPCODE(SharedAtomicXor32, U32, U32, U32, )
OPCODE(SharedAtomicISub32, U32, U32, U32, )
OPCODE(SharedAtomicIIncrement32, U32, U32, )
OPCODE(SharedAtomicIDecrement32, U32, U32, )
// Context getters/setters // Context getters/setters
OPCODE(GetUserData, U32, ScalarReg, ) OPCODE(GetUserData, U32, ScalarReg, )
@ -120,12 +120,13 @@ OPCODE(StoreBufferFormatF32, Void, Opaq
// Buffer atomic operations // Buffer atomic operations
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicIAdd64, U64, Opaque, Opaque, U64 ) OPCODE(BufferAtomicIAdd64, U64, Opaque, Opaque, U64 )
OPCODE(BufferAtomicISub32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, )
OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, )
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )

View File

@ -17,6 +17,8 @@ using SharpLocation = u32;
bool IsBufferAtomic(const IR::Inst& inst) { bool IsBufferAtomic(const IR::Inst& inst) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::BufferAtomicIAdd32: case IR::Opcode::BufferAtomicIAdd32:
case IR::Opcode::BufferAtomicIAdd64:
case IR::Opcode::BufferAtomicISub32:
case IR::Opcode::BufferAtomicSMin32: case IR::Opcode::BufferAtomicSMin32:
case IR::Opcode::BufferAtomicUMin32: case IR::Opcode::BufferAtomicUMin32:
case IR::Opcode::BufferAtomicSMax32: case IR::Opcode::BufferAtomicSMax32:
@ -27,6 +29,7 @@ bool IsBufferAtomic(const IR::Inst& inst) {
case IR::Opcode::BufferAtomicOr32: case IR::Opcode::BufferAtomicOr32:
case IR::Opcode::BufferAtomicXor32: case IR::Opcode::BufferAtomicXor32:
case IR::Opcode::BufferAtomicSwap32: case IR::Opcode::BufferAtomicSwap32:
case IR::Opcode::BufferAtomicCmpSwap32:
return true; return true;
default: default:
return false; return false;

View File

@ -39,13 +39,11 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
ASSERT(addr->Arg(1).IsImmediate()); ASSERT(addr->Arg(1).IsImmediate());
offset = addr->Arg(1).U32(); offset = addr->Arg(1).U32();
} }
IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()}) IR::Value data = inst.Arg(1).Resolve();
: inst.Arg(1).Resolve();
for (s32 i = 0; i < num_components; i++) { for (s32 i = 0; i < num_components; i++) {
const auto attrib = IR::Attribute::Param0 + (offset / 16); const auto attrib = IR::Attribute::Param0 + (offset / 16);
const auto comp = (offset / 4) % 4; const auto comp = (offset / 4) % 4;
const IR::U32 value = const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data};
IR::U32{is_composite ? ir.CompositeExtract(data, i) : data};
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp); ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
offset += 4; offset += 4;
} }

View File

@ -9,12 +9,14 @@
namespace Shader::Optimization { namespace Shader::Optimization {
static bool IsLoadShared(const IR::Inst& inst) { static bool IsLoadShared(const IR::Inst& inst) {
return inst.GetOpcode() == IR::Opcode::LoadSharedU32 || return inst.GetOpcode() == IR::Opcode::LoadSharedU16 ||
inst.GetOpcode() == IR::Opcode::LoadSharedU32 ||
inst.GetOpcode() == IR::Opcode::LoadSharedU64; inst.GetOpcode() == IR::Opcode::LoadSharedU64;
} }
static bool IsWriteShared(const IR::Inst& inst) { static bool IsWriteShared(const IR::Inst& inst) {
return inst.GetOpcode() == IR::Opcode::WriteSharedU32 || return inst.GetOpcode() == IR::Opcode::WriteSharedU16 ||
inst.GetOpcode() == IR::Opcode::WriteSharedU32 ||
inst.GetOpcode() == IR::Opcode::WriteSharedU64; inst.GetOpcode() == IR::Opcode::WriteSharedU64;
} }

View File

@ -10,18 +10,23 @@ namespace Shader::Optimization {
static bool IsSharedAccess(const IR::Inst& inst) { static bool IsSharedAccess(const IR::Inst& inst) {
const auto opcode = inst.GetOpcode(); const auto opcode = inst.GetOpcode();
switch (opcode) { switch (opcode) {
case IR::Opcode::LoadSharedU16:
case IR::Opcode::LoadSharedU32: case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64: case IR::Opcode::LoadSharedU64:
case IR::Opcode::WriteSharedU16:
case IR::Opcode::WriteSharedU32: case IR::Opcode::WriteSharedU32:
case IR::Opcode::WriteSharedU64: case IR::Opcode::WriteSharedU64:
case IR::Opcode::SharedAtomicAnd32:
case IR::Opcode::SharedAtomicIAdd32: case IR::Opcode::SharedAtomicIAdd32:
case IR::Opcode::SharedAtomicIAdd64: case IR::Opcode::SharedAtomicIAdd64:
case IR::Opcode::SharedAtomicOr32: case IR::Opcode::SharedAtomicISub32:
case IR::Opcode::SharedAtomicSMax32:
case IR::Opcode::SharedAtomicUMax32:
case IR::Opcode::SharedAtomicSMin32: case IR::Opcode::SharedAtomicSMin32:
case IR::Opcode::SharedAtomicUMin32: case IR::Opcode::SharedAtomicUMin32:
case IR::Opcode::SharedAtomicSMax32:
case IR::Opcode::SharedAtomicUMax32:
case IR::Opcode::SharedAtomicInc32:
case IR::Opcode::SharedAtomicDec32:
case IR::Opcode::SharedAtomicAnd32:
case IR::Opcode::SharedAtomicOr32:
case IR::Opcode::SharedAtomicXor32: case IR::Opcode::SharedAtomicXor32:
return true; return true;
default: default:
@ -56,46 +61,53 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
} }
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
const IR::U32 handle = ir.Imm32(binding); const IR::U32 handle = ir.Imm32(binding);
const IR::U32 offset = ir.IMul(ir.GetAttributeU32(IR::Attribute::WorkgroupIndex),
ir.Imm32(shared_memory_size));
const IR::U32 address = ir.IAdd(IR::U32{inst.Arg(0)}, offset);
// Replace shared atomics first // Replace shared atomics first
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::SharedAtomicAnd32:
inst.ReplaceUsesWithAndRemove(
ir.BufferAtomicAnd(handle, inst.Arg(0), inst.Arg(1), {}));
continue;
case IR::Opcode::SharedAtomicIAdd32: case IR::Opcode::SharedAtomicIAdd32:
case IR::Opcode::SharedAtomicIAdd64: case IR::Opcode::SharedAtomicIAdd64:
inst.ReplaceUsesWithAndRemove( inst.ReplaceUsesWithAndRemove(
ir.BufferAtomicIAdd(handle, inst.Arg(0), inst.Arg(1), {})); ir.BufferAtomicIAdd(handle, address, inst.Arg(1), {}));
continue; continue;
case IR::Opcode::SharedAtomicOr32: case IR::Opcode::SharedAtomicISub32:
inst.ReplaceUsesWithAndRemove( inst.ReplaceUsesWithAndRemove(
ir.BufferAtomicOr(handle, inst.Arg(0), inst.Arg(1), {})); ir.BufferAtomicISub(handle, address, inst.Arg(1), {}));
continue; continue;
case IR::Opcode::SharedAtomicSMax32:
case IR::Opcode::SharedAtomicUMax32: {
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
inst.ReplaceUsesWithAndRemove(
ir.BufferAtomicIMax(handle, inst.Arg(0), inst.Arg(1), is_signed, {}));
continue;
}
case IR::Opcode::SharedAtomicSMin32: case IR::Opcode::SharedAtomicSMin32:
case IR::Opcode::SharedAtomicUMin32: { case IR::Opcode::SharedAtomicUMin32: {
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32; const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32;
inst.ReplaceUsesWithAndRemove( inst.ReplaceUsesWithAndRemove(
ir.BufferAtomicIMin(handle, inst.Arg(0), inst.Arg(1), is_signed, {})); ir.BufferAtomicIMin(handle, address, inst.Arg(1), is_signed, {}));
continue; continue;
} }
case IR::Opcode::SharedAtomicXor32: case IR::Opcode::SharedAtomicSMax32:
case IR::Opcode::SharedAtomicUMax32: {
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
inst.ReplaceUsesWithAndRemove( inst.ReplaceUsesWithAndRemove(
ir.BufferAtomicXor(handle, inst.Arg(0), inst.Arg(1), {})); ir.BufferAtomicIMax(handle, address, inst.Arg(1), is_signed, {}));
continue;
}
case IR::Opcode::SharedAtomicInc32:
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicInc(handle, address, {}));
continue;
case IR::Opcode::SharedAtomicDec32:
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicDec(handle, address, {}));
continue;
case IR::Opcode::SharedAtomicAnd32:
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicAnd(handle, address, inst.Arg(1), {}));
continue;
case IR::Opcode::SharedAtomicOr32:
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicOr(handle, address, inst.Arg(1), {}));
continue;
case IR::Opcode::SharedAtomicXor32:
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicXor(handle, address, inst.Arg(1), {}));
continue; continue;
default: default:
break; break;
} }
// Replace shared operations. // Replace shared operations.
const IR::U32 offset = ir.IMul(ir.GetAttributeU32(IR::Attribute::WorkgroupIndex),
ir.Imm32(shared_memory_size));
const IR::U32 address = ir.IAdd(IR::U32{inst.Arg(0)}, offset);
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::LoadSharedU16: case IR::Opcode::LoadSharedU16:
inst.ReplaceUsesWithAndRemove(ir.LoadBufferU16(handle, address, {})); inst.ReplaceUsesWithAndRemove(ir.LoadBufferU16(handle, address, {}));