Properly use float type for float buffer atomics (#3480)

Co-authored-by: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com>
This commit is contained in:
Stephen Miller
2025-08-29 19:18:10 -05:00
committed by GitHub
parent 6f26f66d77
commit 56626111ab
4 changed files with 15 additions and 6 deletions

View File

@@ -74,7 +74,7 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, offset);
}
const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
const auto [id, pointer_type] = buffer.Alias(is_float ? PointerType::F32 : PointerType::U32);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<32, 1, is_float>(ctx, address, buffer.Size(PointerSize::B32), [&] {

View File

@@ -113,9 +113,9 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
case Opcode::BUFFER_ATOMIC_DEC:
return BUFFER_ATOMIC(AtomicOp::Dec, inst);
case Opcode::BUFFER_ATOMIC_FMIN:
return BUFFER_ATOMIC(AtomicOp::Fmin, inst);
return BUFFER_ATOMIC<IR::F32>(AtomicOp::Fmin, inst);
case Opcode::BUFFER_ATOMIC_FMAX:
return BUFFER_ATOMIC(AtomicOp::Fmax, inst);
return BUFFER_ATOMIC<IR::F32>(AtomicOp::Fmax, inst);
// MIMG
// Image load operations
@@ -399,6 +399,8 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
IR::Value vdata_val = [&] {
if constexpr (std::is_same_v<T, IR::U32>) {
return ir.GetVectorReg<Shader::IR::U32>(vdata);
} else if constexpr (std::is_same_v<T, IR::F32>) {
return ir.GetVectorReg<Shader::IR::F32>(vdata);
} else if constexpr (std::is_same_v<T, IR::U64>) {
return ir.PackUint2x32(
ir.CompositeConstruct(ir.GetVectorReg<Shader::IR::U32>(vdata),
@@ -449,7 +451,11 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
}();
if (mubuf.glc) {
ir.SetVectorReg(vdata, IR::U32{original_val});
if constexpr (std::is_same_v<T, IR::U64>) {
UNREACHABLE();
} else {
ir.SetVectorReg(vdata, T{original_val});
}
}
}

View File

@@ -137,12 +137,12 @@ OPCODE(BufferAtomicSMin32, U32, Opaq
OPCODE(BufferAtomicSMin64, U64, Opaque, Opaque, U64 )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMin64, U64, Opaque, Opaque, U64 )
OPCODE(BufferAtomicFMin32, U32, Opaque, Opaque, F32 )
OPCODE(BufferAtomicFMin32, F32, Opaque, Opaque, F32 )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMax64, U64, Opaque, Opaque, U64 )
OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMax64, U64, Opaque, Opaque, U64 )
OPCODE(BufferAtomicFMax32, U32, Opaque, Opaque, F32 )
OPCODE(BufferAtomicFMax32, F32, Opaque, Opaque, F32 )
OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, )
OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, )
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )

View File

@@ -139,6 +139,9 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
case IR::Opcode::BufferAtomicUMax64:
case IR::Opcode::BufferAtomicUMin64:
return IR::Type::U64;
case IR::Opcode::BufferAtomicFMax32:
case IR::Opcode::BufferAtomicFMin32:
return IR::Type::F32;
case IR::Opcode::LoadBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32:
// Formatted buffer loads can use a variety of types.