From b327f4a4bc3ec31752e1d42467f351eec246cb0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Sun, 20 Apr 2025 19:46:58 +0100 Subject: [PATCH] Implement IMAGE_ATOMIC_FMIN and IMAGE_ATOMIC_FMAX for 32bit floats --- externals/sirit | 2 +- .../backend/spirv/emit_spirv.cpp | 4 ++++ .../backend/spirv/emit_spirv_atomic.cpp | 16 ++++++++++++++++ .../backend/spirv/emit_spirv_instructions.h | 2 ++ .../backend/spirv/spirv_emit_context.cpp | 1 + .../backend/spirv/spirv_emit_context.h | 1 + .../frontend/translate/vector_memory.cpp | 6 ++++++ src/shader_recompiler/info.h | 1 + src/shader_recompiler/ir/ir_emitter.cpp | 10 ++++++++++ src/shader_recompiler/ir/ir_emitter.h | 4 ++++ src/shader_recompiler/ir/opcodes.inc | 2 ++ .../ir/passes/shader_info_collection_pass.cpp | 4 ++++ 12 files changed, 52 insertions(+), 1 deletion(-) diff --git a/externals/sirit b/externals/sirit index 427a42c9e..4744731a1 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 427a42c9ed99b38204d9107bc3dc14e92458acf1 +Subproject commit 4744731a1c3d7868d43036c507909dae8321ad99 diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 936f82cd6..ff38bb5d8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -270,6 +270,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct if (info.has_image_query) { ctx.AddCapability(spv::Capability::ImageQuery); } + if (info.uses_atomic_float_min_max) { + ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max"); + ctx.AddCapability(spv::Capability::AtomicFloat32MinMaxEXT); + } if (info.uses_lane_id) { ctx.AddCapability(spv::Capability::GroupNonUniform); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 211899714..c42131c11 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -75,6 +75,14 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va const auto [scope, semantics]{AtomicArgs(ctx)}; return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); } + +Id ImageAtomicF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const auto& texture = ctx.images[handle & 0xFFFF]; + const Id pointer{ctx.OpImageTexelPointer(ctx.image_f32, texture.id, coords, ctx.ConstU32(0U))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.F32[1], pointer, scope, semantics, value); +} } // Anonymous namespace Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { @@ -187,6 +195,14 @@ Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMax); } +Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { + return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMax); +} + +Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { + return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMin); +} + Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) { // TODO: This is not yet implemented throw NotImplementedException("SPIR-V Instruction"); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 079f1005d..269f372d5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -482,6 +482,8 @@ Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); +Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); +Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 8433251ff..2640030df 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -869,6 +869,7 @@ void EmitContext::DefineImagesAndSamplers() { } if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) { image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); + image_f32 = TypePointer(spv::StorageClass::Image, F32[1]); } if (info.samplers.empty()) { return; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 784748658..38d55e0e4 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -207,6 +207,7 @@ public: Id invocation_id{}; Id subgroup_local_invocation_id{}; Id image_u32{}; + Id image_f32{}; Id shared_memory_u8{}; Id shared_memory_u16{}; diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index ed7788d8c..06799b0dd 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -117,6 +117,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return IMAGE_ATOMIC(AtomicOp::Umin, inst); case Opcode::IMAGE_ATOMIC_SMAX: return IMAGE_ATOMIC(AtomicOp::Smax, inst); + case Opcode::IMAGE_ATOMIC_FMAX: + return IMAGE_ATOMIC(AtomicOp::Fmax, inst); case Opcode::IMAGE_ATOMIC_UMAX: return IMAGE_ATOMIC(AtomicOp::Umax, inst); case Opcode::IMAGE_ATOMIC_AND: @@ -466,6 +468,10 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) { return ir.ImageAtomicIMax(handle, body, value, true, info); case AtomicOp::Umax: return ir.ImageAtomicUMax(handle, body, value, info); + case AtomicOp::Fmax: + return ir.ImageAtomicFMax(handle, body, value, info); + case AtomicOp::Fmin: + return ir.ImageAtomicFMin(handle, body, value, info); case AtomicOp::And: return ir.ImageAtomicAnd(handle, body, value, info); case AtomicOp::Or: diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 8dcf9c5c4..784f8b4d2 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -196,6 +196,7 @@ struct Info { bool has_discard{}; bool has_image_gather{}; bool has_image_query{}; + bool uses_atomic_float_min_max{}; bool uses_lane_id{}; bool uses_group_quad{}; bool uses_group_ballot{}; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index e1ebf2206..01d945178 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1870,6 +1870,16 @@ Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const return Inst(Opcode::ImageAtomicUMax32, Flags{info}, handle, coords, value); } +Value IREmitter::ImageAtomicFMax(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + return Inst(Opcode::ImageAtomicFMax32, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicFMin(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + return Inst(Opcode::ImageAtomicFMin32, Flags{info}, handle, coords, value); +} + Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value, bool is_signed, TextureInstInfo info) { return is_signed ? ImageAtomicSMax(handle, coords, value, info) diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index d978b3b4f..8f8a12736 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -321,6 +321,10 @@ public: const Value& value, TextureInstInfo info); [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicFMax(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicFMin(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value, bool is_signed, TextureInstInfo info); [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 6f186808c..ab6dbfde9 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -420,6 +420,8 @@ OPCODE(ImageAtomicSMin32, U32, Opaq OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicFMax32, F32, Opaque, Opaque, F32, ) +OPCODE(ImageAtomicFMin32, F32, Opaque, Opaque, F32, ) OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, ) diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index d739b2da5..f53a0f4d4 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -71,6 +71,10 @@ void Visit(Info& info, const IR::Inst& inst) { case IR::Opcode::ImageQueryLod: info.has_image_query = true; break; + case IR::Opcode::ImageAtomicFMax32: + case IR::Opcode::ImageAtomicFMin32: + info.uses_atomic_float_min_max = true; + break; case IR::Opcode::LaneId: info.uses_lane_id = true; break;