From eeb5da33d0ae14c3eddef77a8cf44f07729b3976 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 22 May 2025 19:47:17 +0200 Subject: [PATCH] Make ReadConst a SPIR-V function --- .../spirv/emit_spirv_context_get_set.cpp | 27 ++-- .../backend/spirv/spirv_emit_context.cpp | 128 ++++++++++++++++-- .../backend/spirv/spirv_emit_context.h | 68 ++++------ src/shader_recompiler/info.h | 11 +- .../ir/passes/shader_info_collection_pass.cpp | 10 +- 5 files changed, 165 insertions(+), 79 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index be85c8145..9234f80be 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -163,25 +163,14 @@ void EmitGetGotoVariable(EmitContext&) { using PointerType = EmitContext::PointerType; Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) { - const Id base_lo = ctx.OpUConvert(ctx.U64, ctx.OpCompositeExtract(ctx.U32[1], addr, 0)); - const Id base_hi = ctx.OpUConvert(ctx.U64, ctx.OpCompositeExtract(ctx.U32[1], addr, 1)); - const Id base_sift = ctx.OpShiftLeftLogical(ctx.U64, base_hi, ctx.ConstU32(32u)); - const Id base = ctx.OpBitwiseOr(ctx.U64, base_lo, base_sift); - const Id offset_bytes = ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.ConstU32(2u)); - const Id address = ctx.OpIAdd(ctx.U64, base, ctx.OpUConvert(ctx.U64, offset_bytes)); - return ctx.EmitMemoryAccess(ctx.U32[1], address, [&]() { - const u32 flatbuf_off_dw = inst->Flags(); - if (flatbuf_off_dw == 0) { - return ctx.u32_zero_value; - } else { - const auto& srt_flatbuf = ctx.buffers[ctx.flatbuf_index]; - ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::Flatbuf); - const auto [id, pointer_type] = srt_flatbuf[PointerType::U32]; - const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, - 
ctx.ConstU32(flatbuf_off_dw))}; - return ctx.OpLoad(ctx.U32[1], ptr); - } - }); + const u32 flatbuf_off_dw = inst->Flags(); + // We can only provide a fallback for immediate offsets. + if (flatbuf_off_dw == 0) { + return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const_dynamic, addr, offset); + } else { + return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const, addr, offset, + ctx.ConstU32(flatbuf_off_dw)); + } } Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index e9aea3fb8..68bfcc0d0 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -144,9 +144,12 @@ void EmitContext::DefineArithmeticTypes() { true_value = ConstantTrue(U1[1]); false_value = ConstantFalse(U1[1]); + u8_one_value = Constant(U8, 1U); + u8_zero_value = Constant(U8, 0U); u32_one_value = ConstU32(1U); u32_zero_value = ConstU32(0U); f32_zero_value = ConstF32(0.0f); + u64_one_value = Constant(U64, 1ULL); u64_zero_value = Constant(U64, 0ULL); pi_x2 = ConstF32(2.0f * float{std::numbers::pi}); @@ -194,17 +197,6 @@ void EmitContext::DefineArithmeticTypes() { physical_pointer_types[PointerType::U8] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U8); } - - if (info.dma_types != IR::Type::Void) { - - caching_pagebits_value = - Constant(U64, static_cast(VideoCore::BufferCache::CACHING_PAGEBITS)); - caching_pagemask_value = Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1); - - // Used to calculate fault buffer position and mask - u32_three_value = ConstU32(3U); - u32_seven_value = ConstU32(7U); - } } void EmitContext::DefineInterfaces() { @@ -760,16 +752,19 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte }; void EmitContext::DefineBuffers() { - if (!profile.supports_robust_buffer_access && !info.has_readconst) { + if 
(!profile.supports_robust_buffer_access && + info.readconst_types == Info::ReadConstType::None) { // In case Flatbuf has not already been bound by IR and is needed // to query buffer sizes, bind it now. info.buffers.push_back({ .used_types = IR::Type::U32, - // We can't guarantee that flatbuf will now grow bast UBO + // We can't guarantee that flatbuf will not grow past UBO // limit if there are a lot of ReadConsts. (We could specialize) .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits::max()), .buffer_type = BufferType::Flatbuf, }); + // In the future we may want to read buffer sizes from GPU memory if available. + // info.readconst_types |= Info::ReadConstType::Immediate; } for (const auto& desc : info.buffers) { const auto buf_sharp = desc.GetSharp(info); @@ -1073,6 +1068,101 @@ Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_vie return func; } +Id EmitContext::DefineGetBdaPointer() { + const auto caching_pagebits{ + Constant(U64, static_cast(VideoCore::BufferCache::CACHING_PAGEBITS))}; + const auto caching_pagemask{Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1)}; + + const auto func_type{TypeFunction(U64, U64)}; + const auto func{OpFunction(U64, spv::FunctionControlMask::MaskNone, func_type)}; + const auto address{OpFunctionParameter(U64)}; + Name(func, "get_bda_pointer"); + AddLabel(); + + const auto fault_label{OpLabel()}; + const auto available_label{OpLabel()}; + const auto merge_label{OpLabel()}; + + // Get page BDA + const auto page{OpShiftRightLogical(U64, address, caching_pagebits)}; + const auto page32{OpUConvert(U32[1], page)}; + const auto& bda_buffer{buffers[bda_pagetable_index]}; + const auto [bda_buffer_id, bda_pointer_type] = bda_buffer[PointerType::U64]; + const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)}; + const auto bda{OpLoad(U64, bda_ptr)}; + + // Check if page is GPU cached + const auto is_fault{OpIEqual(U1[1], bda, u64_zero_value)}; + 
OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone); + OpBranchConditional(is_fault, fault_label, available_label); + + // First time access, mark as fault + AddLabel(fault_label); + const auto& fault_buffer{buffers[fault_buffer_index]}; + const auto [fault_buffer_id, fault_pointer_type] = fault_buffer[PointerType::U8]; + const auto page_div8{OpShiftRightLogical(U32[1], page32, ConstU32(3U))}; + const auto page_mod8{OpBitwiseAnd(U32[1], page32, ConstU32(7U))}; + const auto page_mask{OpShiftLeftLogical(U8, u8_one_value, page_mod8)}; + const auto fault_ptr{ + OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div8)}; + const auto fault_value{OpLoad(U8, fault_ptr)}; + const auto fault_value_masked{OpBitwiseOr(U8, fault_value, page_mask)}; + OpStore(fault_ptr, fault_value_masked); + + // Return null pointer + const auto fallback_result{u64_zero_value}; + OpBranch(merge_label); + + // Value is available, compute address + AddLabel(available_label); + const auto offset_in_bda{OpBitwiseAnd(U64, address, caching_pagemask)}; + const auto addr{OpIAdd(U64, bda, offset_in_bda)}; + OpBranch(merge_label); + + // Merge + AddLabel(merge_label); + const auto result{OpPhi(U64, addr, available_label, fallback_result, fault_label)}; + OpReturnValue(result); + OpFunctionEnd(); + return func; +} + +Id EmitContext::DefineReadConst(bool dynamic) { + const auto func_type{!dynamic ? TypeFunction(U32[1], U32[2], U32[1], U32[1]) + : TypeFunction(U32[1], U32[2], U32[1])}; + const auto func{OpFunction(U32[1], spv::FunctionControlMask::MaskNone, func_type)}; + const auto base{OpFunctionParameter(U32[2])}; + const auto offset{OpFunctionParameter(U32[1])}; + const auto flatbuf_offset{!dynamic ? OpFunctionParameter(U32[1]) : Id{}}; + Name(func, dynamic ? 
"read_const_dynamic" : "read_const"); + AddLabel(); + + const auto base_lo{OpUConvert(U64, OpCompositeExtract(U32[1], base, 0))}; + const auto base_hi{OpUConvert(U64, OpCompositeExtract(U32[1], base, 1))}; + const auto base_shift{OpShiftLeftLogical(U64, base_hi, ConstU32(32U))}; + const auto base_addr{OpBitwiseOr(U64, base_lo, base_shift)}; + const auto offset_bytes{OpShiftLeftLogical(U32[1], offset, ConstU32(2U))}; + const auto addr{OpIAdd(U64, base_addr, OpUConvert(U64, offset_bytes))}; + + const auto result = EmitMemoryRead(U32[1], addr, [&]() { + if (dynamic) { + return u32_zero_value; + } else { + const auto& flatbuf_buffer{buffers[flatbuf_index]}; + ASSERT(flatbuf_buffer.binding >= 0 && + flatbuf_buffer.buffer_type == BufferType::Flatbuf); + const auto [flatbuf_buffer_id, flatbuf_pointer_type] = flatbuf_buffer[PointerType::U32]; + const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value, + flatbuf_offset)}; + return OpLoad(U32[1], ptr); + } + }); + + OpReturnValue(result); + OpFunctionEnd(); + return func; +} + void EmitContext::DefineFunctions() { if (info.uses_pack_10_11_11) { f32_to_uf11 = DefineFloat32ToUfloatM5(6, "f32_to_uf11"); @@ -1082,6 +1172,18 @@ void EmitContext::DefineFunctions() { uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32"); uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32"); } + if (info.dma_types != IR::Type::Void) { + get_bda_pointer = DefineGetBdaPointer(); + } + + if (True(info.readconst_types & Info::ReadConstType::Immediate)) { + LOG_DEBUG(Render_Recompiler, "Shader {:#x} uses immediate ReadConst", info.pgm_hash); + read_const = DefineReadConst(false); + } + if (True(info.readconst_types & Info::ReadConstType::Dynamic)) { + LOG_DEBUG(Render_Recompiler, "Shader {:#x} uses dynamic ReadConst", info.pgm_hash); + read_const_dynamic = DefineReadConst(true); + } } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h 
b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 84545a534..a2e0d2f47 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -173,55 +173,33 @@ public: UNREACHABLE_MSG("Unknown type for pointer"); } - template - Id EmitMemoryAccess(Id type, Id address, Func&& fallback) { - const Id fault_label = OpLabel(); + Id EmitMemoryRead(Id type, Id address, auto&& fallback) { const Id available_label = OpLabel(); + const Id fallback_label = OpLabel(); const Id merge_label = OpLabel(); - // Get page BDA - const Id page = OpShiftRightLogical(U64, address, caching_pagebits_value); - const Id page32 = OpUConvert(U32[1], page); - const auto& bda_buffer = buffers[bda_pagetable_index]; - const auto [bda_buffer_id, bda_pointer_type] = bda_buffer[PointerType::U64]; - const Id bda_ptr = OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32); - const Id bda = OpLoad(U64, bda_ptr); - - // Check if the page is GPU mapped - const Id is_fault = OpIEqual(U1[1], bda, u64_zero_value); + const Id addr = OpFunctionCall(U64, get_bda_pointer, address); + const Id is_available = OpINotEqual(U1[1], addr, u64_zero_value); OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone); - OpBranchConditional(is_fault, fault_label, available_label); + OpBranchConditional(is_available, available_label, fallback_label); - // First time access - AddLabel(fault_label); - const auto& fault_buffer = buffers[fault_buffer_index]; - const auto [fault_buffer_id, fault_pointer_type] = fault_buffer[PointerType::U8]; - const Id page_div8 = OpShiftRightLogical(U32[1], page32, u32_three_value); - const Id page_mod8 = OpBitwiseAnd(U32[1], page32, u32_seven_value); - const Id page_mask = OpShiftLeftLogical(U32[1], u32_one_value, page_mod8); - const Id fault_ptr = - OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div8); - const Id fault_value = OpLoad(U8, fault_ptr); - const Id 
page_mask8 = OpUConvert(U8, page_mask); - const Id fault_value_masked = OpBitwiseOr(U8, fault_value, page_mask8); - OpStore(fault_ptr, fault_value_masked); - // Fallback (we are not able to access the page) - const Id fallback_result = fallback(); - OpBranch(merge_label); - - // Value is available + // Available AddLabel(available_label); - const Id offset_in_bda = OpBitwiseAnd(U64, address, caching_pagemask_value); - const Id addr = OpIAdd(U64, bda, offset_in_bda); - const PointerType pointer_type = PointerTypeFromType(type); + const auto pointer_type = PointerTypeFromType(type); const Id pointer_type_id = physical_pointer_types[pointer_type]; const Id addr_ptr = OpConvertUToPtr(pointer_type_id, addr); const Id result = OpLoad(type, addr_ptr, spv::MemoryAccessMask::Aligned, 4u); OpBranch(merge_label); + // Fallback + AddLabel(fallback_label); + const Id fallback_result = fallback(); + OpBranch(merge_label); + // Merge AddLabel(merge_label); - const Id final_result = OpPhi(type, fallback_result, fault_label, result, available_label); + const Id final_result = + OpPhi(type, fallback_result, fallback_label, result, available_label); return final_result; } @@ -255,16 +233,13 @@ public: Id true_value{}; Id false_value{}; - Id u32_seven_value{}; - Id u32_three_value{}; + Id u8_one_value{}; + Id u8_zero_value{}; Id u32_one_value{}; Id u32_zero_value{}; Id f32_zero_value{}; - Id u64_zero_value{}; Id u64_one_value{}; - - Id caching_pagebits_value{}; - Id caching_pagemask_value{}; + Id u64_zero_value{}; Id shared_u8{}; Id shared_u16{}; @@ -403,6 +378,11 @@ public: Id uf10_to_f32{}; Id f32_to_uf10{}; + Id get_bda_pointer{}; + + Id read_const{}; + Id read_const_dynamic{}; + private: void DefineArithmeticTypes(); void DefineInterfaces(); @@ -423,6 +403,10 @@ private: Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name); Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name); + Id DefineGetBdaPointer(); + + Id DefineReadConst(bool dynamic); + Id 
GetBufferSize(u32 sharp_idx); }; diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 88eab20b6..d349d7827 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -217,12 +217,18 @@ struct Info { bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; bool translation_failed{}; - bool has_readconst{}; - IR::Type dma_types{IR::Type::Void}; u8 mrt_mask{0u}; bool has_fetch_shader{false}; u32 fetch_shader_sgpr_base{0u}; + enum class ReadConstType { + None = 0, + Immediate = 1 << 0, + Dynamic = 1 << 1, + }; + ReadConstType readconst_types{}; + IR::Type dma_types{IR::Type::Void}; + explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, user_data{params.user_data} {} @@ -280,6 +286,7 @@ struct Info { sizeof(tess_constants)); } }; +DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType); constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { return inline_cbuf ? inline_cbuf : info.ReadUdSharp(sharp_idx); diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index b5edb854e..d4759b32e 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -80,15 +80,19 @@ void Visit(Info& info, const IR::Inst& inst) { info.uses_lane_id = true; break; case IR::Opcode::ReadConst: - if (!info.has_readconst) { + if (info.readconst_types == Info::ReadConstType::None) { info.buffers.push_back({ .used_types = IR::Type::U32, - // We can't guarantee that flatbuf will now grow bast UBO + // We can't guarantee that flatbuf will not grow past UBO // limit if there are a lot of ReadConsts. 
(We could specialize) .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits::max()), .buffer_type = BufferType::Flatbuf, }); - info.has_readconst = true; + } + if (inst.Flags() != 0) { + info.readconst_types |= Info::ReadConstType::Immediate; + } else { + info.readconst_types |= Info::ReadConstType::Dynamic; } info.dma_types |= IR::Type::U32; break;