From 9356779bb38455a752cce6d444aff0209203cbc7 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Sat, 19 Apr 2025 15:13:14 +0200
Subject: [PATCH] Preparations for implementing SPV DMA access
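
Groundwork for DMA in shaders:

- Rename EmitContext::BufferAlias to PointerType so the same enum can
  index both the per-buffer alias views and the new physical-storage
  pointer types, and extend it with F16, U64 and F64.
- Replace the Info::uses_dma flag with an IR::Type bitmask (dma_types):
  the emitter then knows exactly which pointer types to declare, and a
  shader uses DMA whenever dma_types != IR::Type::Void.
- Select the PhysicalStorageBuffer64 addressing model (plus the
  matching capability and SPV_KHR_physical_storage_buffer extension)
  only for shaders that actually use DMA.
- Create the BDA page-table (U64) and fault-readback (U8) buffers once
  per program in CollectShaderInfoPass instead of inside Visit().
- Invert the BDA page-table convention: bit 0 now marks a page as
  backed by a GPU-local buffer rather than as host-imported.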
---
 .../backend/spirv/emit_spirv.cpp              |  4 ++
 .../backend/spirv/emit_spirv_atomic.cpp       |  6 +-
 .../spirv/emit_spirv_context_get_set.cpp      | 58 +++++++++----------
 .../backend/spirv/spirv_emit_context.cpp      | 44 ++++++++++++--
 .../backend/spirv/spirv_emit_context.h        | 36 ++++++++++--
 src/shader_recompiler/info.h                  |  2 +-
 .../ir/passes/shader_info_collection_pass.cpp | 29 +++++-----
 src/video_core/buffer_cache/buffer_cache.cpp  |  8 +--
 .../renderer_vulkan/vk_rasterizer.cpp         |  2 +-
 9 files changed, 127 insertions(+), 62 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 9ebb842cc..b5f63d98e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -298,6 +298,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
     if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
         ctx.AddCapability(spv::Capability::Tessellation);
     }
+    if (info.dma_types != IR::Type::Void) {
+        ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
+        ctx.AddExtension("SPV_KHR_physical_storage_buffer");
+    }
 }
 
 void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index c3799fb4b..d7c73ca8f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -60,7 +60,7 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
         address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
     }
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
+    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
@@ -257,7 +257,7 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
 
 Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
     const auto& buffer = ctx.buffers[binding];
-    const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
+    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);
@@ -265,7 +265,7 @@ Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
 
 Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
     const auto& buffer = ctx.buffers[binding];
-    const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
+    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);
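
The capability block above is the only stage-independent piece: a shader
requests PhysicalStorageBufferAddresses (and the
SPV_KHR_physical_storage_buffer extension) only when its dma_types mask
is non-empty, so shaders that never touch DMA keep validating against
the plain Logical addressing model. The emit_spirv_atomic.cpp hunks are
mechanical fallout of the BufferAlias -> PointerType rename introduced
in spirv_emit_context.h below.
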
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index a009a837d..35671a840 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -160,14 +160,14 @@ void EmitGetGotoVariable(EmitContext&) {
     UNREACHABLE_MSG("Unreachable instruction");
 }
 
-using BufferAlias = EmitContext::BufferAlias;
+using PointerType = EmitContext::PointerType;
 
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
     const auto& srt_flatbuf = ctx.buffers[ctx.flatbuf_index];
     ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 &&
            srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
-    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
+    const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
     const Id ptr{
         ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(flatbuf_off_dw))};
     return ctx.OpLoad(ctx.U32[1], ptr);
@@ -176,7 +176,7 @@ Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
 Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
     const auto& buffer = ctx.buffers[handle];
     index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
-    const auto [id, pointer_type] = buffer[BufferAlias::U32];
+    const auto [id, pointer_type] = buffer[PointerType::U32];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
     const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
 
@@ -430,7 +430,7 @@ static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size,
     return result;
 }
 
-template <u32 N, BufferAlias alias>
+template <u32 N, PointerType alias>
 static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto flags = inst->Flags<IR::BufferInstInfo>();
     const auto& spv_buffer = ctx.buffers[handle];
@@ -438,7 +438,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
+    const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
     const auto [id, pointer_type] = spv_buffer[alias];
 
     boost::container::static_vector<Id, N> ids;
@@ -449,7 +449,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
         if (!flags.typed) {
             // Untyped loads have bounds checking per-component.
             ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
-                                                       result_i, alias == BufferAlias::F32));
+                                                       result_i, alias == PointerType::F32));
         } else {
             ids.push_back(result_i);
         }
@@ -459,7 +459,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
     if (flags.typed) {
         // Typed loads have single bounds check for the whole load.
         return EmitLoadBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, result,
-                                            alias == BufferAlias::F32);
+                                            alias == PointerType::F32);
     }
     return result;
 }
@@ -469,7 +469,7 @@ Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
+    const auto [id, pointer_type] = spv_buffer[PointerType::U8];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
     const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
     return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
@@ -480,7 +480,7 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
+    const auto [id, pointer_type] = spv_buffer[PointerType::U16];
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
     const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
@@ -488,35 +488,35 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
 }
 
 Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<1, PointerType::U32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<2, PointerType::U32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<3, PointerType::U32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<4, PointerType::U32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<1, PointerType::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<2, PointerType::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<3, PointerType::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
+    return EmitLoadBufferB32xN<4, PointerType::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@@ -546,7 +546,7 @@ void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto
     emit_func();
 }
 
-template <u32 N, BufferAlias alias>
+template <u32 N, PointerType alias>
 static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
                                  Id value) {
     const auto flags = inst->Flags<IR::BufferInstInfo>();
@@ -555,7 +555,7 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
+    const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
     const auto [id, pointer_type] = spv_buffer[alias];
 
     auto store = [&] {
@@ -586,7 +586,7 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
+    const auto [id, pointer_type] = spv_buffer[PointerType::U8];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
     const Id result{ctx.OpUConvert(ctx.U8, value)};
     EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); });
@@ -597,7 +597,7 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
+    const auto [id, pointer_type] = spv_buffer[PointerType::U16];
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
     const Id result{ctx.OpUConvert(ctx.U16, value)};
@@ -606,35 +606,35 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
 }
 
 void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<1, PointerType::U32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<2, PointerType::U32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<3, PointerType::U32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<4, PointerType::U32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<1, PointerType::F32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<2, PointerType::F32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<3, PointerType::F32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value);
+    EmitStoreBufferB32xN<4, PointerType::F32>(ctx, inst, handle, address, value);
 }
 
 void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
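
Everything in emit_spirv_context_get_set.cpp is the same mechanical
rename: the alias used to pick a buffer view (U8/U16/U32/F32) is now
called PointerType, since follow-up work will use the same enum to
select physical-storage pointer types. No codegen changes here.
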
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 0463da8d3..bc3213cee 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -70,6 +70,12 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
                          Bindings& binding_)
     : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
       profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
+    if (info.dma_types != IR::Type::Void) {
+        SetMemoryModel(spv::AddressingModel::PhysicalStorageBuffer64, spv::MemoryModel::GLSL450);
+    } else {
+        SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
+    }
+
     AddCapability(spv::Capability::Shader);
     DefineArithmeticTypes();
     DefineInterfaces();
@@ -157,6 +163,30 @@ void EmitContext::DefineArithmeticTypes() {
     if (info.uses_fp64) {
         frexp_result_f64 = Name(TypeStruct(F64[1], S32[1]), "frexp_result_f64");
     }
+
+    if (True(info.dma_types & IR::Type::F64)) {
+        physical_pointer_types[PointerType::F64] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, F64[1]);
+    }
+    if (True(info.dma_types & IR::Type::U64)) {
+        physical_pointer_types[PointerType::U64] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U64);
+    }
+    if (True(info.dma_types & IR::Type::F32)) {
+        physical_pointer_types[PointerType::F32] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, F32[1]);
+    }
+    if (True(info.dma_types & IR::Type::U32)) {
+        physical_pointer_types[PointerType::U32] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
+    }
+    if (True(info.dma_types & IR::Type::F16)) {
+        physical_pointer_types[PointerType::F16] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, F16[1]);
+    }
+    if (True(info.dma_types & IR::Type::U16)) {
+        physical_pointer_types[PointerType::U16] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U16);
+    }
+
+    // We always want U8 if using DMA, for the fault readback buffer
+    if (info.dma_types != IR::Type::Void) {
+        physical_pointer_types[PointerType::U8] = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U8);
+    }
 }
 
 void EmitContext::DefineInterfaces() {
@@ -198,7 +228,7 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
     // Can this be done with memory access? Like we do now with ReadConst
     const auto& srt_flatbuf = buffers[flatbuf_index];
     ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
-    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
+    const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
     const auto rsrc1{
         OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
 
@@ -736,20 +766,24 @@ void EmitContext::DefineBuffers() {
         // Define aliases depending on the shader usage.
         auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
+        if (True(desc.used_types & IR::Type::U64)) {
+            spv_buffer[PointerType::U64] =
+                DefineBuffer(is_storage, desc.is_written, 3, desc.buffer_type, U64);
+        }
         if (True(desc.used_types & IR::Type::U32)) {
-            spv_buffer[BufferAlias::U32] =
+            spv_buffer[PointerType::U32] =
                 DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
         }
         if (True(desc.used_types & IR::Type::F32)) {
-            spv_buffer[BufferAlias::F32] =
+            spv_buffer[PointerType::F32] =
                 DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
         }
         if (True(desc.used_types & IR::Type::U16)) {
-            spv_buffer[BufferAlias::U16] =
+            spv_buffer[PointerType::U16] =
                 DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
        }
         if (True(desc.used_types & IR::Type::U8)) {
-            spv_buffer[BufferAlias::U8] =
+            spv_buffer[PointerType::U8] =
                 DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
         }
         ++binding.unified;
     }
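
Two things happen in spirv_emit_context.cpp. First, the addressing
model is chosen up front: PhysicalStorageBuffer64 when any DMA type is
present, Logical otherwise. Second, DefineArithmeticTypes pre-declares
one PhysicalStorageBuffer pointer type per IR type in dma_types, with
U8 declared whenever DMA is used at all because the fault readback
buffer is accessed bytewise. In DefineBuffers, the new U64 view follows
the existing pattern; the third DefineBuffer argument appears to be the
log2 of the element size (0 for U8, 1 for U16, 2 for U32/F32, hence 3
for U64).
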
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 61a839cda..3b7a311c6 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -133,12 +133,24 @@ public:
         return ConstantComposite(type, constituents);
     }
 
+    inline Id OpLabel(std::string_view label_name) {
+        last_label = Module::OpLabel(label_name);
+        return last_label;
+    }
+
+    inline Id OpLabel() {
+        last_label = Module::OpLabel();
+        return last_label;
+    }
+
     Info& info;
     const RuntimeInfo& runtime_info;
     const Profile& profile;
     Stage stage;
     LogicalStage l_stage{};
 
+    Id last_label{};
+
     Id void_id{};
     Id U8{};
     Id S8{};
@@ -231,11 +243,14 @@ public:
         bool is_storage = false;
     };
 
-    enum class BufferAlias : u32 {
+    enum class PointerType : u32 {
         U8,
         U16,
+        F16,
         U32,
         F32,
+        U64,
+        F64,
         NumAlias,
     };
 
@@ -252,22 +267,35 @@ public:
         Id size;
         Id size_shorts;
         Id size_dwords;
-        std::array<BufferSpv, u32(BufferAlias::NumAlias)> aliases;
+        std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
 
-        const BufferSpv& operator[](BufferAlias alias) const {
+        const BufferSpv& operator[](PointerType alias) const {
             return aliases[u32(alias)];
         }
 
-        BufferSpv& operator[](BufferAlias alias) {
+        BufferSpv& operator[](PointerType alias) {
             return aliases[u32(alias)];
         }
     };
 
+    struct PhysicalPointerTypes {
+        std::array<Id, u32(PointerType::NumAlias)> types{};
+
+        const Id& operator[](PointerType type) const {
+            return types[u32(type)];
+        }
+
+        Id& operator[](PointerType type) {
+            return types[u32(type)];
+        }
+    };
+
     Bindings& binding;
     boost::container::small_vector<Id, 16> buf_type_ids;
     boost::container::small_vector<BufferDefinition, 16> buffers;
     boost::container::small_vector<TextureDefinition, 8> images;
     boost::container::small_vector<Id, 4> samplers;
+    PhysicalPointerTypes physical_pointer_types;
 
     size_t flatbuf_index{};
     size_t bda_pagetable_index{};
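
The header now carries two parallel alias-indexed tables, both keyed by
the widened PointerType enum: BufferDefinition::aliases (binding-based
buffer views) and the new PhysicalPointerTypes (pointer types for
physical addressing). The OpLabel wrappers additionally remember the
most recently opened block in last_label, presumably so later DMA
codegen can branch back to it. A compilable mini-model of that wrapper
trick, with stand-in types (Id and Module here are simplified doubles
for Sirit::Id and Sirit::Module, not the real API):

    // Shadowing the base-class OpLabel lets every new block record itself.
    #include <cstdint>
    #include <cstdio>

    struct Id { uint32_t value{}; };

    struct Module {
        uint32_t next = 1;
        Id OpLabel() { return Id{next++}; } // allocates a fresh block id
    };

    struct EmitContext : Module {
        Id last_label{};
        Id OpLabel() { // shadows Module::OpLabel, remembering the block
            last_label = Module::OpLabel();
            return last_label;
        }
    };

    int main() {
        EmitContext ctx;
        ctx.OpLabel();
        const Id block = ctx.OpLabel();
        std::printf("last label: %u (== %u)\n", ctx.last_label.value, block.value);
    }
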
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index 08b42b3a2..efaa4ea41 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -211,7 +211,7 @@ struct Info {
     bool stores_tess_level_inner{};
     bool translation_failed{};
     bool has_readconst{};
-    bool uses_dma{};
+    IR::Type dma_types{IR::Type::Void};
     u8 mrt_mask{0u};
     bool has_fetch_shader{false};
     u32 fetch_shader_sgpr_base{0u};
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index 48f7bacd0..c87aafbc7 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -87,21 +87,7 @@ void Visit(Info& info, const IR::Inst& inst) {
             });
             info.has_readconst = true;
         }
-        if (!info.uses_dma) {
-            // For now, we only need U32, but we may need
-            // to add more types in the future for other porposes.
-            info.buffers.push_back({
-                .used_types = IR::Type::U32,
-                .inline_cbuf = AmdGpu::Buffer::Null(),
-                .buffer_type = BufferType::BdaPagetable,
-            });
-            info.buffers.push_back({
-                .used_types = IR::Type::U32,
-                .inline_cbuf = AmdGpu::Buffer::Null(),
-                .buffer_type = BufferType::FaultReadback,
-            });
-            info.uses_dma = true;
-        }
+        info.dma_types |= IR::Type::U32;
         break;
     case IR::Opcode::PackUfloat10_11_11:
         info.uses_pack_10_11_11 = true;
@@ -120,6 +106,19 @@ void CollectShaderInfoPass(IR::Program& program) {
             Visit(program.info, inst);
         }
     }
+
+    if (program.info.dma_types != IR::Type::Void) {
+        program.info.buffers.push_back({
+            .used_types = IR::Type::U64,
+            .inline_cbuf = AmdGpu::Buffer::Null(),
+            .buffer_type = BufferType::BdaPagetable,
+        });
+        program.info.buffers.push_back({
+            .used_types = IR::Type::U8,
+            .inline_cbuf = AmdGpu::Buffer::Null(),
+            .buffer_type = BufferType::FaultReadback,
+        });
+    }
 }
 
 } // namespace Shader::Optimization
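
Replacing the uses_dma flag with a type mask lets Visit() record which
types are DMAed (only U32 today, via ReadConst), while
CollectShaderInfoPass appends the two support buffers once per program:
the BDA page table read as U64 and the fault readback buffer written as
U8. A standalone sketch of the accumulation pattern; the enum values
here are illustrative only (the real IR::Type lives in
src/shader_recompiler/ir/type.h and has more members):

    #include <cstdint>
    #include <cstdio>

    enum class Type : uint32_t {
        Void = 0,
        U8 = 1 << 0,
        U16 = 1 << 1,
        U32 = 1 << 2,
        U64 = 1 << 3,
        F16 = 1 << 4,
        F32 = 1 << 5,
        F64 = 1 << 6,
    };

    constexpr Type operator|(Type a, Type b) {
        return static_cast<Type>(static_cast<uint32_t>(a) | static_cast<uint32_t>(b));
    }
    constexpr Type operator&(Type a, Type b) {
        return static_cast<Type>(static_cast<uint32_t>(a) & static_cast<uint32_t>(b));
    }
    constexpr Type& operator|=(Type& a, Type b) {
        return a = a | b;
    }
    // Equivalent of the True() helper: a non-zero mask means "flag set".
    constexpr bool True(Type t) {
        return static_cast<uint32_t>(t) != 0;
    }

    int main() {
        Type dma_types{Type::Void};
        dma_types |= Type::U32; // e.g. a ReadConst was seen during the pass
        if (dma_types != Type::Void) {
            std::puts("DMA used: append BdaPagetable (U64) and FaultReadback (U8)");
        }
        if (True(dma_types & Type::F64)) {
            std::puts("would declare a PhysicalStorageBuffer pointer to F64");
        }
    }
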
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index ea3ebdd89..6e021d3e2 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -372,10 +372,10 @@ void BufferCache::ImportMemory(u64 start, u64 end) {
         bda_addrs.clear();
         bda_addrs.reserve(range_pages);
         for (u64 i = 0; i < range_pages; ++i) {
-            // Mark the page as host imported to let the shader know
-            // so that it can notify us if it accesses the page, so we can
-            // create a GPU local buffer.
-            bda_addrs.push_back((bda_addr + (i << CACHING_PAGEBITS)) | 0x1);
+            // Leave the page unmarked (not GPU local) so the shader can
+            // notify us when it accesses it, and we can then promote it
+            // to a GPU local buffer.
+            bda_addrs.push_back(bda_addr + (i << CACHING_PAGEBITS));
         }
         WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(vk::DeviceAddress),
                         bda_addrs.data(), bda_addrs.size() * sizeof(vk::DeviceAddress));
@@ -531,8 +531,8 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     const u64 size_pages = size >> CACHING_PAGEBITS;
     bda_addrs.reserve(size_pages);
     for (u64 i = 0; i < size_pages; ++i) {
-        // Here, we do not set the host imported bit.
-        bda_addrs.push_back(new_buffer.BufferDeviceAddress() + (i << CACHING_PAGEBITS));
+        // Here, we mark the page as backed by a GPU local buffer.
+        bda_addrs.push_back((new_buffer.BufferDeviceAddress() + (i << CACHING_PAGEBITS)) | 0x1);
     }
     WriteDataBuffer(bda_pagetable_buffer, start_page * sizeof(vk::DeviceAddress),
                     bda_addrs.data(), bda_addrs.size() * sizeof(vk::DeviceAddress));
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index c86eaaf2b..c5333cf08 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -460,7 +460,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         BindBuffers(*stage, binding, push_data);
         BindTextures(*stage, binding);
 
-        dma_enabled |= stage->uses_dma;
+        dma_enabled |= stage->dma_types != Shader::IR::Type::Void;
     }
 
     pipeline->BindResources(set_writes, buffer_barriers, push_data);
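
The buffer cache inverts the meaning of the page-table tag bit:
CreateBuffer now sets bit 0 to mark pages backed by a GPU-local buffer,
while ImportMemory leaves imported host pages untagged so a shader
touching them can fault back and request promotion. Since entries are
page-aligned device addresses, bit 0 is free to carry the flag. On the
rasterizer side, dma_enabled is simply derived from the per-stage mask.
A self-contained sketch of the tagging convention; the CACHING_PAGEBITS
value is an assumption for illustration (the real constant lives in the
buffer cache headers):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    constexpr uint64_t CACHING_PAGEBITS = 12; // assumed page size of 4 KiB

    constexpr uint64_t MarkGpuLocal(uint64_t bda) {
        return bda | 0x1; // low bit tags a GPU-local-backed page
    }
    constexpr bool IsGpuLocal(uint64_t entry) {
        return (entry & 0x1) != 0;
    }
    constexpr uint64_t EntryAddress(uint64_t entry) {
        return entry & ~uint64_t{0x1}; // strip the tag to recover the address
    }

    int main() {
        const uint64_t base = 0x2'0000'0000ULL;
        // ImportMemory(): host pages stay untagged, so a shader access can
        // fault back to us and the range gets promoted to a GPU local buffer.
        const uint64_t host_entry = base + (3 << CACHING_PAGEBITS);
        assert(!IsGpuLocal(host_entry));
        // CreateBuffer(): pages of a freshly created buffer are tagged.
        const uint64_t local_entry = MarkGpuLocal(base + (4 << CACHING_PAGEBITS));
        assert(IsGpuLocal(local_entry));
        assert(EntryAddress(local_entry) == base + (4 << CACHING_PAGEBITS));
        std::puts("page-table tagging invariants hold");
    }
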