From c077fb97da133529ca164558758b06d2c02a81b8 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Sat, 19 Apr 2025 00:17:16 +0200
Subject: [PATCH] Base DMA implementation

---
 .../spirv/emit_spirv_context_get_set.cpp      |  2 +-
 .../backend/spirv/spirv_emit_context.cpp      | 18 +++++++++++++++-
 .../backend/spirv/spirv_emit_context.h        |  4 ++++
 .../frontend/translate/scalar_memory.cpp      | 13 ++++++------
 src/shader_recompiler/info.h                  |  3 +++
 .../ir/passes/shader_info_collection_pass.cpp | 15 +++++++++++++
 src/video_core/buffer_cache/buffer_cache.cpp  |  7 ++-----
 src/video_core/buffer_cache/buffer_cache.h    |  8 +++----
 .../renderer_vulkan/vk_rasterizer.cpp         | 21 +++++++++++++++++++
 .../renderer_vulkan/vk_rasterizer.h           |  8 +++++++
 10 files changed, 82 insertions(+), 17 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 83e8afd78..a009a837d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -164,7 +164,7 @@ using BufferAlias = EmitContext::BufferAlias;
 
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
-    const auto& srt_flatbuf = ctx.buffers.back();
+    const auto& srt_flatbuf = ctx.buffers[ctx.flatbuf_index];
     ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 &&
            srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
     const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 2640030df..0463da8d3 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -195,7 +195,8 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
 }
 
 Id EmitContext::GetBufferSize(const u32 sharp_idx) {
-    const auto& srt_flatbuf = buffers.back();
+    // Can this be done with memory access? Like we do now with ReadConst
+    const auto& srt_flatbuf = buffers[flatbuf_index];
     ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
     const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
@@ -693,6 +694,12 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
     case Shader::BufferType::ReadConstUbo:
         Name(id, "srt_flatbuf_ubo");
         break;
+    case Shader::BufferType::BdaPagetable:
+        Name(id, "bda_pagetable");
+        break;
+    case Shader::BufferType::FaultReadback:
+        Name(id, "fault_readback");
+        break;
     case Shader::BufferType::SharedMemory:
         Name(id, "ssbo_shmem");
         break;
@@ -718,6 +725,15 @@ void EmitContext::DefineBuffers() {
         const auto buf_sharp = desc.GetSharp(info);
         const bool is_storage = desc.IsStorage(buf_sharp, profile);
 
+        // Set indexes for special buffers.
+        if (desc.buffer_type == BufferType::ReadConstUbo) {
+            flatbuf_index = buffers.size();
+        } else if (desc.buffer_type == BufferType::BdaPagetable) {
+            bda_pagetable_index = buffers.size();
+        } else if (desc.buffer_type == BufferType::FaultReadback) {
+            fault_readback_index = buffers.size();
+        }
+
         // Define aliases depending on the shader usage.
         auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
         if (True(desc.used_types & IR::Type::U32)) {
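Why buffers.back() no longer works: the collection pass below appends the BDA page table and fault readback descriptors after the flatbuf UBO, so the flatbuf is no longer guaranteed to be the last defined buffer; DefineBuffers() therefore records its position in flatbuf_index. For orientation, the load EmitReadConst produces is conceptually just an indexed dword fetch from the flattened SRT buffer. A C++ mirror of that access (illustrative only, not the emitted SPIR-V):

    #include <cstdint>

    // The flatbuf UBO holds the flattened user-data/SRT contents as a dword
    // array; flatbuf_off_dw (carried in the instruction flags) selects the dword.
    uint32_t ReadConstMirror(const uint32_t* srt_flatbuf, uint32_t flatbuf_off_dw) {
        return srt_flatbuf[flatbuf_off_dw];
    }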
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 38d55e0e4..61a839cda 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -269,6 +269,10 @@ public:
     boost::container::small_vector images;
     boost::container::small_vector samplers;
 
+    size_t flatbuf_index{};
+    size_t bda_pagetable_index{};
+    size_t fault_readback_index{};
+
     Id sampler_type{};
     Id sampler_pointer_type{};
diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
index 89426e080..60267d674 100644
--- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
@@ -39,21 +39,22 @@ void Translator::EmitScalarMemory(const GcnInst& inst) {
 
 void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
     const auto& smrd = inst.control.smrd;
-    const u32 dword_offset = [&] -> u32 {
+    const IR::ScalarReg sbase{inst.src[0].code * 2};
+    const IR::U32 dword_offset = [&] -> IR::U32 {
         if (smrd.imm) {
-            return smrd.offset;
+            return ir.Imm32(smrd.offset);
         }
         if (smrd.offset == SQ_SRC_LITERAL) {
-            return inst.src[1].code;
+            return ir.Imm32(inst.src[1].code);
         }
-        UNREACHABLE();
+        return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
     }();
-    const IR::ScalarReg sbase{inst.src[0].code * 2};
     const IR::Value base =
         ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
     IR::ScalarReg dst_reg{inst.dst[0].code};
     for (u32 i = 0; i < num_dwords; i++) {
-        ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
+        IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
+        ir.SetScalarReg(dst_reg++, ir.ReadConst(base, index));
     }
 }
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index 784f8b4d2..08b42b3a2 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -42,6 +42,8 @@ constexpr u32 NUM_TEXTURE_TYPES = 7;
 enum class BufferType : u32 {
     Guest,
     ReadConstUbo,
+    BdaPagetable,
+    FaultReadback,
     GdsBuffer,
     SharedMemory,
 };
@@ -209,6 +211,7 @@ struct Info {
     bool stores_tess_level_inner{};
     bool translation_failed{};
     bool has_readconst{};
+    bool uses_dma{};
     u8 mrt_mask{0u};
     bool has_fetch_shader{false};
     u32 fetch_shader_sgpr_base{0u};
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index f53a0f4d4..48f7bacd0 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -87,6 +87,21 @@ void Visit(Info& info, const IR::Inst& inst) {
         });
         info.has_readconst = true;
     }
+    if (!info.uses_dma) {
+        // For now, we only need U32, but we may need
+        // to add more types in the future for other purposes.
+        info.buffers.push_back({
+            .used_types = IR::Type::U32,
+            .inline_cbuf = AmdGpu::Buffer::Null(),
+            .buffer_type = BufferType::BdaPagetable,
+        });
+        info.buffers.push_back({
+            .used_types = IR::Type::U32,
+            .inline_cbuf = AmdGpu::Buffer::Null(),
+            .buffer_type = BufferType::FaultReadback,
+        });
+        info.uses_dma = true;
+    }
     break;
 case IR::Opcode::PackUfloat10_11_11:
     info.uses_pack_10_11_11 = true;
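The S_LOAD_DWORD rework is what makes runtime offsets possible: besides the immediate and literal forms, the SMRD offset field can name an SGPR holding a byte offset, which is now shifted right by two to get a dword offset instead of hitting UNREACHABLE(). Because any ReadConst may now carry a runtime offset, the collection pass attaches the two DMA descriptors to every shader that uses ReadConst. A plain-C++ sketch of the three offset decodings (the SQ_SRC_LITERAL value of 255 is an assumption, matching the usual GCN source encoding):

    #include <cstdint>

    // Decode an SMRD offset into a dword index, mirroring the offset lambda
    // in S_LOAD_DWORD above.
    uint32_t DecodeSmrdDwordOffset(bool imm, uint32_t offset_field, uint32_t literal,
                                   const uint32_t* sgprs) {
        constexpr uint32_t kSrcLiteral = 255; // assumed SQ_SRC_LITERAL encoding
        if (imm) {
            return offset_field; // immediate offsets are already in dwords
        }
        if (offset_field == kSrcLiteral) {
            return literal; // literal dword offset taken from the instruction stream
        }
        return sgprs[offset_field] >> 2; // SGPR offsets are byte offsets
    }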
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index db35099f8..ea3ebdd89 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -379,10 +379,7 @@ void BufferCache::ImportMemory(u64 start, u64 end) {
         }
         WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(vk::DeviceAddress),
                         bda_addrs.data(), bda_addrs.size() * sizeof(vk::DeviceAddress));
-        {
-            std::scoped_lock lk{mutex};
-            imported_buffers.emplace_back(std::move(buffer));
-        }
+        imported_buffers.emplace_back(std::move(buffer));
         // Mark the pages as covered
         imported_regions += range;
     }
@@ -850,7 +847,7 @@ void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* val
     vk::Buffer src_buffer = staging_buffer.Handle();
     if (num_bytes < StagingBufferSize) {
         const auto [staging, offset] = staging_buffer.Map(num_bytes);
-        std::memcpy(staging + offset, value, num_bytes);
+        std::memcpy(staging, value, num_bytes);
         copy.srcOffset = offset;
         staging_buffer.Commit();
     } else {
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b970d8acb..2963df34a 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -79,13 +79,13 @@ public:
     }
 
     /// Retrieves the device local BDA page table buffer.
-    [[nodiscard]] Buffer& GetBdaPageTableBuffer() noexcept {
-        return bda_pagetable_buffer;
+    [[nodiscard]] Buffer* GetBdaPageTableBuffer() noexcept {
+        return &bda_pagetable_buffer;
     }
 
     /// Retrieves the fault readback buffer.
-    [[nodiscard]] Buffer& GetFaultReadbackBuffer() noexcept {
-        return fault_readback_buffer;
+    [[nodiscard]] Buffer* GetFaultReadbackBuffer() noexcept {
+        return &fault_readback_buffer;
     }
 
     /// Retrieves the buffer with the specified id.
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index c91ece24a..c86eaaf2b 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -459,9 +459,24 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         stage->PushUd(binding, push_data);
         BindBuffers(*stage, binding, push_data);
         BindTextures(*stage, binding);
+
+        dma_enabled |= stage->uses_dma;
     }
 
     pipeline->BindResources(set_writes, buffer_barriers, push_data);
+
+    if (dma_enabled) {
+        // First, import any queued host memory, then sync every mapped
+        // region that is cached on GPU memory.
+        buffer_cache.ImportQueuedRegions();
+        {
+            std::shared_lock lock(mapped_ranges_mutex);
+            for (const auto& range : mapped_ranges) {
+                buffer_cache.SynchronizeRange(range.lower(), range.upper() - range.lower());
+            }
+        }
+    }
+
     return true;
 }
 
@@ -526,6 +541,12 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
             const u64 offset = vk_buffer.Copy(stage.flattened_ud_buf.data(), ubo_size,
                                               instance.UniformMinAlignment());
             buffer_infos.emplace_back(vk_buffer.Handle(), offset, ubo_size);
+        } else if (desc.buffer_type == Shader::BufferType::BdaPagetable) {
+            const auto* bda_buffer = buffer_cache.GetBdaPageTableBuffer();
+            buffer_infos.emplace_back(bda_buffer->Handle(), 0, bda_buffer->SizeBytes());
+        } else if (desc.buffer_type == Shader::BufferType::FaultReadback) {
+            const auto* fault_buffer = buffer_cache.GetFaultReadbackBuffer();
+            buffer_infos.emplace_back(fault_buffer->Handle(), 0, fault_buffer->SizeBytes());
         } else if (desc.buffer_type == Shader::BufferType::SharedMemory) {
             auto& lds_buffer = buffer_cache.GetStreamBuffer();
             const auto& cs_program = liverpool->GetCsRegs();
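ImportMemory fills bda_pagetable_buffer with one vk::DeviceAddress per guest page, and BindResources binds that table plus the fault readback buffer to any stage whose info reports uses_dma. The translation the shader side is then able to perform looks roughly like the following C++ mirror; the page size and the one-bit-per-page fault layout are assumptions, not spelled out by this patch:

    #include <cstdint>

    constexpr uint64_t PAGE_BITS = 12; // assumed 4 KiB pages

    // Translate a guest virtual address through the BDA page table. If the
    // page has no device address yet, record a fault so the host can cache
    // the memory after the submit (see CreateFaultBuffers in the next file).
    uint64_t TranslateGuestVa(const uint64_t* bda_pagetable, uint32_t* fault_readback,
                              uint64_t guest_va) {
        const uint64_t page = guest_va >> PAGE_BITS;
        const uint64_t bda = bda_pagetable[page];
        if (bda == 0) {
            fault_readback[page / 32] |= 1u << (page % 32);
            return 0; // unmapped; this access cannot be serviced in this submit
        }
        return bda + (guest_va & ((1ull << PAGE_BITS) - 1));
    }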
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 4e0ed0996..c4b9040f6 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -95,6 +95,13 @@ private:
             texture_cache.GetImage(image_id).binding.Reset();
         }
         bound_images.clear();
+
+        if (dma_enabled) {
+            dma_enabled = false;
+            // If a shader accesses a buffer that is not cached, we need to
+            // cache it.
+            buffer_cache.CreateFaultBuffers();
+        }
     }
 
     bool IsComputeMetaClear(const Pipeline* pipeline);
@@ -126,6 +133,7 @@ private:
     boost::container::static_vector buffer_bindings;
     using ImageBindingInfo = std::pair;
     boost::container::static_vector image_bindings;
+    bool dma_enabled{false};
 };
 
 } // namespace Vulkan
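After the commands run, Reset() clears dma_enabled and calls buffer_cache.CreateFaultBuffers(), closing the loop: pages the shaders could not translate are read back from the fault buffer and turned into cached buffers for the next submission. A self-contained sketch of that scan; the helper name, callback, and bit layout are assumptions consistent with the sketch above, not the patch's actual implementation:

    #include <bit>
    #include <cstdint>
    #include <functional>
    #include <span>

    constexpr uint64_t PAGE_BITS = 12; // assumed 4 KiB pages, as above

    // Walk the downloaded fault words and report each faulted page so the
    // buffer cache can create a buffer covering it before the next submit.
    void ForEachFaultedPage(std::span<const uint32_t> fault_words,
                            const std::function<void(uint64_t va, uint64_t size)>& create_buffer) {
        for (size_t word = 0; word < fault_words.size(); ++word) {
            uint32_t bits = fault_words[word];
            while (bits != 0) {
                const uint64_t page = word * 32ull + std::countr_zero(bits);
                bits &= bits - 1; // clear the lowest set bit
                create_buffer(page << PAGE_BITS, 1ull << PAGE_BITS);
            }
        }
    }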