From 3ece6fc8d368506d33f9e91d9aa472f0328f850b Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Fri, 18 Oct 2024 22:02:06 +0300 Subject: [PATCH] liverpool: More dma data impl --- src/core/address_space.h | 4 ++ src/core/memory.cpp | 11 ++++ src/core/memory.h | 2 + src/video_core/amdgpu/liverpool.cpp | 41 +++++++++++- src/video_core/amdgpu/pm4_cmds.h | 29 ++++++--- src/video_core/buffer_cache/buffer_cache.cpp | 62 +++++++++---------- src/video_core/buffer_cache/buffer_cache.h | 15 ++--- .../renderer_vulkan/vk_compute_pipeline.cpp | 19 +++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 15 +++-- .../renderer_vulkan/vk_pipeline_common.cpp | 37 +++++------ .../renderer_vulkan/vk_rasterizer.cpp | 4 +- .../renderer_vulkan/vk_rasterizer.h | 2 +- 12 files changed, 157 insertions(+), 84 deletions(-) diff --git a/src/core/address_space.h b/src/core/address_space.h index 3233c7588..7ccc2cd1e 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -45,6 +45,10 @@ public: explicit AddressSpace(); ~AddressSpace(); + [[nodiscard]] u8* BackingBase() const noexcept { + return backing_base; + } + [[nodiscard]] VAddr SystemManagedVirtualBase() noexcept { return reinterpret_cast(system_managed_base); } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 644f9389f..3c1cb0a98 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -54,6 +54,17 @@ void MemoryManager::SetupMemoryRegions(u64 flexible_size) { total_flexible_size, total_direct_size); } +bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) { + const VAddr virtual_addr = std::bit_cast(address); + const auto& vma = FindVMA(virtual_addr)->second; + if (vma.type != VMAType::Direct) { + return false; + } + u8* backing = impl.BackingBase() + vma.phys_base + (virtual_addr - vma.base); + memcpy(backing, data, num_bytes); + return true; +} + PAddr MemoryManager::PoolExpand(PAddr search_start, PAddr search_end, size_t size, u64 alignment) { std::scoped_lock lk{mutex}; diff --git a/src/core/memory.h b/src/core/memory.h index 320aa367a..286f1c979 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -149,6 +149,8 @@ public: return impl.SystemReservedVirtualBase(); } + bool TryWriteBacking(void* address, const void* data, u32 num_bytes); + void SetupMemoryRegions(u64 flexible_size); PAddr PoolExpand(PAddr search_start, PAddr search_end, size_t size, u64 alignment); diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 7b4727088..d26ed96cc 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -8,6 +8,7 @@ #include "common/thread.h" #include "core/debug_state.h" #include "core/libraries/videoout/driver.h" +#include "core/memory.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" #include "video_core/renderdoc.h" @@ -504,7 +505,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - event_eos->SignalFence(); + event_eos->SignalFence([](void* address, u64 data, u32 num_bytes) { + auto* memory = Core::Memory::Instance(); + if (!memory->TryWriteBacking(address, &data, num_bytes)) { + memcpy(address, &data, num_bytes); + } + }); if (event_eos->command == PM4CmdEventWriteEos::Command::GdsStore) { ASSERT(event_eos->size == 1); if (rasterizer) { @@ -517,13 +523,42 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - event_eop->SignalFence(); + event_eop->SignalFence([](void* address, u64 data, u32 num_bytes) { + auto* memory = Core::Memory::Instance(); + if (!memory->TryWriteBacking(address, &data, num_bytes)) { + memcpy(address, &data, num_bytes); + } + }); break; } case PM4ItOpcode::DmaData: { const auto* dma_data = reinterpret_cast(header); + if (dma_data->dst_addr_lo == 0x3022C) { + break; + } if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->InlineDataToGds(dma_data->dst_addr_lo, dma_data->data); + rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), + true); + } else if (dma_data->src_sel == DmaDataSrc::Memory && + dma_data->dst_sel == DmaDataDst::Gds) { + rasterizer->InlineData(dma_data->dst_addr_lo, + dma_data->SrcAddress(), + dma_data->NumBytes(), true); + } else if (dma_data->src_sel == DmaDataSrc::Data && + dma_data->dst_sel == DmaDataDst::Memory) { + rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, + sizeof(u32), false); + } else if (dma_data->src_sel == DmaDataSrc::Gds && + dma_data->dst_sel == DmaDataDst::Memory) { + LOG_WARNING(Render_Vulkan, "GDS memory read"); + } else if (dma_data->src_sel == DmaDataSrc::Memory && + dma_data->dst_sel == DmaDataDst::Memory) { + rasterizer->InlineData(dma_data->DstAddress(), + dma_data->SrcAddress(), + dma_data->NumBytes(), false); + } else { + UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", + u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); } break; } diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index a7a862ea3..fb71fc832 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -313,25 +313,26 @@ struct PM4CmdEventWriteEop { return data_lo | u64(data_hi) << 32; } - void SignalFence() const { + void SignalFence(auto&& write_mem) const { + u32* address = Address(); switch (data_sel.Value()) { case DataSelect::None: { break; } case DataSelect::Data32Low: { - *Address() = DataDWord(); + write_mem(address, DataDWord(), sizeof(u32)); break; } case DataSelect::Data64: { - *Address() = DataQWord(); + write_mem(address, DataQWord(), sizeof(u64)); break; } case DataSelect::GpuClock64: { - *Address() = GetGpuClock64(); + write_mem(address, GetGpuClock64(), sizeof(u64)); break; } case DataSelect::PerfCounter: { - *Address() = Common::FencedRDTSC(); + write_mem(address, Common::FencedRDTSC(), sizeof(u64)); break; } default: { @@ -401,6 +402,20 @@ struct PM4DmaData { u32 dst_addr_lo; u32 dst_addr_hi; u32 command; + + template + T SrcAddress() const { + return reinterpret_cast(src_addr_lo | u64(src_addr_hi) << 32); + } + + template + T DstAddress() const { + return reinterpret_cast(dst_addr_lo | u64(dst_addr_hi) << 32); + } + + u32 NumBytes() const noexcept { + return command & 0x1fffff; + } }; struct PM4CmdWaitRegMem { @@ -534,11 +549,11 @@ struct PM4CmdEventWriteEos { return this->data; } - void SignalFence() const { + void SignalFence(auto&& write_mem) const { const auto cmd = command.Value(); switch (cmd) { case Command::SignalFence: { - *Address() = DataDWord(); + write_mem(Address(), DataDWord(), sizeof(u32)); break; } case Command::GdsStore: { diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index e4bdb5339..f665ba512 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -20,7 +20,7 @@ static constexpr size_t StagingBufferSize = 1_GB; static constexpr size_t UboStreamBufferSize = 64_MB; BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, - const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, + AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, PageManager& tracker_) : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, texture_cache{texture_cache_}, tracker{tracker_}, @@ -70,11 +70,10 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) { boost::container::small_vector copies; u64 total_size_bytes = 0; - u64 largest_copy = 0; memory_tracker.ForEachDownloadRange( device_addr, size, [&](u64 device_addr_out, u64 range_size) { const VAddr buffer_addr = buffer.CpuAddr(); - const auto add_download = [&](VAddr start, VAddr end, u64) { + const auto add_download = [&](VAddr start, VAddr end) { const u64 new_offset = start - buffer_addr; const u64 new_size = end - start; copies.push_back(vk::BufferCopy{ @@ -82,12 +81,10 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si .dstOffset = total_size_bytes, .size = new_size, }); - // Align up to avoid cache conflicts - constexpr u64 align = 64ULL; - constexpr u64 mask = ~(align - 1ULL); - total_size_bytes += (new_size + align - 1) & mask; - largest_copy = std::max(largest_copy, new_size); + total_size_bytes += new_size; }; + gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download); + gpu_modified_ranges.Subtract(device_addr_out, range_size); }); if (total_size_bytes == 0) { return; @@ -181,6 +178,9 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { .divisor = 1, }); } + if (ranges.empty()) { + return false; + } std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) { return lhv.base_address < rhv.base_address; @@ -269,34 +269,45 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { return regs.num_indices; } -void BufferCache::InlineDataToGds(u32 gds_offset, u32 value) { - ASSERT_MSG(gds_offset % 4 == 0, "GDS offset must be dword aligned"); +void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { + ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned"); + if (!is_gds && !IsRegionRegistered(address, num_bytes)) { + memcpy(std::bit_cast(address), value, num_bytes); + return; + } scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); + const Buffer* buffer = [&] { + if (is_gds) { + return &gds_buffer; + } + const BufferId buffer_id = FindBuffer(address, num_bytes); + return &slot_buffers[buffer_id]; + }(); const vk::BufferMemoryBarrier2 buf_barrier = { .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, - .buffer = gds_buffer.Handle(), - .offset = gds_offset, - .size = sizeof(u32), + .buffer = buffer->Handle(), + .offset = buffer->Offset(address), + .size = num_bytes, }; cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &buf_barrier, }); - cmdbuf.updateBuffer(gds_buffer.Handle(), gds_offset, sizeof(u32), &value); + cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); } std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written, bool is_texel_buffer, BufferId buffer_id) { + // For small uniform buffers that have not been modified by gpu + // use device local stream buffer to reduce renderpass breaks. static constexpr u64 StreamThreshold = CACHING_PAGESIZE; const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size); if (!is_written && size <= StreamThreshold && !is_gpu_dirty) { - // For small uniform buffers that have not been modified by gpu - // use device local stream buffer to reduce renderpass breaks. const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment()); return {&stream_buffer, offset}; } @@ -308,7 +319,7 @@ std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer); if (is_written) { memory_tracker.MarkRegionAsGpuModified(device_addr, size); - gpu_regions.Add(device_addr, size); + gpu_modified_ranges.Add(device_addr, size); } return {&buffer, buffer.Offset(device_addr)}; } @@ -477,7 +488,7 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); - DeleteBuffer(overlap_id, true); + DeleteBuffer(overlap_id); } BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { @@ -532,7 +543,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, u64 total_size_bytes = 0; u64 largest_copy = 0; VAddr buffer_start = buffer.CpuAddr(); - const auto add_copy = [&](VAddr device_addr_out, u64 range_size) { + memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { copies.push_back(vk::BufferCopy{ .srcOffset = total_size_bytes, .dstOffset = device_addr_out - buffer_start, @@ -540,13 +551,6 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, }); total_size_bytes += range_size; largest_copy = std::max(largest_copy, range_size); - }; - memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { - bool has_gpu = false; - gpu_regions.ForEachInRange(device_addr_out, range_size, [&](VAddr, VAddr) { has_gpu = true; }); - add_copy(device_addr_out, range_size); - // Prevent uploading to gpu modified regions. - // gpu_modified_ranges.ForEachNotInRange(device_addr_out, range_size, add_copy); }); SCOPE_EXIT { if (is_texel_buffer) { @@ -659,12 +663,8 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, return true; } -void BufferCache::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { - // Mark the whole buffer as CPU written to stop tracking CPU writes +void BufferCache::DeleteBuffer(BufferId buffer_id) { Buffer& buffer = slot_buffers[buffer_id]; - if (!do_not_mark) { - memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); - } Unregister(buffer_id); scheduler.DeferOperation([this, buffer_id] { slot_buffers.erase(buffer_id); }); buffer.is_deleted = true; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 510f2525c..6710c8615 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -54,7 +54,7 @@ public: public: explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - const AmdGpu::Liverpool* liverpool, TextureCache& texture_cache, + AmdGpu::Liverpool* liverpool, TextureCache& texture_cache, PageManager& tracker); ~BufferCache(); @@ -81,12 +81,13 @@ public: /// Bind host index buffer for the current draw. u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); - /// Writes a value to GDS buffer. - void InlineDataToGds(u32 gds_offset, u32 value); + /// Writes a value to GPU buffer. + void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); /// Obtains a buffer for the specified region. [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written, - bool is_texel_buffer = false, BufferId buffer_id = {}); + bool is_texel_buffer = false, + BufferId buffer_id = {}); /// Attempts to obtain a buffer without modifying the cache contents. [[nodiscard]] std::pair ObtainViewBuffer(VAddr gpu_addr, u32 size); @@ -139,11 +140,11 @@ private: bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size); - void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); + void DeleteBuffer(BufferId buffer_id); const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; - const AmdGpu::Liverpool* liverpool; + AmdGpu::Liverpool* liverpool; TextureCache& texture_cache; PageManager& tracker; StreamBuffer staging_buffer; @@ -151,7 +152,7 @@ private: Buffer gds_buffer; std::mutex mutex; Common::SlotVector slot_buffers; - RangeSet gpu_regions; + RangeSet gpu_modified_ranges; vk::BufferView null_buffer_view; MemoryTracker memory_tracker; PageTable page_table; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index b65ec4481..7122ca134 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -119,14 +119,15 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, info->PushUd(binding, push_data); + buffer_infos.clear(); + buffer_views.clear(); + image_infos.clear(); + // Most of the time when a metadata is updated with a shader it gets cleared. It means // we can skip the whole dispatch and update the tracked state instead. Also, it is not // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we // will need its full emulation anyways. For cases of metadata read a warning will be logged. - for (const auto& desc : info->buffers) { - if (desc.is_gds_buffer) { - continue; - } + for (const auto& desc : info->texture_buffers) { const VAddr address = desc.GetSharp(*info).base_address; if (desc.is_written) { if (texture_cache.TouchMeta(address, true)) { @@ -140,14 +141,15 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, } } - BindBuffers(buffer_cache, texture_cache, *info, binding, push_data, - set_writes, buffer_barriers); + BindBuffers(buffer_cache, texture_cache, *info, binding, push_data, set_writes, + buffer_barriers); BindTextures(texture_cache, *info, binding, set_writes); if (set_writes.empty()) { return false; } + const auto cmdbuf = scheduler.CommandBuffer(); if (!buffer_barriers.empty()) { const auto dependencies = vk::DependencyInfo{ @@ -158,8 +160,10 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, scheduler.EndRendering(); cmdbuf.pipelineBarrier2(dependencies); } + cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data), &push_data); + // Bind descriptor set. if (uses_push_descriptors) { cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, @@ -171,8 +175,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, set_write.dstSet = desc_set; } instance.GetDevice().updateDescriptorSets(set_writes, {}); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, desc_set, - {}); + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, desc_set, {}); return true; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 48854b418..f6d0b49b6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -5,8 +5,8 @@ #include #include -#include "common/scope_exit.h" #include "common/assert.h" +#include "common/scope_exit.h" #include "video_core/amdgpu/resource.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -389,6 +389,10 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, Shader::PushData push_data{}; Shader::Backend::Bindings binding{}; + buffer_infos.clear(); + buffer_views.clear(); + image_infos.clear(); + for (const auto* stage : stages) { if (!stage) { continue; @@ -399,8 +403,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, } stage->PushUd(binding, push_data); - BindBuffers(buffer_cache, texture_cache, *stage, binding, push_data, - set_writes, buffer_barriers); + BindBuffers(buffer_cache, texture_cache, *stage, binding, push_data, set_writes, + buffer_barriers); BindTextures(texture_cache, *stage, binding, set_writes); } @@ -414,6 +418,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, if (set_writes.empty()) { return; } + if (!buffer_barriers.empty()) { const auto dependencies = vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, @@ -423,6 +428,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, scheduler.EndRendering(); cmdbuf.pipelineBarrier2(dependencies); } + // Bind descriptor set. if (uses_push_descriptors) { cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, @@ -434,8 +440,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, set_write.dstSet = desc_set; } instance.GetDevice().updateDescriptorSets(set_writes, {}); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, - desc_set, {}); + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, desc_set, {}); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp index 8af7ec55e..efe2838e4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -4,11 +4,11 @@ #include #include "shader_recompiler/info.h" +#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_common.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/texture_cache.h" -#include "video_core/buffer_cache/buffer_cache.h" namespace Vulkan { @@ -22,15 +22,14 @@ Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorH Pipeline::~Pipeline() = default; -void Pipeline::BindBuffers(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache, - const Shader::Info& stage, - Shader::Backend::Bindings& binding, Shader::PushData& push_data, - DescriptorWrites& set_writes, BufferBarriers& buffer_barriers) const { +void Pipeline::BindBuffers(VideoCore::BufferCache& buffer_cache, + VideoCore::TextureCache& texture_cache, const Shader::Info& stage, + Shader::Backend::Bindings& binding, Shader::PushData& push_data, + DescriptorWrites& set_writes, BufferBarriers& buffer_barriers) const { using BufferBindingInfo = std::pair; static boost::container::static_vector buffer_bindings; buffer_bindings.clear(); - buffer_infos.clear(); for (const auto& desc : stage.buffers) { const auto vsharp = desc.GetSharp(stage); @@ -46,7 +45,6 @@ void Pipeline::BindBuffers(VideoCore::BufferCache& buffer_cache, VideoCore::Text static boost::container::static_vector texbuffer_bindings; texbuffer_bindings.clear(); - buffer_views.clear(); for (const auto& desc : stage.texture_buffers) { const auto vsharp = desc.GetSharp(stage); @@ -75,16 +73,16 @@ void Pipeline::BindBuffers(VideoCore::BufferCache& buffer_cache, VideoCore::Text buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE); } } else { - const auto [vk_buffer, offset] = - buffer_cache.ObtainBuffer(vsharp.base_address, vsharp.GetSize(), desc.is_written, - false, buffer_id); + const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer( + vsharp.base_address, vsharp.GetSize(), desc.is_written, false, buffer_id); const u32 alignment = is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 adjust = offset - offset_aligned; ASSERT(adjust % 4 == 0); push_data.AddOffset(binding.buffer, adjust); - buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, vsharp.GetSize() + adjust); + buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, + vsharp.GetSize() + adjust); } set_writes.push_back({ @@ -107,9 +105,8 @@ void Pipeline::BindBuffers(VideoCore::BufferCache& buffer_cache, VideoCore::Text vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view); if (buffer_id) { const u32 alignment = instance.TexelBufferMinAlignment(); - const auto [vk_buffer, offset] = - buffer_cache.ObtainBuffer(vsharp.base_address, vsharp.GetSize(), desc.is_written, true, - buffer_id); + const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer( + vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id); const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; ASSERT_MSG(fmt_stride == vsharp.GetStride(), "Texel buffer stride must match format stride"); @@ -117,12 +114,13 @@ void Pipeline::BindBuffers(VideoCore::BufferCache& buffer_cache, VideoCore::Text const u32 adjust = offset - offset_aligned; ASSERT(adjust % fmt_stride == 0); push_data.AddOffset(binding.buffer, adjust / fmt_stride); - buffer_view = vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written, - vsharp.GetDataFmt(), vsharp.GetNumberFmt()); + buffer_view = + vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written, + vsharp.GetDataFmt(), vsharp.GetNumberFmt()); if (auto barrier = - vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite - : vk::AccessFlagBits2::eShaderRead, - vk::PipelineStageFlagBits2::eComputeShader)) { + vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite + : vk::AccessFlagBits2::eShaderRead, + vk::PipelineStageFlagBits2::eComputeShader)) { buffer_barriers.emplace_back(*barrier); } if (desc.is_written) { @@ -151,7 +149,6 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader static boost::container::static_vector image_bindings; image_bindings.clear(); - image_infos.clear(); for (const auto& image_desc : stage.images) { const auto tsharp = image_desc.GetSharp(stage); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 88e91696a..14a73261d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -258,8 +258,8 @@ void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline) { scheduler.BeginRendering(state); } -void Rasterizer::InlineDataToGds(u32 gds_offset, u32 value) { - buffer_cache.InlineDataToGds(gds_offset, value); +void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { + buffer_cache.InlineData(address, value, num_bytes, is_gds); } u32 Rasterizer::ReadDataFromGds(u32 gds_offset) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index bc14f39a4..d5cfbfd66 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -42,7 +42,7 @@ public: void ScopedMarkerInsert(const std::string_view& str); void ScopedMarkerInsertColor(const std::string_view& str, const u32 color); - void InlineDataToGds(u32 gds_offset, u32 value); + void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); u32 ReadDataFromGds(u32 gsd_offset); void InvalidateMemory(VAddr addr, u64 size); void MapMemory(VAddr addr, u64 size);