From 21f5d8e60822a039494454dfe495ded15f55b1e3 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sun, 22 Jun 2025 22:37:30 +0300 Subject: [PATCH] buffer_cache: Simplify download copy generation --- src/video_core/amdgpu/liverpool.cpp | 4 +- src/video_core/buffer_cache/buffer_cache.cpp | 47 +++++++------------ src/video_core/buffer_cache/buffer_cache.h | 4 +- .../renderer_vulkan/vk_rasterizer.cpp | 4 +- .../renderer_vulkan/vk_rasterizer.h | 2 +- 5 files changed, 23 insertions(+), 38 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 0db1a71d4..5394ff5ff 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -621,7 +621,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); if (rasterizer) { - rasterizer->CommitAsyncFlushes(); + rasterizer->CommitPendingDownloads(); } ++fence_tick; event_eop->SignalFence([](void* address, u64 data, u32 num_bytes) { @@ -1023,7 +1023,7 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq const auto* release_mem = reinterpret_cast(header); ++fence_tick; if (rasterizer) { - rasterizer->CommitAsyncFlushes(); + rasterizer->CommitPendingDownloads(); } release_mem->SignalFence(static_cast(queue.pipe_id)); break; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index c5d415c2c..33746a223 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -187,13 +187,12 @@ void BufferCache::DownloadBufferMemory(const Buffer& buffer, VAddr device_addr, } } -bool BufferCache::CommitAsyncFlushes() { +bool BufferCache::CommitPendingDownloads() { if (pending_download_ranges.Empty()) { return false; } using BufferCopies = boost::container::small_vector; - boost::container::small_vector copies; - boost::container::small_vector buffer_ids; + 
tsl::robin_map copies; u64 total_size_bytes = 0; pending_download_ranges.ForEach([&](VAddr interval_lower, VAddr interval_upper) { const std::size_t size = interval_upper - interval_lower; @@ -203,27 +202,16 @@ bool BufferCache::CommitAsyncFlushes() { const VAddr buffer_end = buffer_start + buffer.SizeBytes(); const VAddr new_start = std::max(buffer_start, device_addr); const VAddr new_end = std::min(buffer_end, device_addr + size); - auto& buffer_copies = copies.emplace_back(); - buffer_ids.emplace_back(buffer_id); - memory_tracker.ForEachDownloadRange(new_start, new_end - new_start, - [&](u64 device_addr_out, u64 range_size) { - const VAddr buffer_addr = buffer.CpuAddr(); - const auto add_download = [&](VAddr start, VAddr end) { - const u64 new_offset = start - buffer_addr; - const u64 new_size = end - start; - buffer_copies.emplace_back(new_offset, total_size_bytes, new_size); - // Align up to avoid cache conflicts - constexpr u64 align = std::hardware_destructive_interference_size; - constexpr u64 mask = ~(align - 1ULL); - total_size_bytes += (new_size + align - 1) & mask; - }; - gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, - add_download); - }); + const u64 new_size = new_end - new_start; + copies[buffer_id].emplace_back(new_start - buffer_start, total_size_bytes, new_size); + // Align up to avoid cache conflicts + constexpr u64 align = std::hardware_destructive_interference_size; + constexpr u64 mask = ~(align - 1ULL); + total_size_bytes += (new_size + align - 1) & mask; }); }); pending_download_ranges.Clear(); - if (copies.empty()) { + if (total_size_bytes == 0) { return false; } const auto [download, offset] = download_buffer.Map(total_size_bytes); @@ -239,26 +227,23 @@ bool BufferCache::CommitAsyncFlushes() { .memoryBarrierCount = 1u, .pMemoryBarriers = &read_barrier, }); - for (s32 i = 0; i < buffer_ids.size(); ++i) { - auto& buffer_copies = copies[i]; + for (auto it = copies.begin(); it != copies.end(); ++it) { + auto& buffer_copies = 
it.value(); if (buffer_copies.empty()) { continue; } for (auto& copy : buffer_copies) { copy.dstOffset += offset; } - const BufferId buffer_id = buffer_ids[i]; + const BufferId buffer_id = it.key(); Buffer& buffer = slot_buffers[buffer_id]; cmdbuf.copyBuffer(buffer.Handle(), download_buffer.Handle(), buffer_copies); } - scheduler.DeferOperation([this, download, offset, buffer_ids, copies]() { + scheduler.DeferOperation([this, download, offset, copies = std::move(copies)]() { auto* memory = Core::Memory::Instance(); - for (s32 i = 0; i < buffer_ids.size(); ++i) { - auto& buffer_copies = copies[i]; - if (buffer_copies.empty()) { - continue; - } - const BufferId buffer_id = buffer_ids[i]; + for (auto it = copies.begin(); it != copies.end(); ++it) { + auto& buffer_copies = it.value(); + const BufferId buffer_id = it.key(); Buffer& buffer = slot_buffers[buffer_id]; for (auto& copy : buffer_copies) { const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 8437b02f1..a1453af06 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -126,8 +126,8 @@ public: /// Performs buffer to buffer data copy on the GPU. void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); - /// Schedules all GPU modified ranges since last commit to be copied back the host memory. - bool CommitAsyncFlushes(); + /// Schedules pending GPU modified ranges since last commit to be copied back to the host memory. + bool CommitPendingDownloads(); /// Obtains a buffer for the specified region. 
[[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 18673390c..21b6ee057 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -60,9 +60,9 @@ void Rasterizer::CpSync() { vk::DependencyFlagBits::eByRegion, ib_barrier, {}, {}); } -bool Rasterizer::CommitAsyncFlushes() { +bool Rasterizer::CommitPendingDownloads() { scheduler.PopPendingOperations(); - return buffer_cache.CommitAsyncFlushes(); + return buffer_cache.CommitPendingDownloads(); } bool Rasterizer::FilterDraw() { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index d81459be6..1ae615b52 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -65,7 +65,7 @@ public: void UnmapMemory(VAddr addr, u64 size); void CpSync(); - bool CommitAsyncFlushes(); + bool CommitPendingDownloads(); u64 Flush(); void Finish(); void ProcessFaults();