Sync the whole buffer instead of only the range

This commit is contained in:
Lander Gallastegi 2025-05-05 22:23:29 +02:00
parent c6757763e4
commit 01a0e00dbb
4 changed files with 22 additions and 15 deletions

View File

@ -909,18 +909,18 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
return true;
}
void BufferCache::SynchronizeRange(VAddr device_addr, u64 size) {
void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
if (device_addr == 0) {
return;
}
VAddr device_addr_end = device_addr + size;
ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
VAddr buffer_start = buffer.CpuAddr();
VAddr buffer_end = buffer_start + buffer.SizeBytes();
VAddr start = std::max(buffer_start, device_addr);
VAddr end = std::min(buffer_end, device_addr_end);
u32 size = static_cast<u32>(end - start);
SynchronizeBuffer(buffer, start, size, false);
// Note that this function synchronizes the whole buffer, not just the range.
// This is because this function is used to sync buffers before using a
// shader that uses DMA.
// The ideal solution would be to sync all the mapped regions, but it is
// very slow.
SynchronizeBuffer(buffer, buffer.CpuAddr(), buffer.SizeBytes(), false);
});
}

View File

@ -137,7 +137,7 @@ public:
void ProcessFaultBuffer();
/// Synchronizes all buffers in the specified range.
void SynchronizeRange(VAddr device_addr, u64 size);
void SynchronizeBuffersInRange(VAddr device_addr, u64 size);
/// Record memory barrier. Used for buffers when accessed via BDA.
void MemoryBarrier();

View File

@ -474,14 +474,15 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
pipeline->BindResources(set_writes, buffer_barriers, push_data);
if (uses_dma && !fault_process_pending) {
if (uses_dma) {
// We only use fault buffer for DMA right now.
{
std::shared_lock lock{dma_sync_mapped_ranges_mutex};
for (const auto& range : dma_sync_mapped_ranges) {
buffer_cache.SynchronizeRange(range.lower(), range.upper() - range.lower());
buffer_cache.SynchronizeBuffersInRange(range.lower(), range.upper() - range.lower());
}
}
buffer_cache.MemoryBarrier();
}
fault_process_pending |= uses_dma;

View File

@ -91,11 +91,17 @@ void Scheduler::Wait(u64 tick) {
}
master_semaphore.Wait(tick);
// Apply pending operations until the wait tick
while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
pending_ops.front().callback();
pending_ops.pop();
}
// TODO: We should be applying pending operations here because that gives us
// the ability to use mapped regions on stream buffers in deferred operations.
// We don't do that right now because it might introduce variations in the
// timing and, since we don't sync with the GPU, some games might be affected by that.
// It shouldn't be an issue right now, because we only use mapped regions in
// deferred operations to download faulted addresses. That is only 8KB every tick
// and the stream buffer is 256MB. The GPU doesn't fall that far behind.
// while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
// pending_ops.front().callback();
// pending_ops.pop();
// }
}
void Scheduler::AllocateWorkerCommandBuffers() {