Sync the whole buffer instead of only the range

This commit is contained in:
Lander Gallastegi 2025-05-05 22:23:29 +02:00
parent c6757763e4
commit 01a0e00dbb
4 changed files with 22 additions and 15 deletions

View File

@@ -909,18 +909,18 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
     return true;
 }
 
-void BufferCache::SynchronizeRange(VAddr device_addr, u64 size) {
+void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
     if (device_addr == 0) {
         return;
     }
     VAddr device_addr_end = device_addr + size;
     ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
-        VAddr buffer_start = buffer.CpuAddr();
-        VAddr buffer_end = buffer_start + buffer.SizeBytes();
-        VAddr start = std::max(buffer_start, device_addr);
-        VAddr end = std::min(buffer_end, device_addr_end);
-        u32 size = static_cast<u32>(end - start);
-        SynchronizeBuffer(buffer, start, size, false);
+        // Note that this function synchronizes the whole buffer, not just the range.
+        // This is because this function is used to sync buffers before using a
+        // shader that uses DMA.
+        // The ideal solution would be to sync all the mapped regions, but it is
+        // very slow.
+        SynchronizeBuffer(buffer, buffer.CpuAddr(), buffer.SizeBytes(), false);
     });
 }
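To make the behavioural change concrete, here is a small self-contained C++ sketch contrasting the removed range-clipping logic with the new whole-buffer policy. FakeBuffer and both function names are invented for illustration; only the clamping arithmetic mirrors the removed lines.

#include <algorithm>
#include <cstdint>
#include <cstdio>

using VAddr = std::uint64_t;

struct FakeBuffer {
    VAddr cpu_addr;
    std::uint64_t size_bytes;
};

// Old behaviour: sync only the intersection of the buffer with the requested range.
void SyncClamped(const FakeBuffer& buf, VAddr device_addr, std::uint64_t size) {
    const VAddr start = std::max(buf.cpu_addr, device_addr);
    const VAddr end = std::min(buf.cpu_addr + buf.size_bytes, device_addr + size);
    std::printf("clamped sync:      [0x%llx, 0x%llx)\n",
                static_cast<unsigned long long>(start),
                static_cast<unsigned long long>(end));
}

// New behaviour: a DMA-capable shader may touch any part of the buffer, so the
// whole buffer is synchronized no matter how small the requested range is.
void SyncWhole(const FakeBuffer& buf) {
    std::printf("whole-buffer sync: [0x%llx, 0x%llx)\n",
                static_cast<unsigned long long>(buf.cpu_addr),
                static_cast<unsigned long long>(buf.cpu_addr + buf.size_bytes));
}

int main() {
    const FakeBuffer buf{0x1000, 0x4000};
    SyncClamped(buf, 0x2000, 0x800); // prints [0x2000, 0x2800)
    SyncWhole(buf);                  // prints [0x1000, 0x5000)
}

With the old clamping, a DMA shader that touches the buffer outside the requested range could observe stale data; syncing the whole buffer removes that hazard at the cost of redundant copies, which the in-code comment acknowledges as a trade-off against syncing all mapped regions.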

View File

@@ -137,7 +137,7 @@ public:
     void ProcessFaultBuffer();
 
     /// Synchronizes all buffers in the specified range.
-    void SynchronizeRange(VAddr device_addr, u64 size);
+    void SynchronizeBuffersInRange(VAddr device_addr, u64 size);
 
     /// Record memory barrier. Used for buffers when accessed via BDA.
     void MemoryBarrier();

View File

@@ -474,14 +474,15 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
 
     pipeline->BindResources(set_writes, buffer_barriers, push_data);
 
-    if (uses_dma && !fault_process_pending) {
+    if (uses_dma) {
         // We only use fault buffer for DMA right now.
         {
             std::shared_lock lock{dma_sync_mapped_ranges_mutex};
             for (const auto& range : dma_sync_mapped_ranges) {
-                buffer_cache.SynchronizeRange(range.lower(), range.upper() - range.lower());
+                buffer_cache.SynchronizeBuffersInRange(range.lower(), range.upper() - range.lower());
             }
         }
+        buffer_cache.MemoryBarrier();
     }
 
     fault_process_pending |= uses_dma;
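This hunk also drops the !fault_process_pending guard, so the mapped ranges are synchronized on every DMA-using draw, and it adds a buffer_cache.MemoryBarrier() call after the sync. The body of MemoryBarrier() is not shown in this commit; the sketch below illustrates, with the plain Vulkan C API, roughly what recording such a global barrier could look like. The function name and the stage/access masks are assumptions, guided only by the header comment about buffers accessed via BDA.

#include <vulkan/vulkan.h>

// Illustrative only: record a coarse "all prior writes are visible to all
// subsequent accesses" barrier. A global VkMemoryBarrier is the usual way to
// cover accesses through buffer device addresses, which the driver cannot
// track per-resource.
void RecordGlobalMemoryBarrier(VkCommandBuffer cmdbuf) {
    VkMemoryBarrier barrier{};
    barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
    barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
    vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &barrier,
                         0, nullptr, 0, nullptr);
}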

View File

@@ -91,11 +91,17 @@ void Scheduler::Wait(u64 tick) {
     }
     master_semaphore.Wait(tick);
-    // Apply pending operations until the wait tick
-    while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
-        pending_ops.front().callback();
-        pending_ops.pop();
-    }
+    // TODO: We should be applying pending operations here, because that gives us
+    // the ability to use mapped regions on stream buffers in deferred operations.
+    // We don't do that right now because it might introduce variations in the
+    // timing and, since we don't sync the GPU, some games might be affected by that.
+    // It shouldn't be an issue right now, because we only use mapped regions in
+    // deferred operations to download faulted addresses. That is only 8 KB every
+    // tick and the stream buffer is 256 MB; the GPU doesn't fall that far behind.
+    // while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
+    //     pending_ops.front().callback();
+    //     pending_ops.pop();
+    // }
 }
 
 void Scheduler::AllocateWorkerCommandBuffers() {
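For context on the disabled loop: pending_ops is a tick-ordered queue of deferred operations, each of which becomes safe to run once the GPU has passed its recorded timeline tick. A minimal self-contained sketch of that mechanism, with the PendingOp layout and queue type simplified from the names in the diff:

#include <cstdint>
#include <cstdio>
#include <functional>
#include <queue>

struct PendingOp {
    std::function<void()> callback;
    std::uint64_t gpu_tick; // tick after which it is safe to run the callback
};

std::queue<PendingOp> pending_ops;

// Drains every operation whose tick the GPU has already reached. The commit
// stops calling this from Scheduler::Wait so that deferred downloads keep
// their mapped stream-buffer regions stable, as explained in the TODO above.
void ApplyPendingOperations(std::uint64_t completed_tick) {
    while (!pending_ops.empty() && pending_ops.front().gpu_tick <= completed_tick) {
        pending_ops.front().callback();
        pending_ops.pop();
    }
}

int main() {
    pending_ops.push({[] { std::puts("download faulted addresses"); }, /*gpu_tick=*/5});
    ApplyPendingOperations(4); // tick not reached: nothing runs
    ApplyPendingOperations(5); // tick reached: callback runs and is popped
}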