Sync the whole buffer instead of only the range

This commit is contained in:
Lander Gallastegi 2025-05-05 22:23:29 +02:00
parent c6757763e4
commit 01a0e00dbb
4 changed files with 22 additions and 15 deletions

View File

@ -909,18 +909,18 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
return true;
}
void BufferCache::SynchronizeRange(VAddr device_addr, u64 size) {
void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
if (device_addr == 0) {
return;
}
VAddr device_addr_end = device_addr + size;
ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
VAddr buffer_start = buffer.CpuAddr();
VAddr buffer_end = buffer_start + buffer.SizeBytes();
VAddr start = std::max(buffer_start, device_addr);
VAddr end = std::min(buffer_end, device_addr_end);
u32 size = static_cast<u32>(end - start);
SynchronizeBuffer(buffer, start, size, false);
// Note that this function synchronizes the whole buffer, not just the range.
// This is because this function is used to sync buffers before using a
// shader that uses DMA.
// The ideal solution would be to sync all the mapped regions, but it is
// very slow.
SynchronizeBuffer(buffer, buffer.CpuAddr(), buffer.SizeBytes(), false);
});
}

View File

@ -137,7 +137,7 @@ public:
void ProcessFaultBuffer();
/// Synchronizes all buffers in the specified range.
void SynchronizeRange(VAddr device_addr, u64 size);
void SynchronizeBuffersInRange(VAddr device_addr, u64 size);
/// Record memory barrier. Used for buffers when accessed via BDA.
void MemoryBarrier();

View File

@ -474,14 +474,15 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
pipeline->BindResources(set_writes, buffer_barriers, push_data);
if (uses_dma && !fault_process_pending) {
if (uses_dma) {
// We only use fault buffer for DMA right now.
{
std::shared_lock lock{dma_sync_mapped_ranges_mutex};
for (const auto& range : dma_sync_mapped_ranges) {
buffer_cache.SynchronizeRange(range.lower(), range.upper() - range.lower());
buffer_cache.SynchronizeBuffersInRange(range.lower(), range.upper() - range.lower());
}
}
buffer_cache.MemoryBarrier();
}
fault_process_pending |= uses_dma;

View File

@ -91,11 +91,17 @@ void Scheduler::Wait(u64 tick) {
}
master_semaphore.Wait(tick);
// Apply pending operations until the wait tick
while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
pending_ops.front().callback();
pending_ops.pop();
}
// TODO: We should be applying pending operations here because that gives us
// the ability to use mapped regions on stream buffers in deferred operations.
// We don't do that right now because it might introduce variations in the
// timing and, since we don't sync with the GPU, some games might be affected by that.
// It shouldn't be an issue right now, because we only use mapped regions in
// deferred operations to download faulted addresses. That is only 8KB every tick
// and the stream buffer is 256MB. The GPU doesn't fall that far behind.
// while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
// pending_ops.front().callback();
// pending_ops.pop();
// }
}
void Scheduler::AllocateWorkerCommandBuffers() {