mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-27 20:44:28 +00:00
Sync the whole buffer instead of only the range
This commit is contained in:
parent
c6757763e4
commit
01a0e00dbb
@ -909,18 +909,18 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
|
||||
return true;
|
||||
}
|
||||
|
||||
void BufferCache::SynchronizeRange(VAddr device_addr, u64 size) {
|
||||
void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
|
||||
if (device_addr == 0) {
|
||||
return;
|
||||
}
|
||||
VAddr device_addr_end = device_addr + size;
|
||||
ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
|
||||
VAddr buffer_start = buffer.CpuAddr();
|
||||
VAddr buffer_end = buffer_start + buffer.SizeBytes();
|
||||
VAddr start = std::max(buffer_start, device_addr);
|
||||
VAddr end = std::min(buffer_end, device_addr_end);
|
||||
u32 size = static_cast<u32>(end - start);
|
||||
SynchronizeBuffer(buffer, start, size, false);
|
||||
// Note that this function synchronizes the whole buffer, not just the range.
|
||||
// This is because this function is used to sync buffers before using a
|
||||
// shader that uses DMA.
|
||||
// The ideal solution would be to sync all the mapped regions but it is
|
||||
// very slow.
|
||||
SynchronizeBuffer(buffer, buffer.CpuAddr(), buffer.SizeBytes(), false);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -137,7 +137,7 @@ public:
|
||||
void ProcessFaultBuffer();
|
||||
|
||||
/// Synchronizes all buffers in the specified range.
|
||||
void SynchronizeRange(VAddr device_addr, u64 size);
|
||||
void SynchronizeBuffersInRange(VAddr device_addr, u64 size);
|
||||
|
||||
/// Record memory barrier. Used for buffers when accessed via BDA.
|
||||
void MemoryBarrier();
|
||||
|
@ -474,14 +474,15 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
|
||||
|
||||
pipeline->BindResources(set_writes, buffer_barriers, push_data);
|
||||
|
||||
if (uses_dma && !fault_process_pending) {
|
||||
if (uses_dma) {
|
||||
// We only use fault buffer for DMA right now.
|
||||
{
|
||||
std::shared_lock lock{dma_sync_mapped_ranges_mutex};
|
||||
for (const auto& range : dma_sync_mapped_ranges) {
|
||||
buffer_cache.SynchronizeRange(range.lower(), range.upper() - range.lower());
|
||||
buffer_cache.SynchronizeBuffersInRange(range.lower(), range.upper() - range.lower());
|
||||
}
|
||||
}
|
||||
buffer_cache.MemoryBarrier();
|
||||
}
|
||||
|
||||
fault_process_pending |= uses_dma;
|
||||
|
@ -91,11 +91,17 @@ void Scheduler::Wait(u64 tick) {
|
||||
}
|
||||
master_semaphore.Wait(tick);
|
||||
|
||||
// Apply pending operations until the wait tick
|
||||
while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
|
||||
pending_ops.front().callback();
|
||||
pending_ops.pop();
|
||||
}
|
||||
// TODO: We should be applying pending operations here because that gives us
|
||||
// the ability to use mapped regions on stream buffers in deferred operations.
|
||||
// We don't do that right now because it might introduce variations in the
|
||||
// timing and, since we don't sync the GPU some games might be affected by that.
|
||||
// It shouldn't be an issue right now, because we only use mapped regions in
|
||||
// deferred operations to download faulted addresses. That is only 8KB every tick
|
||||
// and the stream buffer is 256MB. The GPU doesn't fall that far behind.
|
||||
// while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
|
||||
// pending_ops.front().callback();
|
||||
// pending_ops.pop();
|
||||
// }
|
||||
}
|
||||
|
||||
void Scheduler::AllocateWorkerCommandBuffers() {
|
||||
|
Loading…
Reference in New Issue
Block a user