diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 65dad7da8..cfd03a6af 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -73,6 +73,20 @@ Liverpool::~Liverpool() { process_thread.join(); } +void Liverpool::ProcessCommands() { + // Process incoming commands with high priority + while (num_commands) { + Common::UniqueFunction callback{}; + { + std::unique_lock lk{submit_mutex}; + callback = std::move(command_queue.front()); + command_queue.pop(); + --num_commands; + } + callback(); + } +} + void Liverpool::Process(std::stop_token stoken) { Common::SetCurrentThreadName("shadPS4:GpuCommandProcessor"); @@ -91,18 +105,7 @@ void Liverpool::Process(std::stop_token stoken) { curr_qid = -1; while (num_submits || num_commands) { - - // Process incoming commands with high priority - while (num_commands) { - Common::UniqueFunction callback{}; - { - std::unique_lock lk{submit_mutex}; - callback = std::move(command_queue.front()); - command_queue.pop(); - --num_commands; - } - callback(); - } + ProcessCommands(); curr_qid = (curr_qid + 1) % num_mapped_queues; @@ -148,6 +151,8 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { FIBER_ENTER(ccb_task_name); while (!ccb.empty()) { + ProcessCommands(); + const auto* header = reinterpret_cast(ccb.data()); const u32 type = header->type; if (type != 3) { @@ -228,6 +233,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(dcb.data()); const u32 type = header->type; + ProcessCommands(); + switch (type) { default: UNREACHABLE_MSG("Wrong PM4 type {}", type); @@ -815,6 +822,8 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq auto base_addr = reinterpret_cast(acb); while (acb_dwords > 0) { + ProcessCommands(); + auto* header = reinterpret_cast(acb); u32 next_dw_off = header->type3.NumWords() + 1; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 7036e1d26..3c509223b 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -1587,6 +1587,7 @@ private: template Task ProcessCompute(const u32* acb, u32 acb_dwords, u32 vqid); + void ProcessCommands(); void Process(std::stop_token stoken); struct GpuQueue { diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 5f4996d21..e8e19a509 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -140,14 +140,22 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { } void BufferCache::ReadMemory(VAddr device_addr, u64 size) { - ForEachBufferInRange(device_addr, size, [this, device_addr, size](BufferId buffer_id, Buffer& buffer) { - const VAddr buffer_start = buffer.CpuAddr(); - const VAddr buffer_end = buffer_start + buffer.SizeBytes(); - const VAddr download_start = std::max(buffer_start, device_addr); - const VAddr download_end = std::min(buffer_end, device_addr + size); - const u64 download_size = download_end - download_start; - DownloadBufferMemory(buffer, download_start, download_size); + if (!memory_tracker.IsRegionGpuModified(device_addr, size)) { + return; + } + std::binary_semaphore sem{0}; + liverpool->SendCommand([this, &sem, device_addr, size] { + ForEachBufferInRange(device_addr, size, [this, device_addr, size](BufferId buffer_id, Buffer& buffer) { + const VAddr buffer_start = buffer.CpuAddr(); + const VAddr buffer_end = buffer_start + buffer.SizeBytes(); + const VAddr download_start = std::max(buffer_start, device_addr); + const VAddr download_end = std::min(buffer_end, device_addr + size); + const u64 download_size = download_end - download_start; + DownloadBufferMemory(buffer, download_start, download_size); + }); + sem.release(); }); + sem.acquire(); } void BufferCache::DownloadBufferMemory(const Buffer& buffer, VAddr device_addr, u64 size) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e16d12810..056d2fbad 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -488,7 +488,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { uses_dma |= stage->dma_types != Shader::IR::Type::Void; } - if (uses_dma && !fault_process_pending) { + if (uses_dma) { // We only use fault buffer for DMA right now. { // TODO: GPU might have written to memory (for example with EVENT_WRITE_EOP)