diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index cfd03a6af..593e9d1ba 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -816,31 +816,31 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span -Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vqid) { +Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { FIBER_ENTER(acb_task_name[vqid]); auto& queue = asc_queues[{vqid}]; - auto base_addr = reinterpret_cast(acb); - while (acb_dwords > 0) { + auto base_addr = reinterpret_cast(acb.data()); + while (!acb.empty()) { ProcessCommands(); - auto* header = reinterpret_cast(acb); + auto* header = reinterpret_cast(acb.data()); u32 next_dw_off = header->type3.NumWords() + 1; // If we have a buffered packet, use it. if (queue.tmp_dwords > 0) [[unlikely]] { header = reinterpret_cast(queue.tmp_packet.data()); next_dw_off = header->type3.NumWords() + 1 - queue.tmp_dwords; - std::memcpy(queue.tmp_packet.data() + queue.tmp_dwords, acb, next_dw_off * sizeof(u32)); + std::memcpy(queue.tmp_packet.data() + queue.tmp_dwords, acb.data(), next_dw_off * sizeof(u32)); queue.tmp_dwords = 0; } // If the packet is split across ring boundary, buffer until next submission - if (next_dw_off > acb_dwords) [[unlikely]] { - std::memcpy(queue.tmp_packet.data(), acb, acb_dwords * sizeof(u32)); - queue.tmp_dwords = acb_dwords; + if (next_dw_off > acb.size()) [[unlikely]] { + std::memcpy(queue.tmp_packet.data(), acb.data(), acb.size_bytes()); + queue.tmp_dwords = acb.size(); if constexpr (!is_indirect) { - *queue.read_addr += acb_dwords; + *queue.read_addr += acb.size(); *queue.read_addr %= queue.ring_size_dw; } break; @@ -849,8 +849,7 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq if (header->type == 2) { // Type-2 packet are used for padding purposes next_dw_off = 1; - acb += next_dw_off; - acb_dwords -= next_dw_off; + acb = NextPacket(acb, next_dw_off); if constexpr (!is_indirect) { *queue.read_addr += next_dw_off; @@ -873,8 +872,8 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq } case PM4ItOpcode::IndirectBuffer: { const auto* indirect_buffer = reinterpret_cast(header); - auto task = ProcessCompute(indirect_buffer->Address(), - indirect_buffer->ib_size, vqid); + auto task = ProcessCompute({indirect_buffer->Address(), + indirect_buffer->ib_size}, vqid); RESUME_ASC(task, vqid); while (!task.handle.done()) { @@ -1040,8 +1039,7 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq static_cast(opcode), header->type3.NumWords()); } - acb += next_dw_off; - acb_dwords -= next_dw_off; + acb = NextPacket(acb, next_dw_off); if constexpr (!is_indirect) { *queue.read_addr += next_dw_off; @@ -1111,7 +1109,7 @@ void Liverpool::SubmitAsc(u32 gnm_vqid, std::span acb) { auto& queue = mapped_queues[gnm_vqid]; const auto vqid = gnm_vqid - 1; - const auto& task = ProcessCompute(acb.data(), acb.size(), vqid); + const auto& task = ProcessCompute(acb, vqid); { std::scoped_lock lock{queue.m_access}; queue.submits.emplace(task.handle); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 3c509223b..43bf5af49 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -1585,7 +1585,7 @@ private: Task ProcessGraphics(std::span dcb, std::span ccb); Task ProcessCeUpdate(std::span ccb); template - Task ProcessCompute(const u32* acb, u32 acb_dwords, u32 vqid); + Task ProcessCompute(std::span acb, u32 vqid); void ProcessCommands(); void Process(std::stop_token stoken); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 056d2fbad..e16d12810 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -488,7 +488,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { uses_dma |= stage->dma_types != Shader::IR::Type::Void; } - if (uses_dma) { + if (uses_dma && !fault_process_pending) { // We only use fault buffer for DMA right now. { // TODO: GPU might have written to memory (for example with EVENT_WRITE_EOP) diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index c4f781cf2..4cf6ec9d0 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -68,7 +68,10 @@ void Scheduler::EndRendering() { void Scheduler::PopPendingOperations() { master_semaphore.Refresh(); while (!pending_ops.empty() && master_semaphore.IsFree(pending_ops.front().gpu_tick)) { + ASSERT(op_scope == 0); + ++op_scope; pending_ops.front().callback(); + --op_scope; pending_ops.pop(); } } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index c19c326c7..1ec0e7cea 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -379,6 +379,7 @@ private: u64 gpu_tick; }; std::queue pending_ops; + u32 op_scope{}; RenderState render_state; DynamicState dynamic_state; bool is_rendering = false;