From 46018530e8656f5fdb1debc6062104b55218f624 Mon Sep 17 00:00:00 2001 From: Vinicius Rangel Date: Sun, 6 Oct 2024 02:44:08 -0300 Subject: [PATCH] devtools: pm4 - show program state --- CMakeLists.txt | 2 ++ src/core/debug_state.cpp | 30 +++++++++++++++- src/core/debug_state.h | 33 +++++++++++------ src/core/devtools/widget/cmd_list.cpp | 42 +++++++++++++++------- src/core/devtools/widget/cmd_list.h | 11 ++++-- src/core/devtools/widget/frame_dump.cpp | 2 +- src/core/devtools/widget/reg_view.cpp | 37 +++++++++++++++++++ src/core/devtools/widget/reg_view.h | 22 ++++++++++++ src/core/libraries/gnmdriver/gnmdriver.cpp | 13 ++++--- src/video_core/amdgpu/liverpool.cpp | 25 +++++++++++++ src/video_core/amdgpu/liverpool.h | 2 +- 11 files changed, 184 insertions(+), 35 deletions(-) create mode 100644 src/core/devtools/widget/reg_view.cpp create mode 100644 src/core/devtools/widget/reg_view.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1706e50ad..798bf27ce 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -356,6 +356,8 @@ set(DEV_TOOLS src/core/devtools/layer.cpp src/core/devtools/widget/frame_graph.cpp src/core/devtools/widget/frame_graph.h src/core/devtools/widget/imgui_memory_editor.h + src/core/devtools/widget/reg_view.cpp + src/core/devtools/widget/reg_view.h src/core/devtools/widget/types.h ) diff --git a/src/core/debug_state.cpp b/src/core/debug_state.cpp index 050143e6e..813623ee0 100644 --- a/src/core/debug_state.cpp +++ b/src/core/debug_state.cpp @@ -5,9 +5,10 @@ #include "common/native_clock.h" #include "common/singleton.h" #include "debug_state.h" -#include "libraries/kernel/event_queues.h" +#include "devtools/widget/types.h" #include "libraries/kernel/time_management.h" #include "libraries/system/msgdialog.h" +#include "video_core/amdgpu/pm4_cmds.h" using namespace DebugStateType; @@ -95,8 +96,35 @@ void DebugStateImpl::ResumeGuestThreads() { } void DebugStateImpl::RequestFrameDump(s32 count) { + ASSERT(!DumpingCurrentFrame()); 
gnm_frame_dump_request_count = count; frame_dump_list.clear(); frame_dump_list.resize(count); waiting_submit_pause = true; } + +void DebugStateImpl::PushQueueDump(QueueDump dump) { + ASSERT(DumpingCurrentFrame()); + std::unique_lock lock{frame_dump_list_mutex}; + GetFrameDump().queues.push_back(std::move(dump)); +} + +void DebugStateImpl::PushRegsDump(uintptr_t base_addr, const AmdGpu::Liverpool::Regs& regs) { + ASSERT(DumpingCurrentReg()); + std::unique_lock lock{frame_dump_list_mutex}; + auto& dump = + frame_dump_list[frame_dump_list.size() - liverpool_dump_request_count].regs[base_addr]; + dump.regs = regs; + for (int i = 0; i < RegDump::MaxShaderStages; i++) { + if (regs.stage_enable.IsStageEnabled(i)) { + auto stage = regs.ProgramForStage(i); + if (stage->address_lo != 0) { + auto code = stage->Code(); + dump.stages[i] = ShaderDump{ + .user_data = *stage, + .code = std::vector{code.begin(), code.end()}, + }; + } + } + } +} diff --git a/src/core/debug_state.h b/src/core/debug_state.h index f578b658a..59e1ea4e8 100644 --- a/src/core/debug_state.h +++ b/src/core/debug_state.h @@ -9,6 +9,8 @@ #include #include "common/types.h" +#include "video_core/amdgpu/liverpool.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #ifdef _WIN32 #ifndef WIN32_LEAN_AND_MEAN @@ -45,8 +47,20 @@ struct QueueDump { uintptr_t base_addr; }; +struct ShaderDump { + Vulkan::Liverpool::ShaderProgram user_data{}; + std::vector code{}; +}; + +struct RegDump { + static constexpr size_t MaxShaderStages = 5; + Vulkan::Liverpool::Regs regs{}; + std::array stages{}; +}; + struct FrameDump { std::vector queues; + std::unordered_map regs; // address -> reg dump }; class DebugStateImpl { @@ -71,6 +85,13 @@ class DebugStateImpl { std::queue debug_message_popup; public: + void ShowDebugMessage(std::string message) { + if (message.empty()) { + return; + } + debug_message_popup.push(std::move(message)); + } + void AddCurrentThreadToGuestList(); void RemoveCurrentThreadFromGuestList(); @@ 
-110,17 +131,9 @@ public: return frame_dump_list[frame_dump_list.size() - gnm_frame_dump_request_count]; } - void PushQueueDump(QueueDump dump) { - std::unique_lock lock{frame_dump_list_mutex}; - GetFrameDump().queues.push_back(std::move(dump)); - } + void PushQueueDump(QueueDump dump); - void ShowDebugMessage(std::string message) { - if (message.empty()) { - return; - } - debug_message_popup.push(std::move(message)); - } + void PushRegsDump(uintptr_t base_addr, const AmdGpu::Liverpool::Regs& regs); }; } // namespace DebugStateType diff --git a/src/core/devtools/widget/cmd_list.cpp b/src/core/devtools/widget/cmd_list.cpp index 89b0fda78..a334421cc 100644 --- a/src/core/devtools/widget/cmd_list.cpp +++ b/src/core/devtools/widget/cmd_list.cpp @@ -1044,9 +1044,9 @@ void CmdListViewer::OnDispatch(AmdGpu::PM4Type3Header const* header, u32 const* EndGroup(); } -CmdListViewer::CmdListViewer(const std::vector& cmd_list, uintptr_t base_addr, - std::string name) - : base_addr(base_addr), name(std::move(name)) { +CmdListViewer::CmdListViewer(const FrameDumpViewer* parent, const std::vector& cmd_list, + uintptr_t base_addr, std::string name) + : parent(parent), base_addr(base_addr), name(std::move(name)) { using namespace AmdGpu; cmdb_addr = (uintptr_t)cmd_list.data(); @@ -1244,6 +1244,12 @@ void CmdListViewer::Draw() { if (!group_batches || CollapsingHeader(batch_hdr)) { auto bb = ctx.LastItemData.Rect; if (group_batches) { + if (IsItemToggledOpen()) { + if (parent->frame_dump.regs.contains(batch.command_addr)) { + batch_view.data = parent->frame_dump.regs.at(batch.command_addr); + batch_view.open = true; + } + } Indent(); } auto const batch_sz = batch.end_addr - batch.start_addr; @@ -1262,19 +1268,19 @@ void CmdListViewer::Draw() { Gcn::GetOpCodeName((u32)op)); if (TreeNode(header_name)) { - bool just_opened = IsItemToggledOpen(); + const bool just_opened = IsItemToggledOpen(); + if (just_opened) { + // Editor + cmdb_view.GotoAddrAndHighlight( + reinterpret_cast(pm4_hdr) - 
cmdb_addr, + reinterpret_cast(pm4_hdr) - cmdb_addr + + (pm4_hdr->count + 2) * 4); + } + if (BeginTable("split", 1)) { TableNextColumn(); Text("size: %d", pm4_hdr->count + 1); - if (just_opened) { - // Editor - cmdb_view.GotoAddrAndHighlight( - reinterpret_cast(pm4_hdr) - cmdb_addr, - reinterpret_cast(pm4_hdr) - cmdb_addr + - (pm4_hdr->count + 2) * 4); - } - auto const* it_body = reinterpret_cast(pm4_hdr + 1); @@ -1367,6 +1373,18 @@ void CmdListViewer::Draw() { } End(); } + + if (batch_view.open) { + batch_view.Draw(); + } + for (auto it = extra_batch_view.begin(); it != extra_batch_view.end();) { + if (!it->open) { + it = extra_batch_view.erase(it); + continue; + } + it->Draw(); + ++it; /* only advance here: erase() above already returns the next view */ + } } } // namespace Core::Devtools::Widget \ No newline at end of file diff --git a/src/core/devtools/widget/cmd_list.h b/src/core/devtools/widget/cmd_list.h index 37229fb86..8eb463120 100644 --- a/src/core/devtools/widget/cmd_list.h +++ b/src/core/devtools/widget/cmd_list.h @@ -10,8 +10,8 @@ #include "common/types.h" #include "imgui_memory_editor.h" +#include "reg_view.h" #include "types.h" -#include "video_core/buffer_cache/buffer_cache.h" namespace AmdGpu { union PM4Type3Header; @@ -24,6 +24,8 @@ class FrameDumpViewer; class CmdListViewer { + const FrameDumpViewer* parent; + uintptr_t base_addr; std::string name; std::vector events{}; @@ -37,6 +39,9 @@ class CmdListViewer { int vqid{255}; s32 highlight_batch{-1}; + RegView batch_view; + std::vector extra_batch_view; + void OnNop(AmdGpu::PM4Type3Header const* header, u32 const* body); void OnSetBase(AmdGpu::PM4Type3Header const* header, u32 const* body); void OnSetContextReg(AmdGpu::PM4Type3Header const* header, u32 const* body); @@ -47,8 +52,8 @@ public: static void LoadConfig(const char* line); static void SerializeConfig(ImGuiTextBuffer* buf); - explicit CmdListViewer(const std::vector& cmd_list, uintptr_t base_addr = 0, - std::string name = ""); + explicit CmdListViewer(const FrameDumpViewer* parent, const std::vector& 
cmd_list, + uintptr_t base_addr = 0, std::string name = ""); void Draw(); }; diff --git a/src/core/devtools/widget/frame_dump.cpp b/src/core/devtools/widget/frame_dump.cpp index 646d3c1d9..4e6476dfa 100644 --- a/src/core/devtools/widget/frame_dump.cpp +++ b/src/core/devtools/widget/frame_dump.cpp @@ -49,7 +49,7 @@ FrameDumpViewer::FrameDumpViewer(FrameDump _frame_dump) : frame_dump(std::move(_ const auto fname = fmt::format("{}_{}_{:02}_{:02}", id, magic_enum::enum_name(selected_queue_type), selected_submit_num, selected_queue_num2); - cmd_list_viewer.emplace_back(cmd.data, cmd.base_addr, fname); + cmd_list_viewer.emplace_back(this, cmd.data, cmd.base_addr, fname); if (cmd.type == QueueType::dcb && cmd.submit_num == selected_submit_num && cmd.num2 == selected_queue_num2) { selected_cmd = cmd_list_viewer.size() - 1; diff --git a/src/core/devtools/widget/reg_view.cpp b/src/core/devtools/widget/reg_view.cpp new file mode 100644 index 000000000..bd2252bcd --- /dev/null +++ b/src/core/devtools/widget/reg_view.cpp @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "reg_view.h" + +using namespace ImGui; + +namespace Core::Devtools::Widget { + +RegView::RegView() { + static int unique_id = 0; + id = unique_id++; +} + +void RegView::Draw() { + char name[32]; + snprintf(name, sizeof(name), "Reg view###reg_dump_%d", id); + if (Begin(name, &open, ImGuiWindowFlags_NoSavedSettings)) { + if (BeginTable("Enable shaders", 2)) { + for (int i = 0; i < DebugStateType::RegDump::MaxShaderStages; i++) { + TableNextRow(); + TableSetColumnIndex(0); + const char* names[] = {"vs", "ps", "gs", "es", "hs", "ls"}; + Text("%s", names[i]); + TableSetColumnIndex(1); + Text("%X", data.regs.stage_enable.IsStageEnabled(i)); + TableSetColumnIndex(0); + } + EndTable(); + } + } + End(); +} + +} // namespace Core::Devtools::Widget \ No newline at end of file diff --git 
a/src/core/devtools/widget/reg_view.h b/src/core/devtools/widget/reg_view.h new file mode 100644 index 000000000..3c188de16 --- /dev/null +++ b/src/core/devtools/widget/reg_view.h @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once +#include "core/debug_state.h" + +namespace Core::Devtools::Widget { + +class RegView { + int id; + +public: + bool open = false; + + DebugStateType::RegDump data; + + RegView(); + + void Draw(); +}; + +} // namespace Core::Devtools::Widget \ No newline at end of file diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 332a92a08..775601b44 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -505,6 +505,11 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { : (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr; const std::span acb_span{acb_ptr, acb_size >> 2u}; + liverpool->SubmitAsc(vqid, acb_span); + + *asc_queue.read_addr += acb_size; + *asc_queue.read_addr %= asc_queue.ring_size_dw * 4; + if (DebugState.DumpingCurrentFrame()) { static auto last_frame_num = -1LL; static u32 seq_num{}; @@ -537,11 +542,6 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { .base_addr = base_addr, }); } - - liverpool->SubmitAsc(vqid, acb_span); - - *asc_queue.read_addr += acb_size; - *asc_queue.read_addr %= asc_queue.ring_size_dw * 4; } int PS4_SYSV_ABI sceGnmDingDongForWorkload() { @@ -2165,6 +2165,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[ const auto& dcb_span = std::span{dcb_gpu_addrs[cbpair], dcb_size_dw}; const auto& ccb_span = std::span{ccb, ccb_size_dw}; + liverpool->SubmitGfx(dcb_span, ccb_span); if (DebugState.DumpingCurrentFrame()) { static auto last_frame_num = -1LL; static u32 seq_num{}; @@ -2192,8 +2193,6 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, 
const u32* dcb_gpu_addrs[ .base_addr = reinterpret_cast(ccb), }); } - - liverpool->SubmitGfx(dcb_span, ccb_span); } return ORBIS_OK; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index b3b718836..9d509c56d 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -6,6 +6,7 @@ #include "common/debug.h" #include "common/polyfill_thread.h" #include "common/thread.h" +#include "core/debug_state.h" #include "core/libraries/videoout/driver.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" @@ -359,6 +360,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_base_hi); regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDump(reinterpret_cast(header), regs); + } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address)); @@ -373,6 +377,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanmax_size; regs.num_indices = draw_index_off->index_count; regs.draw_initiator = draw_index_off->draw_initiator; + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDump(reinterpret_cast(header), regs); + } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin( @@ -386,6 +393,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDump(reinterpret_cast(header), regs); + } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address)); @@ -399,6 +409,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spandata_offset; const auto 
ib_address = mapped_queues[GfxQueueId].indirect_args_addr; const auto size = sizeof(PM4CmdDrawIndirect::DrawInstancedArgs); + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDump(reinterpret_cast(header), regs); + } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndirect", cmd_address)); @@ -413,6 +426,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spandata_offset; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; const auto size = sizeof(PM4CmdDrawIndexIndirect::DrawIndexInstancedArgs); + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDump(reinterpret_cast(header), regs); + } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin( @@ -428,6 +444,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spandim_y; regs.cs_program.dim_z = dispatch_direct->dim_z; regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDump(reinterpret_cast(header), regs); + } if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address)); @@ -442,6 +461,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spandata_offset; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDump(reinterpret_cast(header), regs); + } if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin( @@ -620,6 +642,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { regs.cs_program.dim_y = dispatch_direct->dim_y; regs.cs_program.dim_z = dispatch_direct->dim_z; 
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDump(reinterpret_cast(header), regs); + } if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 1c994d0a0..8d92fe3c6 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -935,7 +935,7 @@ struct Liverpool { BitField<5, 1, u32> gs_en; BitField<6, 1, u32> vs_en; - bool IsStageEnabled(u32 stage) { + bool IsStageEnabled(u32 stage) const { switch (stage) { case 0: case 1: