diff --git a/CMakeLists.txt b/CMakeLists.txt index f522837fc..d2b2a7dc7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -851,10 +851,10 @@ if (ARCHITECTURE STREQUAL "x86_64") src/core/cpu_patches.h) endif() -set(SHADER_RECOMPILER src/shader_recompiler/exception.h - src/shader_recompiler/profile.h +set(SHADER_RECOMPILER src/shader_recompiler/profile.h src/shader_recompiler/recompiler.cpp src/shader_recompiler/recompiler.h + src/shader_recompiler/resource.h src/shader_recompiler/info.h src/shader_recompiler/params.h src/shader_recompiler/runtime_info.h @@ -952,17 +952,24 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/value.h ) -set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp +set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h + src/video_core/amdgpu/liverpool.cpp src/video_core/amdgpu/liverpool.h src/video_core/amdgpu/pixel_format.cpp src/video_core/amdgpu/pixel_format.h src/video_core/amdgpu/pm4_cmds.h src/video_core/amdgpu/pm4_opcodes.h + src/video_core/amdgpu/regs_color.h + src/video_core/amdgpu/regs_depth.h + src/video_core/amdgpu/regs.cpp + src/video_core/amdgpu/regs.h + src/video_core/amdgpu/regs_primitive.h + src/video_core/amdgpu/regs_shader.h + src/video_core/amdgpu/regs_texture.h + src/video_core/amdgpu/regs_vertex.h src/video_core/amdgpu/resource.h src/video_core/amdgpu/tiling.cpp src/video_core/amdgpu/tiling.h - src/video_core/amdgpu/types.h - src/video_core/amdgpu/default_context.cpp src/video_core/buffer_cache/buffer.cpp src/video_core/buffer_cache/buffer.h src/video_core/buffer_cache/buffer_cache.cpp diff --git a/src/common/number_utils.cpp b/src/common/number_utils.cpp index 660f539f9..121437a23 100644 --- a/src/common/number_utils.cpp +++ b/src/common/number_utils.cpp @@ -1,20 +1,14 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include #include - #include "common/number_utils.h" -#include "video_core/amdgpu/pixel_format.h" -#include 
"video_core/amdgpu/types.h" -#define UF11_EXPONENT_SHIFT 6 -#define UF10_EXPONENT_SHIFT 5 - -#define RGB9E5_MANTISSA_BITS 9 -#define RGB9E5_EXP_BIAS 1 - -#define F32_INFINITY 0x7f800000 +constexpr u32 UF11_EXPONENT_SHIFT = 6; +constexpr u32 UF10_EXPONENT_SHIFT = 5; +constexpr u32 RGB9E5_MANTISSA_BITS = 9; +constexpr u32 RGB9E5_EXP_BIAS = 1; +constexpr u32 F32_INFINITY = 0x7f800000; namespace NumberUtils { diff --git a/src/core/debug_state.cpp b/src/core/debug_state.cpp index 23ebcbb9b..f898117ec 100644 --- a/src/core/debug_state.cpp +++ b/src/core/debug_state.cpp @@ -157,7 +157,7 @@ std::optional DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_ } void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, - const AmdGpu::Liverpool::Regs& regs) { + const AmdGpu::Regs& regs) { std::scoped_lock lock{frame_dump_list_mutex}; auto dump = GetRegDump(base_addr, header_addr); @@ -170,15 +170,14 @@ void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, for (int i = 0; i < RegDump::MaxShaderStages; i++) { if ((*dump)->regs.stage_enable.IsStageEnabled(i)) { auto stage = (*dump)->regs.ProgramForStage(i); - if (stage->address_lo != 0) { - const auto& info = AmdGpu::Liverpool::SearchBinaryInfo(stage->Address()); - auto code = stage->Code(); + if (stage->address) { + const auto params = AmdGpu::GetParams(*stage); (*dump)->stages[i] = PipelineShaderProgramDump{ .name = Vulkan::PipelineCache::GetShaderName(Shader::StageFromIndex(i), - info.shader_hash), - .hash = info.shader_hash, + params.hash), + .hash = params.hash, .user_data = *stage, - .code = std::vector{code.begin(), code.end()}, + .code = std::vector{params.code.begin(), params.code.end()}, }; } } @@ -198,12 +197,12 @@ void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_a auto& cs = (*dump)->regs.cs_program; cs = cs_state; - const auto& info = AmdGpu::Liverpool::SearchBinaryInfo(cs.Address()); + const auto params = AmdGpu::GetParams(cs); 
(*dump)->cs_data = PipelineComputerProgramDump{ - .name = Vulkan::PipelineCache::GetShaderName(Shader::Stage::Compute, info.shader_hash), - .hash = info.shader_hash, + .name = Vulkan::PipelineCache::GetShaderName(Shader::Stage::Compute, params.hash), + .hash = params.hash, .cs_program = cs, - .code = std::vector{cs.Code().begin(), cs.Code().end()}, + .code = std::vector{params.code.begin(), params.code.end()}, }; } diff --git a/src/core/debug_state.h b/src/core/debug_state.h index b1b8c00d6..dbdc08c9c 100644 --- a/src/core/debug_state.h +++ b/src/core/debug_state.h @@ -11,7 +11,9 @@ #include #include "common/types.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/regs.h" +#include "video_core/renderer_vulkan/vk_common.h" #ifdef _WIN32 #ifndef WIN32_LEAN_AND_MEAN @@ -54,21 +56,21 @@ struct QueueDump { struct PipelineShaderProgramDump { std::string name; u64 hash; - Vulkan::Liverpool::ShaderProgram user_data{}; + AmdGpu::ShaderProgram user_data{}; std::vector code{}; }; struct PipelineComputerProgramDump { std::string name; u64 hash; - Vulkan::Liverpool::ComputeProgram cs_program{}; + AmdGpu::ComputeProgram cs_program{}; std::vector code{}; }; struct RegDump { bool is_compute{false}; static constexpr size_t MaxShaderStages = 5; - Vulkan::Liverpool::Regs regs{}; + AmdGpu::Regs regs; std::array stages{}; PipelineComputerProgramDump cs_data{}; }; @@ -219,9 +221,8 @@ public: void PushQueueDump(QueueDump dump); - void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, - const AmdGpu::Liverpool::Regs& regs); - using CsState = AmdGpu::Liverpool::ComputeProgram; + void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, const AmdGpu::Regs& regs); + using CsState = AmdGpu::ComputeProgram; void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state); void CollectShader(const std::string& name, Shader::LogicalStage l_stage, diff --git 
a/src/core/devtools/widget/cmd_list.cpp b/src/core/devtools/widget/cmd_list.cpp index f27122c88..3f83394b6 100644 --- a/src/core/devtools/widget/cmd_list.cpp +++ b/src/core/devtools/widget/cmd_list.cpp @@ -65,7 +65,7 @@ static HdrType GetNext(HdrType this_pm4, uint32_t n) { } void ParsePolygonControl(u32 value, bool begin_table) { - auto const reg = reinterpret_cast(value); + auto const reg = reinterpret_cast(value); if (!begin_table || BeginTable("PA_SU_SC_MODE_CNTL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { @@ -73,80 +73,80 @@ void ParsePolygonControl(u32 value, bool begin_table) { TableSetColumnIndex(0); Text("CULL_FRONT"); TableSetColumnIndex(1); - Text("%X", reg.cull_front.Value()); + Text("%X", reg.cull_front); TableNextRow(); TableSetColumnIndex(0); Text("CULL_BACK"); TableSetColumnIndex(1); - Text("%X", reg.cull_back.Value()); + Text("%X", reg.cull_back); TableNextRow(); TableSetColumnIndex(0); Text("FACE"); TableSetColumnIndex(1); - Text("%s", enum_name(reg.front_face.Value()).data()); + Text("%s", enum_name(reg.front_face).data()); TableNextRow(); TableSetColumnIndex(0); Text("POLY_MODE"); TableSetColumnIndex(1); - Text("%X", reg.enable_polygon_mode.Value()); + Text("%X", reg.enable_polygon_mode); TableNextRow(); TableSetColumnIndex(0); Text("POLYMODE_FRONT_PTYPE"); TableSetColumnIndex(1); - Text("%s", enum_name(reg.polygon_mode_front.Value()).data()); + Text("%s", enum_name(reg.polygon_mode_front).data()); TableNextRow(); TableSetColumnIndex(0); Text("POLYMODE_BACK_PTYPE"); TableSetColumnIndex(1); - Text("%s", enum_name(reg.polygon_mode_back.Value()).data()); + Text("%s", enum_name(reg.polygon_mode_back).data()); TableNextRow(); TableSetColumnIndex(0); Text("POLY_OFFSET_FRONT_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.enable_polygon_offset_front.Value()); + Text("%X", reg.enable_polygon_offset_front); TableNextRow(); TableSetColumnIndex(0); Text("POLY_OFFSET_BACK_ENABLE"); TableSetColumnIndex(1); - Text("%X", 
reg.enable_polygon_offset_back.Value()); + Text("%X", reg.enable_polygon_offset_back); TableNextRow(); TableSetColumnIndex(0); Text("POLY_OFFSET_PARA_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.enable_polygon_offset_para.Value()); + Text("%X", reg.enable_polygon_offset_para); TableNextRow(); TableSetColumnIndex(0); Text("VTX_WINDOW_OFFSET_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.enable_window_offset.Value()); + Text("%X", reg.enable_window_offset); TableNextRow(); TableSetColumnIndex(0); Text("PROVOKING_VTX_LAST"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.provoking_vtx_last.Value(), - enum_name(reg.provoking_vtx_last.Value()).data()); + Text("%X (%s)", static_cast(reg.provoking_vtx_last), + enum_name(reg.provoking_vtx_last).data()); TableNextRow(); TableSetColumnIndex(0); Text("PERSP_CORR_DIS"); TableSetColumnIndex(1); - Text("%X", reg.persp_corr_dis.Value()); + Text("%X", reg.persp_corr_dis); TableNextRow(); TableSetColumnIndex(0); Text("MULTI_PRIM_IB_ENA"); TableSetColumnIndex(1); - Text("%X", reg.multi_prim_ib_ena.Value()); + Text("%X", reg.multi_prim_ib_ena); if (begin_table) { EndTable(); @@ -155,7 +155,7 @@ void ParsePolygonControl(u32 value, bool begin_table) { } void ParseAaConfig(u32 value, bool begin_table) { - auto const reg = reinterpret_cast(value); + auto const reg = reinterpret_cast(value); if (!begin_table || BeginTable("PA_SC_AA_CONFIG", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { @@ -163,31 +163,31 @@ void ParseAaConfig(u32 value, bool begin_table) { TableSetColumnIndex(0); Text("MSAA_NUM_SAMPLES"); TableSetColumnIndex(1); - Text("%X", reg.msaa_num_samples.Value()); + Text("%X", reg.msaa_num_samples); TableNextRow(); TableSetColumnIndex(0); Text("AA_MASK_CENTROID_DTMN"); TableSetColumnIndex(1); - Text("%X", reg.aa_mask_centroid_dtmn.Value()); + Text("%X", reg.aa_mask_centroid_dtmn); TableNextRow(); TableSetColumnIndex(0); Text("MAX_SAMPLE_DIST"); TableSetColumnIndex(1); - Text("%X", 
reg.max_sample_dst.Value()); + Text("%X", reg.max_sample_dst); TableNextRow(); TableSetColumnIndex(0); Text("MSAA_EXPOSED_SAMPLES"); TableSetColumnIndex(1); - Text("%X", reg.msaa_exposed_samples.Value()); + Text("%X", reg.msaa_exposed_samples); TableNextRow(); TableSetColumnIndex(0); Text("DETAIL_TO_EXPOSED_MODE"); TableSetColumnIndex(1); - Text("%X", reg.detail_to_exposed_mode.Value()); + Text("%X", reg.detail_to_exposed_mode); if (begin_table) { EndTable(); @@ -196,7 +196,7 @@ void ParseAaConfig(u32 value, bool begin_table) { } void ParseViewportControl(u32 value, bool begin_table) { - auto const reg = reinterpret_cast(value); + auto const reg = reinterpret_cast(value); if (!begin_table || BeginTable("PA_CL_VTE_CNTL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { @@ -204,61 +204,61 @@ void ParseViewportControl(u32 value, bool begin_table) { TableSetColumnIndex(0); Text("VPORT_X_SCALE_ENA"); TableSetColumnIndex(1); - Text("%X", reg.xscale_enable.Value()); + Text("%X", reg.xscale_enable); TableNextRow(); TableSetColumnIndex(0); Text("VPORT_X_OFFSET_ENA"); TableSetColumnIndex(1); - Text("%X", reg.yoffset_enable.Value()); + Text("%X", reg.yoffset_enable); TableNextRow(); TableSetColumnIndex(0); Text("VPORT_Y_SCALE_ENA"); TableSetColumnIndex(1); - Text("%X", reg.yscale_enable.Value()); + Text("%X", reg.yscale_enable); TableNextRow(); TableSetColumnIndex(0); Text("VPORT_Y_OFFSET_ENA"); TableSetColumnIndex(1); - Text("%X", reg.yoffset_enable.Value()); + Text("%X", reg.yoffset_enable); TableNextRow(); TableSetColumnIndex(0); Text("VPORT_Z_SCALE_ENA"); TableSetColumnIndex(1); - Text("%X", reg.zscale_enable.Value()); + Text("%X", reg.zscale_enable); TableNextRow(); TableSetColumnIndex(0); Text("VPORT_Z_OFFSET_ENA"); TableSetColumnIndex(1); - Text("%X", reg.zoffset_enable.Value()); + Text("%X", reg.zoffset_enable); TableNextRow(); TableSetColumnIndex(0); Text("VTX_XY_FMT"); TableSetColumnIndex(1); - Text("%X", reg.xy_transformed.Value()); + Text("%X", 
reg.xy_transformed); TableNextRow(); TableSetColumnIndex(0); Text("VTX_Z_FMT"); TableSetColumnIndex(1); - Text("%X", reg.z_transformed.Value()); + Text("%X", reg.z_transformed); TableNextRow(); TableSetColumnIndex(0); Text("VTX_W0_FMT"); TableSetColumnIndex(1); - Text("%X", reg.w_transformed.Value()); + Text("%X", reg.w_transformed); TableNextRow(); TableSetColumnIndex(0); Text("PERFCOUNTER_REF"); TableSetColumnIndex(1); - Text("%X", reg.perfcounter_ref.Value()); + Text("%X", reg.perfcounter_ref); if (begin_table) { EndTable(); @@ -267,7 +267,7 @@ void ParseViewportControl(u32 value, bool begin_table) { } void ParseColorControl(u32 value, bool begin_table) { - auto const reg = reinterpret_cast(value); + auto const reg = reinterpret_cast(value); if (!begin_table || BeginTable("CB_COLOR_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { @@ -275,25 +275,25 @@ void ParseColorControl(u32 value, bool begin_table) { TableSetColumnIndex(0); Text("DISABLE_DUAL_QUAD__VI"); TableSetColumnIndex(1); - Text("%X", reg.disable_dual_quad.Value()); + Text("%X", reg.disable_dual_quad); TableNextRow(); TableSetColumnIndex(0); Text("DEGAMMA_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.degamma_enable.Value()); + Text("%X", reg.degamma_enable); TableNextRow(); TableSetColumnIndex(0); Text("MODE"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.mode.Value(), enum_name(reg.mode.Value()).data()); + Text("%X (%s)", static_cast(reg.mode), enum_name(reg.mode).data()); TableNextRow(); TableSetColumnIndex(0); Text("ROP3"); TableSetColumnIndex(1); - Text("%X", static_cast(reg.rop3.Value())); + Text("%X", static_cast(reg.rop3)); if (begin_table) { EndTable(); @@ -302,7 +302,7 @@ void ParseColorControl(u32 value, bool begin_table) { } void ParseColor0Info(u32 value, bool begin_table) { - auto const reg = reinterpret_cast(value); + auto const reg = reinterpret_cast(value); if (!begin_table || BeginTable("CB_COLOR_INFO", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { 
@@ -310,109 +310,109 @@ void ParseColor0Info(u32 value, bool begin_table) { TableSetColumnIndex(0); Text("ENDIAN"); TableSetColumnIndex(1); - Text("%s", enum_name(reg.endian.Value()).data()); + Text("%s", enum_name(reg.endian).data()); TableNextRow(); TableSetColumnIndex(0); Text("FORMAT"); TableSetColumnIndex(1); - Text("%s", enum_name(reg.format.Value()).data()); + Text("%s", enum_name(AmdGpu::DataFormat(reg.format)).data()); TableNextRow(); TableSetColumnIndex(0); Text("LINEAR_GENERAL"); TableSetColumnIndex(1); - Text("%X", reg.linear_general.Value()); + Text("%X", reg.linear_general); TableNextRow(); TableSetColumnIndex(0); Text("NUMBER_TYPE"); TableSetColumnIndex(1); - Text("%s", enum_name(reg.number_type.Value()).data()); + Text("%s", enum_name(AmdGpu::NumberFormat(reg.number_type)).data()); TableNextRow(); TableSetColumnIndex(0); Text("COMP_SWAP"); TableSetColumnIndex(1); - Text("%s", enum_name(reg.comp_swap.Value()).data()); + Text("%s", enum_name(reg.comp_swap).data()); TableNextRow(); TableSetColumnIndex(0); Text("FAST_CLEAR"); TableSetColumnIndex(1); - Text("%X", reg.fast_clear.Value()); + Text("%X", reg.fast_clear); TableNextRow(); TableSetColumnIndex(0); Text("COMPRESSION"); TableSetColumnIndex(1); - Text("%X", reg.compression.Value()); + Text("%X", reg.compression); TableNextRow(); TableSetColumnIndex(0); Text("BLEND_CLAMP"); TableSetColumnIndex(1); - Text("%X", reg.blend_clamp.Value()); + Text("%X", reg.blend_clamp); TableNextRow(); TableSetColumnIndex(0); Text("BLEND_BYPASS"); TableSetColumnIndex(1); - Text("%X", reg.blend_bypass.Value()); + Text("%X", reg.blend_bypass); TableNextRow(); TableSetColumnIndex(0); Text("SIMPLE_FLOAT"); TableSetColumnIndex(1); - Text("%X", reg.simple_float.Value()); + Text("%X", reg.simple_float); TableNextRow(); TableSetColumnIndex(0); Text("ROUND_MODE"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.round_mode.Value(), enum_name(reg.round_mode.Value()).data()); + Text("%X (%s)", static_cast(reg.round_mode), 
enum_name(reg.round_mode).data()); TableNextRow(); TableSetColumnIndex(0); Text("CMASK_IS_LINEAR"); TableSetColumnIndex(1); - Text("%X", reg.cmask_is_linear.Value()); + Text("%X", reg.cmask_is_linear); TableNextRow(); TableSetColumnIndex(0); Text("BLEND_OPT_DONT_RD_DST"); TableSetColumnIndex(1); - Text("%X", reg.blend_opt_dont_rd_dst.Value()); + Text("%X", reg.blend_opt_dont_rd_dst); TableNextRow(); TableSetColumnIndex(0); Text("BLEND_OPT_DISCARD_PIXEL"); TableSetColumnIndex(1); - Text("%X", reg.blend_opt_discard_pixel.Value()); + Text("%X", reg.blend_opt_discard_pixel); TableNextRow(); TableSetColumnIndex(0); Text("FMASK_COMPRESSION_DISABLE__CI__VI"); TableSetColumnIndex(1); - Text("%X", reg.fmask_compression_disable_ci.Value()); + Text("%X", reg.fmask_compression_disable_ci); TableNextRow(); TableSetColumnIndex(0); Text("FMASK_COMPRESS_1FRAG_ONLY__VI"); TableSetColumnIndex(1); - Text("%X", reg.fmask_compress_1frag_only.Value()); + Text("%X", reg.fmask_compress_1frag_only); TableNextRow(); TableSetColumnIndex(0); Text("DCC_ENABLE__VI"); TableSetColumnIndex(1); - Text("%X", reg.dcc_enable.Value()); + Text("%X", reg.dcc_enable); TableNextRow(); TableSetColumnIndex(0); Text("CMASK_ADDR_TYPE__VI"); TableSetColumnIndex(1); - Text("%X", reg.cmask_addr_type.Value()); + Text("%X", reg.cmask_addr_type); if (begin_table) { EndTable(); @@ -421,7 +421,7 @@ void ParseColor0Info(u32 value, bool begin_table) { } void ParseColor0Attrib(u32 value, bool begin_table) { - auto const reg = reinterpret_cast(value); + auto const reg = reinterpret_cast(value); if (!begin_table || BeginTable("CB_COLOR_ATTRIB", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { @@ -429,37 +429,37 @@ void ParseColor0Attrib(u32 value, bool begin_table) { TableSetColumnIndex(0); Text("TILE_MODE_INDEX"); TableSetColumnIndex(1); - Text("%s", enum_name(reg.tile_mode_index.Value()).data()); + Text("%s", enum_name(reg.tile_mode_index).data()); TableNextRow(); TableSetColumnIndex(0); 
Text("FMASK_TILE_MODE_INDEX"); TableSetColumnIndex(1); - Text("%X", reg.fmask_tile_mode_index.Value()); + Text("%X", reg.fmask_tile_mode_index); TableNextRow(); TableSetColumnIndex(0); Text("FMASK_BANK_HEIGHT"); TableSetColumnIndex(1); - Text("%X", reg.fmask_bank_height.Value()); + Text("%X", reg.fmask_bank_height); TableNextRow(); TableSetColumnIndex(0); Text("NUM_SAMPLES"); TableSetColumnIndex(1); - Text("%X", reg.num_samples_log2.Value()); + Text("%X", reg.num_samples_log2); TableNextRow(); TableSetColumnIndex(0); Text("NUM_FRAGMENTS"); TableSetColumnIndex(1); - Text("%X", reg.num_fragments_log2.Value()); + Text("%X", reg.num_fragments_log2); TableNextRow(); TableSetColumnIndex(0); Text("FORCE_DST_ALPHA_1"); TableSetColumnIndex(1); - Text("%X", reg.force_dst_alpha_1.Value()); + Text("%X", reg.force_dst_alpha_1); if (begin_table) { EndTable(); @@ -468,7 +468,7 @@ void ParseColor0Attrib(u32 value, bool begin_table) { } void ParseBlendControl(u32 value, bool begin_table) { - auto const reg = reinterpret_cast(value); + auto const reg = reinterpret_cast(value); if (!begin_table || BeginTable("CB_BLEND_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { @@ -476,59 +476,59 @@ void ParseBlendControl(u32 value, bool begin_table) { TableSetColumnIndex(0); Text("COLOR_SRCBLEND"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.color_src_factor.Value(), - enum_name(reg.color_src_factor.Value()).data()); + Text("%X (%s)", static_cast(reg.color_src_factor), + enum_name(reg.color_src_factor).data()); TableNextRow(); TableSetColumnIndex(0); Text("COLOR_COMB_FCN"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.color_func.Value(), enum_name(reg.color_func.Value()).data()); + Text("%X (%s)", static_cast(reg.color_func), enum_name(reg.color_func).data()); TableNextRow(); TableSetColumnIndex(0); Text("COLOR_DESTBLEND"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.color_dst_factor.Value(), - enum_name(reg.color_dst_factor.Value()).data()); + Text("%X 
(%s)", static_cast(reg.color_dst_factor), + enum_name(reg.color_dst_factor).data()); TableNextRow(); TableSetColumnIndex(0); Text("ALPHA_SRCBLEND"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.alpha_src_factor.Value(), - enum_name(reg.alpha_src_factor.Value()).data()); + Text("%X (%s)", static_cast(reg.alpha_src_factor), + enum_name(reg.alpha_src_factor).data()); TableNextRow(); TableSetColumnIndex(0); Text("ALPHA_COMB_FCN"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.alpha_func.Value(), enum_name(reg.alpha_func.Value()).data()); + Text("%X (%s)", static_cast(reg.alpha_func), enum_name(reg.alpha_func).data()); TableNextRow(); TableSetColumnIndex(0); Text("ALPHA_DESTBLEND"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.alpha_dst_factor.Value(), - enum_name(reg.alpha_dst_factor.Value()).data()); + Text("%X (%s)", static_cast(reg.alpha_dst_factor), + enum_name(reg.alpha_dst_factor).data()); TableNextRow(); TableSetColumnIndex(0); Text("SEPARATE_ALPHA_BLEND"); TableSetColumnIndex(1); - Text("%X", reg.separate_alpha_blend.Value()); + Text("%X", reg.separate_alpha_blend); TableNextRow(); TableSetColumnIndex(0); Text("ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.enable.Value()); + Text("%X", reg.enable); TableNextRow(); TableSetColumnIndex(0); Text("DISABLE_ROP3"); TableSetColumnIndex(1); - Text("%X", reg.disable_rop3.Value()); + Text("%X", reg.disable_rop3); if (begin_table) { EndTable(); @@ -537,7 +537,7 @@ void ParseBlendControl(u32 value, bool begin_table) { } void ParseDepthRenderControl(u32 value, bool begin_table) { - auto const reg = reinterpret_cast(value); + auto const reg = reinterpret_cast(value); if (!begin_table || BeginTable("DB_RENDER_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { @@ -545,61 +545,61 @@ void ParseDepthRenderControl(u32 value, bool begin_table) { TableSetColumnIndex(0); Text("DEPTH_CLEAR_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.depth_clear_enable.Value()); + Text("%X", 
reg.depth_clear_enable); TableNextRow(); TableSetColumnIndex(0); Text("STENCIL_CLEAR_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.stencil_clear_enable.Value()); + Text("%X", reg.stencil_clear_enable); TableNextRow(); TableSetColumnIndex(0); Text("DEPTH_COPY"); TableSetColumnIndex(1); - Text("%X", reg.depth_clear_enable.Value()); + Text("%X", reg.depth_clear_enable); TableNextRow(); TableSetColumnIndex(0); Text("STENCIL_COPY"); TableSetColumnIndex(1); - Text("%X", reg.stencil_copy.Value()); + Text("%X", reg.stencil_copy); TableNextRow(); TableSetColumnIndex(0); Text("RESUMMARIZE_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.resummarize_enable.Value()); + Text("%X", reg.resummarize_enable); TableNextRow(); TableSetColumnIndex(0); Text("STENCIL_COMPRESS_DISABLE"); TableSetColumnIndex(1); - Text("%X", reg.stencil_compress_disable.Value()); + Text("%X", reg.stencil_compress_disable); TableNextRow(); TableSetColumnIndex(0); Text("DEPTH_COMPRESS_DISABLE"); TableSetColumnIndex(1); - Text("%X", reg.depth_compress_disable.Value()); + Text("%X", reg.depth_compress_disable); TableNextRow(); TableSetColumnIndex(0); Text("COPY_CENTROID"); TableSetColumnIndex(1); - Text("%X", reg.copy_centroid.Value()); + Text("%X", reg.copy_centroid); TableNextRow(); TableSetColumnIndex(0); Text("COPY_SAMPLE"); TableSetColumnIndex(1); - Text("%X", reg.copy_sample.Value()); + Text("%X", reg.copy_sample); TableNextRow(); TableSetColumnIndex(0); Text("DECOMPRESS_ENABLE__VI"); TableSetColumnIndex(1); - Text("%X", reg.decompress_enable.Value()); + Text("%X", reg.decompress_enable); if (begin_table) { EndTable(); @@ -608,7 +608,7 @@ void ParseDepthRenderControl(u32 value, bool begin_table) { } void ParseDepthControl(u32 value, bool begin_table) { - auto const reg = reinterpret_cast(value); + auto const reg = reinterpret_cast(value); if (!begin_table || BeginTable("DB_DEPTH_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { @@ -616,63 +616,63 @@ void ParseDepthControl(u32 
value, bool begin_table) { TableSetColumnIndex(0); Text("STENCIL_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.stencil_enable.Value()); + Text("%X", reg.stencil_enable); TableNextRow(); TableSetColumnIndex(0); Text("Z_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.depth_enable.Value()); + Text("%X", reg.depth_enable); TableNextRow(); TableSetColumnIndex(0); Text("Z_WRITE_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.depth_write_enable.Value()); + Text("%X", reg.depth_write_enable); TableNextRow(); TableSetColumnIndex(0); Text("DEPTH_BOUNDS_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.depth_bounds_enable.Value()); + Text("%X", reg.depth_bounds_enable); TableNextRow(); TableSetColumnIndex(0); Text("ZFUNC"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.depth_func.Value(), enum_name(reg.depth_func.Value()).data()); + Text("%X (%s)", static_cast(reg.depth_func), enum_name(reg.depth_func).data()); TableNextRow(); TableSetColumnIndex(0); Text("BACKFACE_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.backface_enable.Value()); + Text("%X", reg.backface_enable); TableNextRow(); TableSetColumnIndex(0); Text("STENCILFUNC"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.stencil_ref_func.Value(), - enum_name(reg.stencil_ref_func.Value()).data()); + Text("%X (%s)", static_cast(reg.stencil_ref_func), + enum_name(reg.stencil_ref_func).data()); TableNextRow(); TableSetColumnIndex(0); Text("STENCILFUNC_BF"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.stencil_bf_func.Value(), - enum_name(reg.stencil_bf_func.Value()).data()); + Text("%X (%s)", static_cast(reg.stencil_bf_func), + enum_name(reg.stencil_bf_func).data()); TableNextRow(); TableSetColumnIndex(0); Text("ENABLE_COLOR_WRITES_ON_DEPTH_FAIL"); TableSetColumnIndex(1); - Text("%X", reg.enable_color_writes_on_depth_fail.Value()); + Text("%X", reg.enable_color_writes_on_depth_fail); TableNextRow(); TableSetColumnIndex(0); Text("DISABLE_COLOR_WRITES_ON_DEPTH_PASS"); TableSetColumnIndex(1); - 
Text("%X", reg.disable_color_writes_on_depth_pass.Value()); + Text("%X", reg.disable_color_writes_on_depth_pass); if (begin_table) { EndTable(); @@ -681,7 +681,7 @@ void ParseDepthControl(u32 value, bool begin_table) { } void ParseEqaa(u32 value, bool begin_table) { - auto const reg = reinterpret_cast(value); + auto const reg = reinterpret_cast(value); if (!begin_table || BeginTable("DB_DEPTH_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { @@ -689,73 +689,73 @@ void ParseEqaa(u32 value, bool begin_table) { TableSetColumnIndex(0); Text("MAX_ANCHOR_SAMPLES"); TableSetColumnIndex(1); - Text("%X", reg.max_anchor_samples.Value()); + Text("%X", reg.max_anchor_samples); TableNextRow(); TableSetColumnIndex(0); Text("PS_ITER_SAMPLES"); TableSetColumnIndex(1); - Text("%X", reg.ps_iter_samples.Value()); + Text("%X", reg.ps_iter_samples); TableNextRow(); TableSetColumnIndex(0); Text("MASK_EXPORT_NUM_SAMPLES"); TableSetColumnIndex(1); - Text("%X", reg.mask_export_num_samples.Value()); + Text("%X", reg.mask_export_num_samples); TableNextRow(); TableSetColumnIndex(0); Text("ALPHA_TO_MASK_NUM_SAMPLES"); TableSetColumnIndex(1); - Text("%X", reg.alpha_to_mask_num_samples.Value()); + Text("%X", reg.alpha_to_mask_num_samples); TableNextRow(); TableSetColumnIndex(0); Text("HIGH_QUALITY_INTERSECTIONS"); TableSetColumnIndex(1); - Text("%X", reg.high_quality_intersections.Value()); + Text("%X", reg.high_quality_intersections); TableNextRow(); TableSetColumnIndex(0); Text("INCOHERENT_EQAA_READS"); TableSetColumnIndex(1); - Text("%X", reg.incoherent_eqaa_reads.Value()); + Text("%X", reg.incoherent_eqaa_reads); TableNextRow(); TableSetColumnIndex(0); Text("INTERPOLATE_COMP_Z"); TableSetColumnIndex(1); - Text("%X", reg.interpolate_comp_z.Value()); + Text("%X", reg.interpolate_comp_z); TableNextRow(); TableSetColumnIndex(0); Text("INTERPOLATE_SRC_Z"); TableSetColumnIndex(1); - Text("%X", reg.interpolate_src_z.Value()); + Text("%X", reg.interpolate_src_z); TableNextRow(); 
TableSetColumnIndex(0); Text("STATIC_ANCHOR_ASSOCIATIONS"); TableSetColumnIndex(1); - Text("%X", reg.static_anchor_associations.Value()); + Text("%X", reg.static_anchor_associations); TableNextRow(); TableSetColumnIndex(0); Text("ALPHA_TO_MASK_EQAA_DISABLE"); TableSetColumnIndex(1); - Text("%X", reg.alpha_to_mask_eqaa_disable.Value()); + Text("%X", reg.alpha_to_mask_eqaa_disable); TableNextRow(); TableSetColumnIndex(0); Text("OVERRASTERIZATION_AMOUNT"); TableSetColumnIndex(1); - Text("%X", reg.overrasterization_amount.Value()); + Text("%X", reg.overrasterization_amount); TableNextRow(); TableSetColumnIndex(0); Text("ENABLE_POSTZ_OVERRASTERIZATION"); TableSetColumnIndex(1); - Text("%X", reg.enable_postz_overrasterization.Value()); + Text("%X", reg.enable_postz_overrasterization); if (begin_table) { EndTable(); @@ -764,7 +764,7 @@ void ParseEqaa(u32 value, bool begin_table) { } void ParseZInfo(u32 value, bool begin_table) { - auto const reg = reinterpret_cast(value); + auto const reg = reinterpret_cast(value); if (!begin_table || BeginTable("DB_DEPTH_CONTROL", 2, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { @@ -772,61 +772,61 @@ void ParseZInfo(u32 value, bool begin_table) { TableSetColumnIndex(0); Text("FORMAT"); TableSetColumnIndex(1); - Text("%X (%s)", (u32)reg.format.Value(), enum_name(reg.format.Value()).data()); + Text("%X (%s)", static_cast(reg.format), enum_name(reg.format).data()); TableNextRow(); TableSetColumnIndex(0); Text("NUM_SAMPLES"); TableSetColumnIndex(1); - Text("%X", reg.num_samples.Value()); + Text("%X", reg.num_samples); TableNextRow(); TableSetColumnIndex(0); Text("TILE_SPLIT__CI__VI"); TableSetColumnIndex(1); - Text("%X", reg.tile_split.Value()); + Text("%X", reg.tile_split); TableNextRow(); TableSetColumnIndex(0); Text("TILE_MODE_INDEX"); TableSetColumnIndex(1); - Text("%X", static_cast(reg.tile_mode_index.Value())); + Text("%X", static_cast(reg.tile_mode_index)); TableNextRow(); TableSetColumnIndex(0); 
Text("DECOMPRESS_ON_N_ZPLANES__VI"); TableSetColumnIndex(1); - Text("%X", reg.decompress_on_n_zplanes.Value()); + Text("%X", reg.decompress_on_n_zplanes); TableNextRow(); TableSetColumnIndex(0); Text("ALLOW_EXPCLEAR"); TableSetColumnIndex(1); - Text("%X", reg.allow_expclear.Value()); + Text("%X", reg.allow_expclear); TableNextRow(); TableSetColumnIndex(0); Text("READ_SIZE"); TableSetColumnIndex(1); - Text("%X", reg.read_size.Value()); + Text("%X", reg.read_size); TableNextRow(); TableSetColumnIndex(0); Text("TILE_SURFACE_ENABLE"); TableSetColumnIndex(1); - Text("%X", reg.tile_surface_en.Value()); + Text("%X", reg.tile_surface_enable); TableNextRow(); TableSetColumnIndex(0); Text("CLEAR_DISALLOWED__VI"); TableSetColumnIndex(1); - Text("%X", reg.clear_disallowed.Value()); + Text("%X", reg.clear_disallowed); TableNextRow(); TableSetColumnIndex(0); Text("ZRANGE_PRECISION"); TableSetColumnIndex(1); - Text("%X", reg.zrange_precision.Value()); + Text("%X", reg.zrange_precision); if (begin_table) { EndTable(); @@ -1515,4 +1515,4 @@ void CmdListViewer::Draw(bool only_batches_view, CmdListFilter& filter) { PopID(); } -} // namespace Core::Devtools::Widget \ No newline at end of file +} // namespace Core::Devtools::Widget diff --git a/src/core/devtools/widget/cmd_list.h b/src/core/devtools/widget/cmd_list.h index e2c61f6b9..d6344afb9 100644 --- a/src/core/devtools/widget/cmd_list.h +++ b/src/core/devtools/widget/cmd_list.h @@ -5,14 +5,13 @@ #pragma once -#include #include #include #include "common.h" #include "common/types.h" -#include "imgui_memory_editor.h" -#include "reg_view.h" +#include "core/devtools/widget/imgui_memory_editor.h" +#include "core/devtools/widget/reg_view.h" namespace AmdGpu { union PM4Type3Header; diff --git a/src/core/devtools/widget/reg_popup.cpp b/src/core/devtools/widget/reg_popup.cpp index 90d8c9681..95385c7a0 100644 --- a/src/core/devtools/widget/reg_popup.cpp +++ b/src/core/devtools/widget/reg_popup.cpp @@ -16,7 +16,7 @@ using 
magic_enum::enum_name; namespace Core::Devtools::Widget { -void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) { +void RegPopup::DrawColorBuffer(const AmdGpu::ColorBuffer& buffer) { if (BeginTable("COLOR_BUFFER", 2, ImGuiTableFlags_Borders)) { TableNextRow(); @@ -36,7 +36,7 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) { if (TreeNode("Color0Info")) { TableNextRow(); TableNextColumn(); - ParseColor0Info(buffer.info.u32all, false); + ParseColor0Info(buffer.info.raw, false); TreePop(); } @@ -45,7 +45,7 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) { if (TreeNode("Color0Attrib")) { TableNextRow(); TableNextColumn(); - ParseColor0Attrib(buffer.attrib.u32all, false); + ParseColor0Attrib(buffer.attrib.raw, false); TreePop(); } @@ -75,9 +75,8 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) { } } -void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) { - const auto& [depth_buffer, depth_control] = depth_data; - +void RegPopup::DrawDepthBuffer(const AmdGpu::DepthBuffer& buffer, + const AmdGpu::DepthControl control) { SeparatorText("Depth buffer"); if (BeginTable("DEPTH_BUFFER", 2, ImGuiTableFlags_Borders)) { @@ -85,31 +84,31 @@ void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) { // clang-format off DrawValueRowList( - "Z_INFO.FORMAT", depth_buffer.z_info.format, - "Z_INFO.NUM_SAMPLES", depth_buffer.z_info.num_samples, - "Z_INFO.TILE_SPLIT", depth_buffer.z_info.tile_split, - "Z_INFO.TILE_MODE_INDEX", depth_buffer.z_info.tile_mode_index, - "Z_INFO.DECOMPRESS_ON_N_ZPLANES", depth_buffer.z_info.decompress_on_n_zplanes, - "Z_INFO.ALLOW_EXPCLEAR", depth_buffer.z_info.allow_expclear, - "Z_INFO.READ_SIZE", depth_buffer.z_info.read_size, - "Z_INFO.TILE_SURFACE_EN", depth_buffer.z_info.tile_surface_en, - "Z_INFO.CLEAR_DISALLOWED", depth_buffer.z_info.clear_disallowed, - "Z_INFO.ZRANGE_PRECISION", depth_buffer.z_info.zrange_precision, - 
"STENCIL_INFO.FORMAT", depth_buffer.stencil_info.format, - "Z_READ_BASE", depth_buffer.z_read_base, - "STENCIL_READ_BASE", depth_buffer.stencil_read_base, - "Z_WRITE_BASE", depth_buffer.z_write_base, - "STENCIL_WRITE_BASE", depth_buffer.stencil_write_base, - "DEPTH_SIZE.PITCH_TILE_MAX", depth_buffer.depth_size.pitch_tile_max, - "DEPTH_SIZE.HEIGHT_TILE_MAX", depth_buffer.depth_size.height_tile_max, - "DEPTH_SLICE.TILE_MAX", depth_buffer.depth_slice.tile_max, - "Pitch()", depth_buffer.Pitch(), - "Height()", depth_buffer.Height(), - "DepthAddress()", depth_buffer.DepthAddress(), - "StencilAddress()", depth_buffer.StencilAddress(), - "NumSamples()", depth_buffer.NumSamples(), - "NumBits()", depth_buffer.NumBits(), - "GetDepthSliceSize()", depth_buffer.GetDepthSliceSize() + "Z_INFO.FORMAT", buffer.z_info.format, + "Z_INFO.NUM_SAMPLES", buffer.z_info.num_samples, + "Z_INFO.TILE_SPLIT", buffer.z_info.tile_split, + "Z_INFO.TILE_MODE_INDEX", buffer.z_info.tile_mode_index, + "Z_INFO.DECOMPRESS_ON_N_ZPLANES", buffer.z_info.decompress_on_n_zplanes, + "Z_INFO.ALLOW_EXPCLEAR", buffer.z_info.allow_expclear, + "Z_INFO.READ_SIZE", buffer.z_info.read_size, + "Z_INFO.TILE_SURFACE_ENABLE", buffer.z_info.tile_surface_enable, + "Z_INFO.CLEAR_DISALLOWED", buffer.z_info.clear_disallowed, + "Z_INFO.ZRANGE_PRECISION", buffer.z_info.zrange_precision, + "STENCIL_INFO.FORMAT", buffer.stencil_info.format, + "Z_READ_BASE", buffer.z_read_base, + "STENCIL_READ_BASE", buffer.stencil_read_base, + "Z_WRITE_BASE", buffer.z_write_base, + "STENCIL_WRITE_BASE", buffer.stencil_write_base, + "DEPTH_SIZE.PITCH_TILE_MAX", buffer.depth_size.pitch_tile_max, + "DEPTH_SIZE.HEIGHT_TILE_MAX", buffer.depth_size.height_tile_max, + "DEPTH_SLICE.TILE_MAX", buffer.depth_slice.tile_max, + "Pitch()", buffer.Pitch(), + "Height()", buffer.Height(), + "DepthAddress()", buffer.DepthAddress(), + "StencilAddress()", buffer.StencilAddress(), + "NumSamples()", buffer.NumSamples(), + "NumBits()", buffer.NumBits(), + 
"GetDepthSliceSize()", buffer.GetDepthSliceSize() ); // clang-format on @@ -121,16 +120,16 @@ void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) { // clang-format off DrawValueRowList( - "STENCIL_ENABLE", depth_control.stencil_enable, - "DEPTH_ENABLE", depth_control.depth_enable, - "DEPTH_WRITE_ENABLE", depth_control.depth_write_enable, - "DEPTH_BOUNDS_ENABLE", depth_control.depth_bounds_enable, - "DEPTH_FUNC", depth_control.depth_func, - "BACKFACE_ENABLE", depth_control.backface_enable, - "STENCIL_FUNC", depth_control.stencil_ref_func, - "STENCIL_FUNC_BF", depth_control.stencil_bf_func, - "ENABLE_COLOR_WRITES_ON_DEPTH_FAIL", depth_control.enable_color_writes_on_depth_fail, - "DISABLE_COLOR_WRITES_ON_DEPTH_PASS", depth_control.disable_color_writes_on_depth_pass + "STENCIL_ENABLE", control.stencil_enable, + "DEPTH_ENABLE", control.depth_enable, + "DEPTH_WRITE_ENABLE", control.depth_write_enable, + "DEPTH_BOUNDS_ENABLE", control.depth_bounds_enable, + "DEPTH_FUNC", control.depth_func, + "BACKFACE_ENABLE", control.backface_enable, + "STENCIL_FUNC", control.stencil_ref_func, + "STENCIL_FUNC_BF", control.stencil_bf_func, + "ENABLE_COLOR_WRITES_ON_DEPTH_FAIL", control.enable_color_writes_on_depth_fail, + "DISABLE_COLOR_WRITES_ON_DEPTH_PASS", control.disable_color_writes_on_depth_pass ); // clang-format on @@ -143,15 +142,17 @@ RegPopup::RegPopup() { id = unique_id++; } -void RegPopup::SetData(const std::string& base_title, AmdGpu::Liverpool::ColorBuffer color_buffer, - u32 cb_id) { - this->data = color_buffer; +void RegPopup::SetData(const std::string& base_title, AmdGpu::ColorBuffer color_buffer, u32 cb_id) { + this->type = DataType::Color; + this->color = color_buffer; this->title = fmt::format("{}/CB #{}", base_title, cb_id); } -void RegPopup::SetData(const std::string& base_title, AmdGpu::Liverpool::DepthBuffer depth_buffer, - AmdGpu::Liverpool::DepthControl depth_control) { - this->data = std::make_tuple(depth_buffer, depth_control); +void 
RegPopup::SetData(const std::string& base_title, AmdGpu::DepthBuffer depth_buffer, + AmdGpu::DepthControl depth_control) { + this->type = DataType::Depth; + this->depth.buffer = depth_buffer; + this->depth.control = depth_control; this->title = fmt::format("{}/Depth", base_title); } @@ -161,10 +162,10 @@ void RegPopup::SetPos(ImVec2 pos, bool auto_resize) { Begin(name, &open, flags); SetWindowPos(pos); if (auto_resize) { - if (std::holds_alternative(data)) { + if (type == DataType::Color) { SetWindowSize({365.0f, 520.0f}); KeepWindowInside(); - } else if (std::holds_alternative(data)) { + } else if (type == DataType::Depth) { SetWindowSize({404.0f, 543.0f}); KeepWindowInside(); } @@ -182,10 +183,10 @@ void RegPopup::Draw() { moved = true; } - if (const auto* buffer = std::get_if(&data)) { - DrawColorBuffer(*buffer); - } else if (const auto* depth_data = std::get_if(&data)) { - DrawDepthBuffer(*depth_data); + if (type == DataType::Color) { + DrawColorBuffer(color); + } else if (type == DataType::Depth) { + DrawDepthBuffer(depth.buffer, depth.control); } } End(); diff --git a/src/core/devtools/widget/reg_popup.h b/src/core/devtools/widget/reg_popup.h index 9ccd60ac0..cade7c0d2 100644 --- a/src/core/devtools/widget/reg_popup.h +++ b/src/core/devtools/widget/reg_popup.h @@ -3,12 +3,10 @@ #pragma once -#include - #include - #include "common/types.h" -#include "video_core/renderer_vulkan/liverpool_to_vk.h" +#include "video_core/amdgpu/regs_color.h" +#include "video_core/amdgpu/regs_depth.h" namespace Core::Devtools::Widget { @@ -16,15 +14,24 @@ class RegPopup { int id; ImGuiWindowFlags flags{ImGuiWindowFlags_NoSavedSettings}; - using DepthBuffer = std::tuple; - ImVec2 last_pos; - std::variant data; + AmdGpu::ColorBuffer color; + struct { + AmdGpu::DepthBuffer buffer; + AmdGpu::DepthControl control; + } depth; + enum class DataType { + None = 0, + Color = 1, + Depth = 2, + }; + DataType type{}; std::string title{}; - static void DrawColorBuffer(const 
AmdGpu::Liverpool::ColorBuffer& buffer); + static void DrawColorBuffer(const AmdGpu::ColorBuffer& buffer); - static void DrawDepthBuffer(const DepthBuffer& depth_data); + static void DrawDepthBuffer(const AmdGpu::DepthBuffer& buffer, + const AmdGpu::DepthControl control); public: bool open = false; @@ -32,11 +39,10 @@ public: RegPopup(); - void SetData(const std::string& base_title, AmdGpu::Liverpool::ColorBuffer color_buffer, - u32 cb_id); + void SetData(const std::string& base_title, AmdGpu::ColorBuffer color_buffer, u32 cb_id); - void SetData(const std::string& base_title, AmdGpu::Liverpool::DepthBuffer depth_buffer, - AmdGpu::Liverpool::DepthControl depth_control); + void SetData(const std::string& base_title, AmdGpu::DepthBuffer depth_buffer, + AmdGpu::DepthControl depth_control); void SetPos(ImVec2 pos, bool auto_resize = false); diff --git a/src/core/devtools/widget/reg_view.cpp b/src/core/devtools/widget/reg_view.cpp index fa3c5e3e6..b8f534cd9 100644 --- a/src/core/devtools/widget/reg_view.cpp +++ b/src/core/devtools/widget/reg_view.cpp @@ -29,7 +29,7 @@ namespace Core::Devtools::Widget { void RegView::ProcessShader(int shader_id) { std::vector shader_code; - Vulkan::Liverpool::UserData user_data; + AmdGpu::UserData user_data; if (data.is_compute) { shader_code = data.cs_data.code; user_data = data.cs_data.cs_program.user_data; @@ -129,7 +129,7 @@ void RegView::DrawGraphicsRegs() { } }; - for (int cb = 0; cb < AmdGpu::Liverpool::NumColorBuffers; ++cb) { + for (int cb = 0; cb < AmdGpu::NUM_COLOR_BUFFERS; ++cb) { PushID(cb); TableNextRow(); @@ -246,8 +246,7 @@ void RegView::SetData(DebugStateType::RegDump _data, const std::string& base_tit default_reg_popup.SetData(title, regs.depth_buffer, regs.depth_control); default_reg_popup.open = true; } - } else if (last_selected_cb >= 0 && - last_selected_cb < AmdGpu::Liverpool::NumColorBuffers) { + } else if (last_selected_cb >= 0 && last_selected_cb < AmdGpu::NUM_COLOR_BUFFERS) { const auto& buffer = 
regs.color_buffers[last_selected_cb]; const bool has_cb = buffer && regs.color_target_mask.GetMask(last_selected_cb); if (has_cb) { @@ -348,7 +347,7 @@ void RegView::Draw() { } else { shader->hex_view.DrawContents(shader->user_data.data(), shader->user_data.size() * - sizeof(Vulkan::Liverpool::UserData::value_type)); + sizeof(AmdGpu::UserData::value_type)); } } End(); @@ -392,4 +391,4 @@ void RegView::Draw() { } } -} // namespace Core::Devtools::Widget \ No newline at end of file +} // namespace Core::Devtools::Widget diff --git a/src/core/devtools/widget/reg_view.h b/src/core/devtools/widget/reg_view.h index 3ac8ec077..7f20c4d3e 100644 --- a/src/core/devtools/widget/reg_view.h +++ b/src/core/devtools/widget/reg_view.h @@ -2,17 +2,18 @@ // SPDX-License-Identifier: GPL-2.0-or-later #pragma once + #include "core/debug_state.h" -#include "imgui_memory_editor.h" -#include "reg_popup.h" -#include "text_editor.h" +#include "core/devtools/widget/imgui_memory_editor.h" +#include "core/devtools/widget/reg_popup.h" +#include "core/devtools/widget/text_editor.h" namespace Core::Devtools::Widget { struct ShaderCache { MemoryEditor hex_view; TextEditor dis_view; - Vulkan::Liverpool::UserData user_data; + AmdGpu::UserData user_data; }; class RegView { @@ -54,4 +55,4 @@ public: void Draw(); }; -} // namespace Core::Devtools::Widget \ No newline at end of file +} // namespace Core::Devtools::Widget diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index 6b74107a7..4951c4f1a 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -10,6 +10,7 @@ #include "core/libraries/videoout/driver.h" #include "core/libraries/videoout/videoout_error.h" #include "imgui/renderer/imgui_core.h" +#include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_presenter.h" extern std::unique_ptr presenter; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp 
b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 2f01d00db..804c1d076 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -5,6 +5,8 @@ #include #include #include +#include + #include "common/assert.h" #include "common/func_traits.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -14,7 +16,6 @@ #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/runtime_info.h" -#include "video_core/amdgpu/types.h" namespace Shader::Backend::SPIRV { namespace { @@ -136,7 +137,7 @@ Id TypeId(const EmitContext& ctx, IR::Type type) { case IR::Type::U32: return ctx.U32[1]; default: - throw NotImplementedException("Phi node type {}", type); + UNREACHABLE_MSG("Phi node type {}", type); } } @@ -224,7 +225,7 @@ spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) { case AmdGpu::TessellationType::Quad: return spv::ExecutionMode::Quads; } - UNREACHABLE_MSG("Tessellation primitive {}", primitive); + UNREACHABLE_MSG("Tessellation primitive {}", magic_enum::enum_name(primitive)); } spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) { @@ -238,7 +239,7 @@ spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) { default: break; } - UNREACHABLE_MSG("Tessellation spacing {}", spacing); + UNREACHABLE_MSG("Tessellation spacing {}", magic_enum::enum_name(spacing)); } void SetupCapabilities(const Info& info, const Profile& profile, const RuntimeInfo& runtime_info, @@ -482,14 +483,12 @@ Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { void EmitVoid(EmitContext&) {} Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { - throw NotImplementedException("Forward identity declaration"); + UNREACHABLE_MSG("Forward identity declaration"); } Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) { const Id id{ctx.Def(value)}; - if (!Sirit::ValidId(id)) { - throw 
NotImplementedException("Forward identity declaration"); - } + ASSERT_MSG(Sirit::ValidId(id), "Forward identity declaration"); return id; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 16270a090..6dd1637dd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -397,13 +397,11 @@ Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords } Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) { - // TODO: This is not yet implemented - throw NotImplementedException("SPIR-V Instruction"); + UNREACHABLE_MSG("SPIR-V Instruction"); } Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) { - // TODO: This is not yet implemented - throw NotImplementedException("SPIR-V Instruction"); + UNREACHABLE_MSG("SPIR-V Instruction"); } Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 804d98b74..d574b440c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -98,11 +98,11 @@ void EmitEmitPrimitive(EmitContext& ctx) { } void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { - throw NotImplementedException("Geometry streams"); + UNREACHABLE_MSG("Geometry streams"); } void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { - throw NotImplementedException("Geometry streams"); + UNREACHABLE_MSG("Geometry streams"); } void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id fmt, Id arg0, Id arg1, Id arg2, Id arg3) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index a57cd47f6..cc6d19075 100644 --- 
a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -6,7 +6,6 @@ #include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/runtime_info.h" -#include "video_core/amdgpu/types.h" #include "video_core/buffer_cache/buffer_cache.h" #include @@ -109,7 +108,7 @@ Id EmitContext::Def(const IR::Value& value) { case IR::Type::StringLiteral: return String(value.StringLiteral()); default: - throw NotImplementedException("Immediate type {}", value.Type()); + UNREACHABLE_MSG("Immediate type {}", value.Type()); } } @@ -786,7 +785,7 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte void EmitContext::DefineBuffers() { for (const auto& desc : info.buffers) { const auto buf_sharp = desc.GetSharp(info); - const bool is_storage = desc.IsStorage(buf_sharp, profile); + const bool is_storage = desc.IsStorage(buf_sharp); // Set indexes for special buffers. 
if (desc.buffer_type == BufferType::Flatbuf) { @@ -921,7 +920,7 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { default: break; } - throw InvalidArgument("Invalid texture type {}", type); + UNREACHABLE_MSG("Invalid texture type {}", type); } void EmitContext::DefineImagesAndSamplers() { diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h deleted file mode 100644 index 293b2fb41..000000000 --- a/src/shader_recompiler/exception.h +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include - -#include - -namespace Shader { - -class Exception : public std::exception { -public: - explicit Exception(std::string message) noexcept : err_message{std::move(message)} {} - - [[nodiscard]] const char* what() const noexcept override { - return err_message.c_str(); - } - - void Prepend(std::string_view prepend) { - err_message.insert(0, prepend); - } - - void Append(std::string_view append) { - err_message += append; - } - -private: - std::string err_message; -}; - -class LogicError : public Exception { -public: - template - explicit LogicError(const char* message, Args&&... args) - : Exception{fmt::format(fmt::runtime(message), std::forward(args)...)} {} -}; - -class RuntimeError : public Exception { -public: - template - explicit RuntimeError(const char* message, Args&&... args) - : Exception{fmt::format(fmt::runtime(message), std::forward(args)...)} {} -}; - -class NotImplementedException : public Exception { -public: - template - explicit NotImplementedException(const char* message, Args&&... args) - : Exception{fmt::format(fmt::runtime(message), std::forward(args)...)} { - Append(" is not implemented"); - } -}; - -class InvalidArgument : public Exception { -public: - template - explicit InvalidArgument(const char* message, Args&&... 
args) - : Exception{fmt::format(fmt::runtime(message), std::forward(args)...)} {} -}; - -} // namespace Shader diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index 41ae3c045..963b2c0d5 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -191,7 +191,7 @@ std::string DumpExpr(const Statement* stmt) { void SanitizeNoBreaks(const Tree& tree) { if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { - throw NotImplementedException("Capturing statement with break nodes"); + UNREACHABLE_MSG("Capturing statement with break nodes"); } } @@ -584,7 +584,7 @@ private: case StatementType::Variable: return ir.GetGotoVariable(stmt.id); default: - throw NotImplementedException("Statement type {}", u32(stmt.type)); + UNREACHABLE_MSG("Statement type {}", u32(stmt.type)); } } diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 1290c59c2..cadef5157 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -4,22 +4,22 @@ #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/position.h" #include "shader_recompiler/ir/reinterpret.h" +#include "shader_recompiler/profile.h" #include "shader_recompiler/runtime_info.h" namespace Shader::Gcn { -static AmdGpu::NumberFormat NumberFormatCompressed( - AmdGpu::Liverpool::ShaderExportFormat export_format) { +static AmdGpu::NumberFormat NumberFormatCompressed(AmdGpu::ShaderExportFormat export_format) { switch (export_format) { - case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16: + case AmdGpu::ShaderExportFormat::ABGR_FP16: return AmdGpu::NumberFormat::Float; - case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16: + case AmdGpu::ShaderExportFormat::ABGR_UNORM16: 
return AmdGpu::NumberFormat::Unorm; - case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16: + case AmdGpu::ShaderExportFormat::ABGR_SNORM16: return AmdGpu::NumberFormat::Snorm; - case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16: + case AmdGpu::ShaderExportFormat::ABGR_UINT16: return AmdGpu::NumberFormat::Uint; - case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16: + case AmdGpu::ShaderExportFormat::ABGR_SINT16: return AmdGpu::NumberFormat::Sint; default: UNREACHABLE_MSG("Unimplemented compressed export format {}", @@ -27,18 +27,18 @@ static AmdGpu::NumberFormat NumberFormatCompressed( } } -static u32 MaskFromExportFormat(u8 mask, AmdGpu::Liverpool::ShaderExportFormat export_format) { +static u32 MaskFromExportFormat(u8 mask, AmdGpu::ShaderExportFormat export_format) { switch (export_format) { - case AmdGpu::Liverpool::ShaderExportFormat::R_32: + case AmdGpu::ShaderExportFormat::R_32: // Red only return mask & 1; - case AmdGpu::Liverpool::ShaderExportFormat::GR_32: + case AmdGpu::ShaderExportFormat::GR_32: // Red and Green only return mask & 3; - case AmdGpu::Liverpool::ShaderExportFormat::AR_32: + case AmdGpu::ShaderExportFormat::AR_32: // Red and Alpha only return mask & 9; - case AmdGpu::Liverpool::ShaderExportFormat::ABGR_32: + case AmdGpu::ShaderExportFormat::ABGR_32: // All components return mask; default: @@ -59,7 +59,7 @@ void Translator::ExportRenderTarget(const GcnInst& inst) { } const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx]; - if (color_buffer.export_format == AmdGpu::Liverpool::ShaderExportFormat::Zero || exp.en == 0) { + if (color_buffer.export_format == AmdGpu::ShaderExportFormat::Zero || exp.en == 0) { // No export return; } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 9e42ebea9..668882254 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ 
-11,9 +11,9 @@ #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/reinterpret.h" +#include "shader_recompiler/profile.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" -#include "video_core/amdgpu/types.h" #define MAGIC_ENUM_RANGE_MIN 0 #define MAGIC_ENUM_RANGE_MAX 1515 diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 7e7a1c346..f3da22845 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/profile.h" namespace Shader::Gcn { diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index ccf2c45e0..a3be34390 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -5,7 +5,6 @@ #include #include -#include #include #include "common/assert.h" #include "common/types.h" @@ -17,110 +16,11 @@ #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/type.h" #include "shader_recompiler/params.h" -#include "shader_recompiler/profile.h" +#include "shader_recompiler/resource.h" #include "shader_recompiler/runtime_info.h" -#include "video_core/amdgpu/resource.h" namespace Shader { -static constexpr size_t NumUserDataRegs = 16; -static constexpr size_t NumImages = 64; -static constexpr size_t NumBuffers = 40; -static constexpr size_t NumSamplers = 16; -static constexpr size_t NumFMasks = 8; - -enum class BufferType : u32 { - Guest, - Flatbuf, - BdaPagetable, - FaultBuffer, - GdsBuffer, - SharedMemory, -}; - -struct Info; - -struct BufferResource { - u32 sharp_idx; - IR::Type used_types; - AmdGpu::Buffer inline_cbuf; - BufferType buffer_type; - u8 instance_attrib{}; - bool is_written{}; - 
bool is_formatted{}; - - bool IsSpecial() const noexcept { - return buffer_type != BufferType::Guest; - } - - bool IsStorage(const AmdGpu::Buffer& buffer, const Profile& profile) const noexcept { - // When using uniform buffers, a size is required at compilation time, so we need to - // either compile a lot of shader specializations to handle each size or just force it to - // the maximum possible size always. However, for some vendors the shader-supplied size is - // used for bounds checking uniform buffer accesses, so the latter would effectively turn - // off buffer robustness behavior. Instead, force storage buffers which are bounds checked - // using the actual buffer size. We are assuming the performance hit from this is - // acceptable. - return true; // buffer.GetSize() > profile.max_ubo_size || is_written; - } - - [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; -}; -using BufferResourceList = boost::container::small_vector; - -struct ImageResource { - u32 sharp_idx; - bool is_depth{}; - bool is_atomic{}; - bool is_array{}; - bool is_written{}; - bool is_r128{}; - - [[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; -}; -using ImageResourceList = boost::container::small_vector; - -struct SamplerResource { - u32 sharp_idx; - AmdGpu::Sampler inline_sampler; - u32 is_inline_sampler : 1; - u32 associated_image : 4; - u32 disable_aniso : 1; - - constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; -}; -using SamplerResourceList = boost::container::small_vector; - -struct FMaskResource { - u32 sharp_idx; - - constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; -}; -using FMaskResourceList = boost::container::small_vector; - -struct PushData { - static constexpr u32 XOffsetIndex = 0; - static constexpr u32 YOffsetIndex = 1; - static constexpr u32 XScaleIndex = 2; - static constexpr u32 YScaleIndex = 3; - static constexpr u32 UdRegsIndex = 4; - static constexpr u32 
BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4; - - float xoffset; - float yoffset; - float xscale; - float yscale; - std::array ud_regs; - std::array buf_offsets; - - void AddOffset(u32 binding, u32 offset) { - ASSERT(offset < 256 && binding < buf_offsets.size()); - buf_offsets[binding] = offset; - } -}; -static_assert(sizeof(PushData) <= 128, - "PushData size is greater than minimum size guaranteed by Vulkan spec"); - enum class Qualifier : u8 { None, Smooth, @@ -235,7 +135,7 @@ struct Info { Dynamic = 1 << 1, }; ReadConstType readconst_types{}; - bool uses_dma{false}; + bool uses_dma{}; explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, @@ -262,7 +162,7 @@ struct Info { u32 mask = ud_mask.mask; while (mask) { const u32 index = std::countr_zero(mask); - ASSERT(bnd.user_data < NumUserDataRegs && index < NumUserDataRegs); + ASSERT(bnd.user_data < NUM_USER_DATA_REGS && index < NUM_USER_DATA_REGS); mask &= ~(1U << index); push.ud_regs[bnd.user_data++] = user_data[index]; } @@ -276,9 +176,8 @@ struct Info { void RefreshFlatBuf() { flattened_ud_buf.resize(srt_info.flattened_bufsize_dw); - ASSERT(user_data.size() <= NumUserDataRegs); + ASSERT(user_data.size() <= NUM_USER_DATA_REGS); std::memcpy(flattened_ud_buf.data(), user_data.data(), user_data.size_bytes()); - // Run the JIT program to walk the SRT and write the leaves to a flat buffer if (srt_info.walker_func) { srt_info.walker_func(user_data.data(), flattened_ud_buf.data()); } @@ -296,42 +195,4 @@ struct Info { }; DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType); -constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { - const auto buffer = inline_cbuf ? 
inline_cbuf : info.ReadUdSharp(sharp_idx); - if (!buffer.Valid()) { - LOG_DEBUG(Render, "Encountered invalid buffer sharp"); - return AmdGpu::Buffer::Null(); - } - return buffer; -} - -constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept { - AmdGpu::Image image{}; - if (!is_r128) { - image = info.ReadUdSharp(sharp_idx); - } else { - const auto raw = info.ReadUdSharp(sharp_idx); - std::memcpy(&image, &raw, sizeof(raw)); - } - if (!image.Valid()) { - LOG_DEBUG(Render_Vulkan, "Encountered invalid image sharp"); - image = AmdGpu::Image::Null(is_depth); - } else if (is_depth) { - const auto data_fmt = image.GetDataFmt(); - if (data_fmt != AmdGpu::DataFormat::Format16 && data_fmt != AmdGpu::DataFormat::Format32) { - LOG_DEBUG(Render_Vulkan, "Encountered non-depth image used with depth instruction!"); - image = AmdGpu::Image::Null(true); - } - } - return image; -} - -constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept { - return is_inline_sampler ? 
inline_sampler : info.ReadUdSharp(sharp_idx); -} - -constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept { - return info.ReadUdSharp(sharp_idx); -} - } // namespace Shader diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index 22af927d7..745c5f956 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -30,7 +30,7 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, const auto result_it{instructions.insert(insertion_point, *inst)}; if (inst->NumArgs() != args.size()) { - throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op); + UNREACHABLE_MSG("Invalid number of arguments {} in {}", args.size(), op); } std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable { inst->SetArg(index, arg); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 8363cee0a..1e77dc677 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -5,8 +5,6 @@ #include #include #include "common/assert.h" -#include "ir_emitter.h" -#include "shader_recompiler/exception.h" #include "shader_recompiler/ir/debug_print.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/opcodes.h" @@ -196,7 +194,7 @@ U1 IREmitter::Condition(IR::Condition cond) { case IR::Condition::Execnz: return GetExec(); default: - throw NotImplementedException(""); + UNREACHABLE_MSG(""); } } @@ -1828,7 +1826,7 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F32F64& value) { default: break; } - throw NotImplementedException("Invalid destination bitsize {}", bitsize); + UNREACHABLE_MSG("Invalid destination bitsize {}", bitsize); } U32U64 IREmitter::ConvertFToU(size_t bitsize, const F32F64& value) { @@ -1929,7 +1927,7 @@ U8U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U8U16U32U64& value) default: break; } - 
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); + UNREACHABLE_MSG("Conversion from {} to {} bits", value.Type(), result_bitsize); } U8U16U32U64 IR::IREmitter::SConvert(size_t result_bitsize, const U8U16U32U64& value) { @@ -1946,8 +1944,7 @@ U8U16U32U64 IR::IREmitter::SConvert(size_t result_bitsize, const U8U16U32U64& va default: break; } - throw NotImplementedException("Signed Conversion from {} to {} bits", value.Type(), - result_bitsize); + UNREACHABLE_MSG("Signed Conversion from {} to {} bits", value.Type(), result_bitsize); } F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { @@ -1978,7 +1975,7 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { default: break; } - throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); + UNREACHABLE_MSG("Conversion from {} to {} bits", value.Type(), result_bitsize); } Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value, diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index eaab05cb7..40ce69df8 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -2,10 +2,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include #include -#include "shader_recompiler/exception.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/type.h" #include "shader_recompiler/ir/value.h" @@ -21,9 +19,7 @@ Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { } Inst::Inst(const Inst& base) : op{base.op}, flags{base.flags} { - if (base.op == Opcode::Phi) { - throw NotImplementedException("Copying phi node"); - } + ASSERT_MSG(base.op != Opcode::Phi, "Copying phi node"); std::construct_at(&args); const size_t num_args{base.NumArgs()}; for (size_t index = 0; index < num_args; ++index) { @@ -150,7 +146,7 
@@ IR::Type Inst::Type() const { void Inst::SetArg(size_t index, Value value) { if (index >= NumArgs()) { - throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); + UNREACHABLE_MSG("Out of bounds argument index {} in opcode {}", index, op); } const IR::Value arg{Arg(index)}; if (!arg.IsImmediate()) { @@ -171,7 +167,7 @@ Block* Inst::PhiBlock(size_t index) const { UNREACHABLE_MSG("{} is not a Phi instruction", op); } if (index >= phi_args.size()) { - throw InvalidArgument("Out of bounds argument index {} in phi instruction"); + UNREACHABLE_MSG("Out of bounds argument index {} in phi instruction"); } return phi_args[index].first; } diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index e0c99655d..1d3b46b43 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -205,7 +205,7 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { } info.srt_info.walker_func = c.getCurr(); - pass_info.dst_off_dw = NumUserDataRegs; + pass_info.dst_off_dw = NUM_USER_DATA_REGS; ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw); for (const auto& [sgpr_base, root] : pass_info.srt_roots) { diff --git a/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp index 6a9214f34..103060536 100644 --- a/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp +++ b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "shader_recompiler/ir/program.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp index 83416bfb8..f8818b622 100644 --- 
a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp +++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp @@ -96,7 +96,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim if (info.gs_copy_data.output_vertices && info.gs_copy_data.output_vertices != output_vertices) { ASSERT_MSG(output_vertices > info.gs_copy_data.output_vertices && - gs_info.mode == AmdGpu::Liverpool::GsMode::Mode::ScenarioG, + gs_info.mode == AmdGpu::GsScenario::ScenarioG, "Invalid geometry shader vertex configuration scenario = {}, max_vert_out = " "{}, output_vertices = {}", u32(gs_info.mode), output_vertices, info.gs_copy_data.output_vertices); diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index faae52c8f..3df4f8b86 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -3,6 +3,7 @@ #include "common/config.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/profile.h" #include "video_core/buffer_cache/buffer_cache.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index b92c5d555..bca44125b 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -13,7 +13,6 @@ #include #include "common/assert.h" -#include "shader_recompiler/exception.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/opcodes.h" #include "shader_recompiler/ir/patch.h" @@ -105,7 +104,7 @@ public: explicit TypedValue(const Value& value) : Value(value) { if ((value.Type() & type_) == IR::Type::Void) { - throw InvalidArgument("Incompatible types {} and {}", type_, value.Type()); + UNREACHABLE_MSG("Incompatible types {} and {}", type_, value.Type()); } } diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp 
index 2da9e7b01..547d4524f 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/frontend/structured_control_flow.h" #include "shader_recompiler/ir/passes/ir_passes.h" #include "shader_recompiler/ir/post_order.h" +#include "shader_recompiler/profile.h" #include "shader_recompiler/recompiler.h" namespace Shader { diff --git a/src/shader_recompiler/resource.h b/src/shader_recompiler/resource.h new file mode 100644 index 000000000..29545d0bb --- /dev/null +++ b/src/shader_recompiler/resource.h @@ -0,0 +1,146 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" +#include "shader_recompiler/ir/type.h" +#include "video_core/amdgpu/resource.h" + +#include + +namespace Shader { + +static constexpr u32 NUM_USER_DATA_REGS = 16; +static constexpr u32 NUM_IMAGES = 64; +static constexpr u32 NUM_BUFFERS = 40; +static constexpr u32 NUM_SAMPLERS = 16; +static constexpr u32 NUM_FMASKS = 8; + +enum class BufferType : u32 { + Guest, + Flatbuf, + BdaPagetable, + FaultBuffer, + GdsBuffer, + SharedMemory, +}; + +struct Info; + +struct BufferResource { + u32 sharp_idx; + IR::Type used_types; + AmdGpu::Buffer inline_cbuf; + BufferType buffer_type; + u8 instance_attrib{}; + bool is_written{}; + bool is_formatted{}; + + bool IsSpecial() const noexcept { + return buffer_type != BufferType::Guest; + } + + bool IsStorage([[maybe_unused]] const AmdGpu::Buffer buffer) const noexcept { + // When using uniform buffers, a size is required at compilation time, so we need to + // either compile a lot of shader specializations to handle each size or just force it to + // the maximum possible size always. However, for some vendors the shader-supplied size is + // used for bounds checking uniform buffer accesses, so the latter would effectively turn + // off buffer robustness behavior. 
Instead, force storage buffers which are bounds checked + // using the actual buffer size. We are assuming the performance hit from this is + // acceptable. + return true; // buffer.GetSize() > profile.max_ubo_size || is_written; + } + + constexpr AmdGpu::Buffer GetSharp(const auto& info) const noexcept { + const auto buffer = + inline_cbuf ? inline_cbuf : info.template ReadUdSharp(sharp_idx); + if (!buffer.Valid()) { + LOG_DEBUG(Render, "Encountered invalid buffer sharp"); + return AmdGpu::Buffer::Null(); + } + return buffer; + } +}; +using BufferResourceList = boost::container::static_vector; + +struct ImageResource { + u32 sharp_idx; + bool is_depth{}; + bool is_atomic{}; + bool is_array{}; + bool is_written{}; + bool is_r128{}; + + constexpr AmdGpu::Image GetSharp(const auto& info) const noexcept { + AmdGpu::Image image{}; + if (!is_r128) { + image = info.template ReadUdSharp(sharp_idx); + } else { + const auto raw = info.template ReadUdSharp(sharp_idx); + std::memcpy(&image, &raw, sizeof(raw)); + } + if (!image.Valid()) { + LOG_DEBUG(Render_Vulkan, "Encountered invalid image sharp"); + image = AmdGpu::Image::Null(is_depth); + } else if (is_depth) { + const auto data_fmt = image.GetDataFmt(); + if (data_fmt != AmdGpu::DataFormat::Format16 && + data_fmt != AmdGpu::DataFormat::Format32) { + LOG_DEBUG(Render_Vulkan, + "Encountered non-depth image used with depth instruction!"); + image = AmdGpu::Image::Null(true); + } + } + return image; + } +}; +using ImageResourceList = boost::container::static_vector; + +struct SamplerResource { + u32 sharp_idx; + AmdGpu::Sampler inline_sampler; + u32 is_inline_sampler : 1; + u32 associated_image : 4; + u32 disable_aniso : 1; + + constexpr AmdGpu::Sampler GetSharp(const auto& info) const noexcept { + return is_inline_sampler ? 
inline_sampler + : info.template ReadUdSharp(sharp_idx); + } +}; +using SamplerResourceList = boost::container::static_vector; + +struct FMaskResource { + u32 sharp_idx; + + constexpr AmdGpu::Image GetSharp(const auto& info) const noexcept { + return info.template ReadUdSharp(sharp_idx); + } +}; +using FMaskResourceList = boost::container::static_vector; + +struct PushData { + static constexpr u32 XOffsetIndex = 0; + static constexpr u32 YOffsetIndex = 1; + static constexpr u32 XScaleIndex = 2; + static constexpr u32 YScaleIndex = 3; + static constexpr u32 UdRegsIndex = 4; + static constexpr u32 BufOffsetIndex = UdRegsIndex + NUM_USER_DATA_REGS / 4; + + float xoffset; + float yoffset; + float xscale; + float yscale; + std::array ud_regs; + std::array buf_offsets; + + void AddOffset(u32 binding, u32 offset) { + ASSERT(offset < 256 && binding < buf_offsets.size()); + buf_offsets[binding] = offset; + } +}; +static_assert(sizeof(PushData) <= 128, + "PushData size is greater than minimum size guaranteed by Vulkan spec"); + +} // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 13de0f381..9624c465f 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -3,13 +3,12 @@ #pragma once -#include #include -#include #include "common/types.h" #include "shader_recompiler/frontend/tessellation.h" -#include "video_core/amdgpu/liverpool.h" -#include "video_core/amdgpu/types.h" +#include "video_core/amdgpu/pixel_format.h" +#include "video_core/amdgpu/regs_shader.h" +#include "video_core/amdgpu/regs_vertex.h" namespace Shader { @@ -36,7 +35,7 @@ enum class LogicalStage : u32 { constexpr u32 MaxStageTypes = static_cast(LogicalStage::NumLogicalStages); -[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { +constexpr Stage StageFromIndex(size_t index) noexcept { return static_cast(index); } @@ -87,7 +86,6 @@ struct VertexRuntimeInfo { bool clip_disable{}; u32 
step_rate_0; u32 step_rate_1; - // Domain AmdGpu::TessellationType tess_type; AmdGpu::TessellationTopology tess_topology; AmdGpu::TessellationPartitioning tess_partitioning; @@ -110,22 +108,24 @@ struct VertexRuntimeInfo { }; struct HullRuntimeInfo { - // from registers u32 num_input_control_points; u32 num_threads; AmdGpu::TessellationType tess_type; bool offchip_lds_enable; - - // from tess constants buffer u32 ls_stride; u32 hs_output_cp_stride; u32 hs_output_base; - auto operator<=>(const HullRuntimeInfo&) const noexcept = default; + void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { + ls_stride = tess_constants.ls_stride; + hs_output_cp_stride = tess_constants.hs_cp_stride; + hs_output_base = tess_constants.hs_output_base; + } - // It might be possible for a non-passthrough TCS to have these conditions, in some - // dumb situation. - // In that case, it should be fine to assume passthrough and declare some extra + bool operator==(const HullRuntimeInfo&) const = default; + + // It might be possible for a non-passthrough TCS to have these conditions, in some dumb + // situation. In that case, it should be fine to assume passthrough and declare some extra // output control points and attributes that shouldnt be read by the TES anyways bool IsPassthrough() const { return hs_output_base == 0 && ls_stride == hs_output_cp_stride && num_threads == 1; @@ -138,12 +138,6 @@ struct HullRuntimeInfo { u32 NumOutputControlPoints() const { return IsPassthrough() ? 
num_input_control_points : num_threads; } - - void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { - ls_stride = tess_constants.ls_stride; - hs_output_cp_stride = tess_constants.hs_cp_stride; - hs_output_base = tess_constants.hs_output_base; - } }; static constexpr auto GsMaxOutputStreams = 4u; @@ -157,11 +151,11 @@ struct GeometryRuntimeInfo { u32 out_vertex_data_size{}; AmdGpu::PrimitiveType in_primitive; GsOutputPrimTypes out_primitive; - AmdGpu::Liverpool::GsMode::Mode mode; + AmdGpu::GsScenario mode; std::span vs_copy; u64 vs_copy_hash; - bool operator==(const GeometryRuntimeInfo& other) const noexcept { + bool operator==(const GeometryRuntimeInfo& other) const { return num_outputs == other.num_outputs && outputs == other.outputs && num_invocations && other.num_invocations && output_vertices == other.output_vertices && in_primitive == other.in_primitive && @@ -181,10 +175,10 @@ struct PsColorBuffer { AmdGpu::DataFormat data_format : 6; AmdGpu::NumberFormat num_format : 4; AmdGpu::NumberConversion num_conversion : 3; - AmdGpu::Liverpool::ShaderExportFormat export_format : 4; + AmdGpu::ShaderExportFormat export_format : 4; AmdGpu::CompMapping swizzle; - bool operator==(const PsColorBuffer& other) const noexcept = default; + bool operator==(const PsColorBuffer& other) const = default; }; struct FragmentRuntimeInfo { @@ -200,18 +194,18 @@ struct FragmentRuntimeInfo { bool operator==(const PsInput&) const noexcept = default; }; - AmdGpu::Liverpool::PsInput en_flags; - AmdGpu::Liverpool::PsInput addr_flags; + AmdGpu::PsInput en_flags; + AmdGpu::PsInput addr_flags; u32 num_inputs; std::array inputs; std::array color_buffers; - AmdGpu::Liverpool::ShaderExportFormat z_export_format; + AmdGpu::ShaderExportFormat z_export_format; u8 mrtz_mask; bool dual_source_blending; bool operator==(const FragmentRuntimeInfo& other) const noexcept { return std::ranges::equal(color_buffers, other.color_buffers) && - en_flags.raw == other.en_flags.raw && 
addr_flags.raw == other.addr_flags.raw && + en_flags == other.en_flags && addr_flags == other.addr_flags && num_inputs == other.num_inputs && z_export_format == other.z_export_format && mrtz_mask == other.mrtz_mask && dual_source_blending == other.dual_source_blending && std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(), diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index a55f2a687..a7215e29e 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -9,6 +9,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/info.h" +#include "shader_recompiler/profile.h" namespace Shader { @@ -114,9 +115,9 @@ struct StageSpecialization { } u32 binding{}; ForEachSharp(binding, buffers, info->buffers, - [profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { spec.stride = sharp.GetStride(); - spec.is_storage = desc.IsStorage(sharp, profile_); + spec.is_storage = desc.IsStorage(sharp); spec.is_formatted = desc.is_formatted; spec.swizzle_enable = sharp.swizzle_enable; if (spec.is_formatted) { diff --git a/src/video_core/amdgpu/cb_db_extent.h b/src/video_core/amdgpu/cb_db_extent.h new file mode 100644 index 000000000..623c72b59 --- /dev/null +++ b/src/video_core/amdgpu/cb_db_extent.h @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace AmdGpu { + +union CbDbExtent { + struct { + u16 width; + u16 height; + }; + u32 raw; + + bool Valid() const { + return raw != 0; + } +}; + +} // namespace AmdGpu diff --git a/src/video_core/amdgpu/default_context.cpp b/src/video_core/amdgpu/default_context.cpp deleted file mode 100644 index 01229e7b1..000000000 --- 
a/src/video_core/amdgpu/default_context.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "common/types.h" -#include "video_core/amdgpu/liverpool.h" - -#include - -namespace AmdGpu { - -// The following values are taken from fpPS4: -// https://github.com/red-prig/fpPS4/blob/436b43064be4c78229500f3d3c054fc76639247d/chip/pm4_pfp.pas#L410 -// -static constexpr std::array reg_array_default{ - 0x00000000u, 0x80000000u, 0x40004000u, 0xdeadbeefu, 0x00000000u, 0x40004000u, 0x00000000u, - 0x40004000u, 0x00000000u, 0x40004000u, 0x00000000u, 0x40004000u, 0xaa99aaaau, 0x00000000u, - 0xdeadbeefu, 0xdeadbeefu, 0x80000000u, 0x40004000u, 0x00000000u, 0x00000000u, 0x80000000u, - 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, - 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, - 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, - 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, - 0x40004000u, 0x80000000u, 0x40004000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, - 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, - 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, - 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, - 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, - 0x2a00161au, -}; - -void Liverpool::Regs::SetDefaults() { - std::memset(reg_array.data(), 0, reg_array.size() * sizeof(u32)); - - std::memcpy(®_array[ContextRegWordOffset + 0x80], reg_array_default.data(), - reg_array_default.size() * sizeof(u32)); - - // Individual context regs values - reg_array[ContextRegWordOffset + 0x000d] = 0x40004000u; - reg_array[ContextRegWordOffset + 
0x01b6] = 0x00000002u; - reg_array[ContextRegWordOffset + 0x0204] = 0x00090000u; - reg_array[ContextRegWordOffset + 0x0205] = 0x00000004u; - reg_array[ContextRegWordOffset + 0x0295] = 0x00000100u; - reg_array[ContextRegWordOffset + 0x0296] = 0x00000080u; - reg_array[ContextRegWordOffset + 0x0297] = 0x00000002u; - reg_array[ContextRegWordOffset + 0x02aa] = 0x00001000u; - reg_array[ContextRegWordOffset + 0x02f7] = 0x00001000u; - reg_array[ContextRegWordOffset + 0x02f9] = 0x00000005u; - reg_array[ContextRegWordOffset + 0x02fa] = 0x3f800000u; - reg_array[ContextRegWordOffset + 0x02fb] = 0x3f800000u; - reg_array[ContextRegWordOffset + 0x02fc] = 0x3f800000u; - reg_array[ContextRegWordOffset + 0x02fd] = 0x3f800000u; - reg_array[ContextRegWordOffset + 0x0316] = 0x0000000eu; - reg_array[ContextRegWordOffset + 0x0317] = 0x00000010u; -} - -} // namespace AmdGpu diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index a504737a5..85bfeb1a1 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -12,6 +12,7 @@ #include "core/libraries/kernel/process.h" #include "core/libraries/videoout/driver.h" #include "core/memory.h" +#include "core/platform.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" #include "video_core/renderdoc.h" @@ -305,14 +306,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset; + const auto reg_addr = Regs::ConfigRegWordOffset + set_data->reg_offset; const auto* payload = reinterpret_cast(header + 2); std::memcpy(®s.reg_array[reg_addr], payload, (count - 1) * sizeof(u32)); break; } case PM4ItOpcode::SetContextReg: { const auto* set_data = reinterpret_cast(header); - const auto reg_addr = ContextRegWordOffset + set_data->reg_offset; + const auto reg_addr = Regs::ContextRegWordOffset + set_data->reg_offset; const auto* payload = reinterpret_cast(header + 2); 
std::memcpy(®s.reg_array[reg_addr], payload, (count - 1) * sizeof(u32)); @@ -335,7 +336,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spantype3.count; if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) { @@ -358,7 +359,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spantype3.count; if (nop_offset == 0x04) { @@ -394,14 +395,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanreg_offset - 0x200); std::memcpy(addr, header + 2, set_size); } else { - std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, - set_size); + std::memcpy(®s.reg_array[Regs::ShRegWordOffset + set_data->reg_offset], + header + 2, set_size); } break; } case PM4ItOpcode::SetUconfigReg: { const auto* set_data = reinterpret_cast(header); - std::memcpy(®s.reg_array[UconfigRegWordOffset + set_data->reg_offset], + std::memcpy(®s.reg_array[Regs::UconfigRegWordOffset + set_data->reg_offset], header + 2, (count - 1) * sizeof(u32)); break; } @@ -418,7 +419,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); regs.max_index_size = draw_index->max_size; regs.index_base_address.base_addr_lo = draw_index->index_base_lo; - regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi); + regs.index_base_address.base_addr_hi = draw_index->index_base_hi; regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; if (DebugState.DumpingCurrentReg()) { @@ -582,7 +583,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); regs.index_base_address.base_addr_lo = index_base->addr_lo; - regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi); + regs.index_base_address.base_addr_hi = index_base->addr_hi; break; } case PM4ItOpcode::IndexBufferSize: { @@ -638,12 +639,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - event_eop->SignalFence([](void* address, u64 data, u32 num_bytes) { 
- auto* memory = Core::Memory::Instance(); - if (!memory->TryWriteBacking(address, &data, num_bytes)) { - memcpy(address, &data, num_bytes); - } - }); + event_eop->SignalFence( + [](void* address, u64 data, u32 num_bytes) { + auto* memory = Core::Memory::Instance(); + if (!memory->TryWriteBacking(address, &data, num_bytes)) { + memcpy(address, &data, num_bytes); + } + }, + [] { Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop); }); break; } case PM4ItOpcode::DmaData: { @@ -947,8 +950,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { (set_data->reg_offset - 0x200); std::memcpy(addr, header + 2, set_size); } else { - std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, - set_size); + std::memcpy(®s.reg_array[Regs::ShRegWordOffset + set_data->reg_offset], + header + 2, set_size); } break; } @@ -1030,7 +1033,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { } case PM4ItOpcode::ReleaseMem: { const auto* release_mem = reinterpret_cast(header); - release_mem->SignalFence(static_cast(queue.pipe_id)); + release_mem->SignalFence([pipe_id = queue.pipe_id] { + Platform::IrqC::Instance()->Signal(static_cast(pipe_id)); + }); break; } case PM4ItOpcode::EventWrite: { @@ -1053,11 +1058,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { FIBER_EXIT; } -std::pair, std::span> Liverpool::CopyCmdBuffers( - std::span dcb, std::span ccb) { +Liverpool::CmdBuffer Liverpool::CopyCmdBuffers(std::span dcb, std::span ccb) { auto& queue = mapped_queues[GfxQueueId]; - - // std::vector resize can invalidate spans for commands in flight ASSERT_MSG(queue.dcb_buffer.capacity() >= queue.dcb_buffer_offset + dcb.size(), "dcb copy buffer out of reserved space"); ASSERT_MSG(queue.ccb_buffer.capacity() >= queue.ccb_buffer_offset + ccb.size(), @@ -1068,8 +1070,8 @@ std::pair, std::span> Liverpool::CopyCmdBuffers( queue.ccb_buffer.resize( std::max(queue.ccb_buffer.size(), queue.ccb_buffer_offset + 
ccb.size())); - u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset; - u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset; + const u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset; + const u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset; if (!dcb.empty()) { std::memcpy(queue.dcb_buffer.data() + queue.dcb_buffer_offset, dcb.data(), dcb.size_bytes()); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 10d4648b7..249ea2ba6 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -3,7 +3,6 @@ #pragma once -#include #include #include #include @@ -15,15 +14,11 @@ #include #include "common/assert.h" -#include "common/bit_field.h" -#include "common/polyfill_thread.h" #include "common/slot_vector.h" #include "common/types.h" #include "common/unique_function.h" -#include "shader_recompiler/params.h" -#include "video_core/amdgpu/pixel_format.h" -#include "video_core/amdgpu/tiling.h" -#include "video_core/amdgpu/types.h" +#include "video_core/amdgpu/cb_db_extent.h" +#include "video_core/amdgpu/regs.h" namespace Vulkan { class Rasterizer; @@ -35,13 +30,6 @@ struct VideoOutPort; namespace AmdGpu { -#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32)) - -#define CONCAT2(x, y) DO_CONCAT2(x, y) -#define DO_CONCAT2(x, y) x##y -#define INSERT_PADDING_WORDS(num_words) \ - [[maybe_unused]] std::array CONCAT2(pad, __LINE__) - struct Liverpool { static constexpr u32 GfxQueueId = 0u; static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software @@ -51,990 +39,6 @@ struct Liverpool { static constexpr u32 NumTotalQueues = NumGfxRings + NumComputeRings; static_assert(NumTotalQueues < 64u); // need to fit into u64 bitmap for ffs - static constexpr u32 NumColorBuffers = 8; - static constexpr u32 NumViewports = 16; - static constexpr u32 NumClipPlanes = 6; - static constexpr u32 NumShaderUserData = 16; - static constexpr u32 
UconfigRegWordOffset = 0xC000; - static constexpr u32 ContextRegWordOffset = 0xA000; - static constexpr u32 ConfigRegWordOffset = 0x2000; - static constexpr u32 ShRegWordOffset = 0x2C00; - static constexpr u32 NumRegs = 0xD000; - - using UserData = std::array; - - struct BinaryInfo { - static constexpr u8 signature_ref[] = {0x4f, 0x72, 0x62, 0x53, 0x68, 0x64, 0x72}; // OrbShdr - - std::array signature; - u8 version; - u32 pssl_or_cg : 1; - u32 cached : 1; - u32 type : 4; - u32 source_type : 2; - u32 length : 24; - u8 chunk_usage_base_offset_in_dw; - u8 num_input_usage_slots; - u8 is_srt : 1; - u8 is_srt_used_info_valid : 1; - u8 is_extended_usage_info : 1; - u8 reserved2 : 5; - u8 reserved3; - u64 shader_hash; - u32 crc32; - - bool Valid() const { - return std::memcmp(signature.data(), signature_ref, sizeof(signature_ref)) == 0; - } - }; - - static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x2000) { - constexpr u32 token_mov_vcchi = 0xBEEB03FF; - - if (code[0] == token_mov_vcchi) { - const auto* info = std::bit_cast(code + (code[1] + 1) * 2); - if (info->Valid()) { - return *info; - } - } - - // First instruction is not s_mov_b32 vcc_hi, #imm, - // which means we cannot get the binary info via said instruction. - // The easiest solution is to iterate through each dword and break - // on the first instance of the binary info. 
- constexpr size_t signature_size = sizeof(BinaryInfo::signature_ref) / sizeof(u8); - const u32* end = code + search_limit; - - for (const u32* it = code; it < end; ++it) { - if (const BinaryInfo* info = std::bit_cast(it); info->Valid()) { - return *info; - } - } - - UNREACHABLE_MSG("Shader binary info not found."); - } - - struct ShaderProgram { - u32 address_lo; - BitField<0, 8, u32> address_hi; - union { - // SPI_SHADER_PGM_RSRC1_XX - BitField<0, 6, u64> num_vgprs; - BitField<6, 4, u64> num_sgprs; - BitField<10, 2, u64> priority; - BitField<12, 2, FpRoundMode> fp_round_mode32; - BitField<14, 2, FpRoundMode> fp_round_mode64; - BitField<16, 2, FpDenormMode> fp_denorm_mode32; - BitField<18, 2, FpDenormMode> fp_denorm_mode64; - BitField<12, 8, u64> float_mode; - BitField<24, 2, u64> vgpr_comp_cnt; // SPI provided per-thread inputs - // SPI_SHADER_PGM_RSRC2_XX - BitField<32, 1, u64> scratch_en; - BitField<33, 5, u64> num_user_regs; - union { - BitField<39, 1, u64> oc_lds_en; - } rsrc2_hs; - } settings; - UserData user_data; - - template - const T Address() const { - const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8; - return reinterpret_cast(addr); - } - - std::span Code() const { - const u32* code = Address(); - const BinaryInfo& bininfo = SearchBinaryInfo(code); - const u32 num_dwords = bininfo.length / sizeof(u32); - return std::span{code, num_dwords}; - } - - [[nodiscard]] u32 NumVgprs() const { - // Each increment allocates 4 registers, where 0 = 4 registers. - return (settings.num_vgprs + 1) * 4; - } - }; - - struct HsTessFactorClamp { - // I've only seen min=0.0, max=1.0 so far. - // TODO why is max set to 1.0? 
Makes no sense - float hs_max_tess; - float hs_min_tess; - }; - - struct ComputeProgram { - u32 dispatch_initiator; - u32 dim_x; - u32 dim_y; - u32 dim_z; - u32 start_x; - u32 start_y; - u32 start_z; - struct { - u16 full; - u16 partial; - } num_thread_x, num_thread_y, num_thread_z; - INSERT_PADDING_WORDS(1); - BitField<0, 12, u32> max_wave_id; - u32 address_lo; - BitField<0, 8, u32> address_hi; - INSERT_PADDING_WORDS(4); - union { - BitField<0, 6, u64> num_vgprs; - BitField<6, 4, u64> num_sgprs; - BitField<33, 5, u64> num_user_regs; - BitField<39, 3, u64> tgid_enable; - BitField<47, 9, u64> lds_dwords; - } settings; - INSERT_PADDING_WORDS(1); - u32 resource_limits; - INSERT_PADDING_WORDS(0x2A); - UserData user_data; - - template - const T Address() const { - const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8; - return reinterpret_cast(addr); - } - - u32 SharedMemSize() const noexcept { - // lds_dwords is in units of 128 dwords. We return bytes. - return settings.lds_dwords.Value() * 128 * 4; - } - - u32 NumWorkgroups() const noexcept { - return dim_x * dim_y * dim_z; - } - - bool IsTgidEnabled(u32 i) const noexcept { - return (settings.tgid_enable.Value() >> i) & 1; - } - - std::span Code() const { - const u32* code = Address(); - const BinaryInfo& bininfo = SearchBinaryInfo(code); - const u32 num_dwords = bininfo.length / sizeof(u32); - return std::span{code, num_dwords}; - } - }; - - template - static constexpr const BinaryInfo& GetBinaryInfo(const Shader& sh) { - const auto* code = sh.template Address(); - return SearchBinaryInfo(code); - } - - static constexpr Shader::ShaderParams GetParams(const auto& sh) { - auto& bininfo = GetBinaryInfo(sh); - return { - .user_data = sh.user_data, - .code = sh.Code(), - .hash = bininfo.shader_hash, - }; - } - - union PsInputControl { - u32 raw; - BitField<0, 5, u32> input_offset; - BitField<5, 1, u32> use_default; - BitField<8, 2, u32> default_value; - BitField<10, 1, u32> flat_shade; - }; - - 
enum class ShaderExportComp : u32 { - None = 0, - OneComp = 1, - TwoComp = 2, - FourCompCompressed = 3, - FourComp = 4, - }; - - union ShaderPosFormat { - u32 raw; - BitField<0, 4, ShaderExportComp> pos0; - BitField<4, 4, ShaderExportComp> pos1; - BitField<8, 4, ShaderExportComp> pos2; - BitField<12, 4, ShaderExportComp> pos3; - }; - - enum class ShaderExportFormat : u32 { - Zero = 0, - R_32 = 1, - GR_32 = 2, - AR_32 = 3, - ABGR_FP16 = 4, - ABGR_UNORM16 = 5, - ABGR_SNORM16 = 6, - ABGR_UINT16 = 7, - ABGR_SINT16 = 8, - ABGR_32 = 9, - }; - - union ColorExportFormat { - u32 raw; - BitField<0, 4, ShaderExportFormat> col0; - BitField<4, 4, ShaderExportFormat> col1; - BitField<8, 4, ShaderExportFormat> col2; - BitField<12, 4, ShaderExportFormat> col3; - BitField<16, 4, ShaderExportFormat> col4; - BitField<20, 4, ShaderExportFormat> col5; - BitField<24, 4, ShaderExportFormat> col6; - BitField<28, 4, ShaderExportFormat> col7; - - [[nodiscard]] ShaderExportFormat GetFormat(const u32 buf_idx) const { - return static_cast((raw >> (buf_idx * 4)) & 0xfu); - } - }; - - union VsOutputControl { - u32 raw; - BitField<0, 8, u32> clip_distance_enable; - BitField<8, 8, u32> cull_distance_enable; - BitField<16, 1, u32> use_vtx_point_size; - BitField<17, 1, u32> use_vtx_edge_flag; - BitField<18, 1, u32> use_vtx_render_target_idx; - BitField<19, 1, u32> use_vtx_viewport_idx; - BitField<20, 1, u32> use_vtx_kill_flag; - BitField<21, 1, u32> vs_out_misc_enable; - BitField<22, 1, u32> vs_out_ccdist0_enable; - BitField<23, 1, u32> vs_out_ccdist1_enable; - BitField<25, 1, u32> use_vtx_gs_cut_flag; - - bool IsClipDistEnabled(u32 index) const { - return (clip_distance_enable.Value() >> index) & 1; - } - - bool IsCullDistEnabled(u32 index) const { - return (cull_distance_enable.Value() >> index) & 1; - } - }; - - struct LineControl { - u32 width_fixed_point; - - float Width() const { - return static_cast(width_fixed_point) / 8.0; - } - }; - - struct ModeControl { - s32 msaa_enable : 1; - s32 
vport_scissor_enable : 1; - s32 line_stripple_enable : 1; - s32 send_unlit_stiles_to_pkr : 1; - }; - - enum class ZOrder : u32 { - LateZ = 0, - EarlyZLateZ = 1, - ReZ = 2, - EarlyZReZ = 3, - }; - - enum class ConservativeDepth : u32 { - Any = 0, - LessThanZ = 1, - GreaterThanZ = 2, - }; - - union DepthShaderControl { - u32 raw; - BitField<0, 1, u32> z_export_enable; - BitField<1, 1, u32> stencil_test_val_export_enable; - BitField<2, 1, u32> stencil_op_val_export_enable; - BitField<4, 2, ZOrder> z_order; - BitField<6, 1, u32> kill_enable; - BitField<7, 1, u32> coverage_to_mask_enable; - BitField<8, 1, u32> mask_export_enable; - BitField<9, 1, u32> exec_on_hier_fail; - BitField<10, 1, u32> exec_on_noop; - BitField<11, 1, u32> alpha_to_mask_disable; - BitField<12, 1, u32> depth_before_shader; - BitField<13, 2, ConservativeDepth> conservative_z_export; - }; - - enum class CompareFunc : u32 { - Never = 0, - Less = 1, - Equal = 2, - LessEqual = 3, - Greater = 4, - NotEqual = 5, - GreaterEqual = 6, - Always = 7, - }; - - union DepthControl { - u32 raw; - BitField<0, 1, u32> stencil_enable; - BitField<1, 1, u32> depth_enable; - BitField<2, 1, u32> depth_write_enable; - BitField<3, 1, u32> depth_bounds_enable; - BitField<4, 3, CompareFunc> depth_func; - BitField<7, 1, u32> backface_enable; - BitField<8, 3, CompareFunc> stencil_ref_func; - BitField<20, 3, CompareFunc> stencil_bf_func; - BitField<30, 1, u32> enable_color_writes_on_depth_fail; - BitField<31, 1, u32> disable_color_writes_on_depth_pass; - }; - - enum class StencilFunc : u32 { - Keep = 0, - Zero = 1, - Ones = 2, - ReplaceTest = 3, - ReplaceOp = 4, - AddClamp = 5, - SubClamp = 6, - Invert = 7, - AddWrap = 8, - SubWrap = 9, - And = 10, - Or = 11, - Xor = 12, - Nand = 13, - Nor = 14, - Xnor = 15, - }; - - union StencilControl { - u32 raw; - BitField<0, 4, StencilFunc> stencil_fail_front; - BitField<4, 4, StencilFunc> stencil_zpass_front; - BitField<8, 4, StencilFunc> stencil_zfail_front; - BitField<12, 4, 
StencilFunc> stencil_fail_back; - BitField<16, 4, StencilFunc> stencil_zpass_back; - BitField<20, 4, StencilFunc> stencil_zfail_back; - }; - - union StencilRefMask { - u32 raw; - BitField<0, 8, u32> stencil_test_val; - BitField<8, 8, u32> stencil_mask; - BitField<16, 8, u32> stencil_write_mask; - BitField<24, 8, u32> stencil_op_val; - }; - - struct DepthBuffer { - enum class ZFormat : u32 { - Invalid = 0, - Z16 = 1, - Z32Float = 3, - }; - - enum class StencilFormat : u32 { - Invalid = 0, - Stencil8 = 1, - }; - - union ZInfo { - BitField<0, 2, ZFormat> format; - BitField<2, 2, u32> num_samples; - BitField<13, 3, u32> tile_split; - BitField<20, 3, TileMode> tile_mode_index; - BitField<23, 4, u32> decompress_on_n_zplanes; - BitField<27, 1, u32> allow_expclear; - BitField<28, 1, u32> read_size; - BitField<29, 1, u32> tile_surface_en; - BitField<30, 1, u32> clear_disallowed; - BitField<31, 1, u32> zrange_precision; - } z_info; - union { - BitField<0, 1, StencilFormat> format; - } stencil_info; - u32 z_read_base; - u32 stencil_read_base; - u32 z_write_base; - u32 stencil_write_base; - union { - BitField<0, 11, u32> pitch_tile_max; - BitField<11, 11, u32> height_tile_max; - } depth_size; - union { - BitField<0, 22, u32> tile_max; - } depth_slice; - - bool DepthValid() const { - return DepthAddress() != 0 && z_info.format != ZFormat::Invalid; - } - - bool StencilValid() const { - return StencilAddress() != 0 && stencil_info.format != StencilFormat::Invalid; - } - - bool DepthWriteValid() const { - return DepthWriteAddress() != 0 && z_info.format != ZFormat::Invalid; - } - - bool StencilWriteValid() const { - return StencilWriteAddress() != 0 && stencil_info.format != StencilFormat::Invalid; - } - - u32 Pitch() const { - return (depth_size.pitch_tile_max + 1) << 3; - } - - u32 Height() const { - return (depth_size.height_tile_max + 1) << 3; - } - - u64 DepthAddress() const { - return u64(z_read_base) << 8; - } - - u64 StencilAddress() const { - return u64(stencil_read_base) 
<< 8; - } - - u64 DepthWriteAddress() const { - return u64(z_write_base) << 8; - } - - u64 StencilWriteAddress() const { - return u64(stencil_write_base) << 8; - } - - u32 NumSamples() const { - return 1u << z_info.num_samples; // spec doesn't say it is a log2 - } - - u32 NumBits() const { - return z_info.format == ZFormat::Z32Float ? 32 : 16; - } - - size_t GetDepthSliceSize() const { - ASSERT(z_info.format != ZFormat::Invalid); - const auto bpe = NumBits() >> 3; // in bytes - return (depth_slice.tile_max + 1) * 64 * bpe * NumSamples(); - } - - TileMode GetTileMode() const { - return z_info.tile_mode_index.Value(); - } - - bool IsTiled() const { - return GetTileMode() != TileMode::DisplayLinearAligned; - } - }; - - enum class ClipSpace : u32 { - MinusWToW = 0, - ZeroToW = 1, - }; - - enum class PrimKillCond : u32 { - AllVtx = 0, - AnyVtx = 1, - }; - - union ClipperControl { - u32 raw; - BitField<0, 6, u32> user_clip_plane_enable; - BitField<16, 1, u32> clip_disable; - BitField<19, 1, ClipSpace> clip_space; - BitField<21, 1, PrimKillCond> vtx_kill_or; - BitField<22, 1, u32> dx_rasterization_kill; - BitField<24, 1, u32> dx_linear_attr_clip_enable; - BitField<26, 1, u32> zclip_near_disable; - BitField<27, 1, u32> zclip_far_disable; - - bool ZclipEnable() const { - if (zclip_near_disable != zclip_far_disable) { - return false; - } - return !zclip_near_disable; - } - }; - - enum class PolygonMode : u32 { - Point = 0, - Line = 1, - Fill = 2, - }; - - enum class ProvokingVtxLast : u32 { - First = 0, - Last = 1, - }; - - enum class CullMode : u32 { - None = 0, - Front = 1, - Back = 2, - FrontAndBack = 3, - }; - - enum class FrontFace : u32 { - CounterClockwise = 0, - Clockwise = 1, - }; - - union PolygonControl { - u32 raw; - BitField<0, 1, u32> cull_front; - BitField<1, 1, u32> cull_back; - BitField<2, 1, FrontFace> front_face; - BitField<3, 2, u32> enable_polygon_mode; - BitField<5, 3, PolygonMode> polygon_mode_front; - BitField<8, 3, PolygonMode> polygon_mode_back; - 
BitField<11, 1, u32> enable_polygon_offset_front; - BitField<12, 1, u32> enable_polygon_offset_back; - BitField<13, 1, u32> enable_polygon_offset_para; - BitField<16, 1, u32> enable_window_offset; - BitField<19, 1, ProvokingVtxLast> provoking_vtx_last; - BitField<20, 1, u32> persp_corr_dis; - BitField<21, 1, u32> multi_prim_ib_ena; - - PolygonMode PolyMode() const { - return enable_polygon_mode ? polygon_mode_front.Value() : PolygonMode::Fill; - } - - CullMode CullingMode() const { - return static_cast(cull_front | cull_back << 1); - } - - bool NeedsBias() const { - return enable_polygon_offset_back || enable_polygon_offset_front || - enable_polygon_offset_para; - } - }; - - union VsOutputConfig { - u32 raw; - BitField<1, 5, u32> export_count_min_one; - BitField<6, 1, u32> half_pack; - - u32 NumExports() const { - return export_count_min_one.Value() + 1; - } - }; - - union ColorBufferMask { - enum ColorComponent : u32 { - ComponentR = (1u << 0), - ComponentG = (1u << 1), - ComponentB = (1u << 2), - ComponentA = (1u << 3), - }; - - u32 raw; - BitField<0, 4, u32> output0_mask; - BitField<4, 4, u32> output1_mask; - BitField<8, 4, u32> output2_mask; - BitField<12, 4, u32> output3_mask; - BitField<16, 4, u32> output4_mask; - BitField<20, 4, u32> output5_mask; - BitField<24, 4, u32> output6_mask; - BitField<28, 4, u32> output7_mask; - - u32 GetMask(int buf_id) const { - return (raw >> (buf_id * 4)) & 0xfu; - } - - void SetMask(int buf_id, u32 mask) { - raw &= ~(0xf << (buf_id * 4)); - raw |= (mask << (buf_id * 4)); - } - }; - - struct BorderColorBufferBase { - u32 base_addr_lo; - BitField<0, 8, u32> base_addr_hi; - - template - T Address() const { - return std::bit_cast(u64(base_addr_hi) << 40 | u64(base_addr_lo) << 8); - } - }; - - struct IndexBufferBase { - BitField<0, 8, u32> base_addr_hi; - u32 base_addr_lo; - - template - T Address() const { - return std::bit_cast((base_addr_lo & ~1U) | u64(base_addr_hi) << 32); - } - }; - - enum class IndexType : u32 { - Index16 = 
0, - Index32 = 1, - }; - - enum class IndexSwapMode : u32 { - None = 0, - Swap16 = 1, - Swap32 = 2, - SwapWord = 3, - }; - - union IndexBufferType { - u32 raw; - BitField<0, 2, IndexType> index_type; - BitField<2, 2, IndexSwapMode> swap_mode; - }; - - union VgtNumInstances { - u32 num_instances; - - u32 NumInstances() const { - return num_instances == 0 ? 1 : num_instances; - } - }; - - struct Scissor { - struct { - s16 top_left_x; - s16 top_left_y; - }; - struct { - s16 bottom_right_x; - s16 bottom_right_y; - }; - - // From AMD spec: 'Negative numbers clamped to 0' - static s16 Clamp(s16 value) { - return std::max(s16(0), value); - } - - u32 GetWidth() const { - return static_cast(Clamp(bottom_right_x) - Clamp(top_left_x)); - } - - u32 GetHeight() const { - return static_cast(Clamp(bottom_right_y) - Clamp(top_left_y)); - } - }; - - struct WindowOffset { - s32 window_x_offset : 16; - s32 window_y_offset : 16; - }; - - struct ViewportScissor { - union { - BitField<0, 15, s32> top_left_x; - BitField<16, 15, s32> top_left_y; - BitField<31, 1, s32> window_offset_disable; - }; - struct { - s16 bottom_right_x; - s16 bottom_right_y; - }; - - u32 GetWidth() const { - return bottom_right_x - top_left_x; - } - - u32 GetHeight() const { - return bottom_right_y - top_left_y; - } - }; - - struct ViewportDepth { - float zmin; - float zmax; - }; - - struct ViewportBounds { - float xscale; - float xoffset; - float yscale; - float yoffset; - float zscale; - float zoffset; - }; - - union ViewportControl { - BitField<0, 1, u32> xscale_enable; - BitField<1, 1, u32> xoffset_enable; - BitField<2, 1, u32> yscale_enable; - BitField<3, 1, u32> yoffset_enable; - BitField<4, 1, u32> zscale_enable; - BitField<5, 1, u32> zoffset_enable; - BitField<8, 1, u32> xy_transformed; - BitField<9, 1, u32> z_transformed; - BitField<10, 1, u32> w_transformed; - BitField<11, 1, u32> perfcounter_ref; - }; - - struct ClipUserData { - u32 data_x; - u32 data_y; - u32 data_z; - u32 data_w; - }; - - using 
BlendConstants = std::array; - - union BlendControl { - enum class BlendFactor : u32 { - Zero = 0, - One = 1, - SrcColor = 2, - OneMinusSrcColor = 3, - SrcAlpha = 4, - OneMinusSrcAlpha = 5, - DstAlpha = 6, - OneMinusDstAlpha = 7, - DstColor = 8, - OneMinusDstColor = 9, - SrcAlphaSaturate = 10, - ConstantColor = 13, - OneMinusConstantColor = 14, - Src1Color = 15, - InvSrc1Color = 16, - Src1Alpha = 17, - InvSrc1Alpha = 18, - ConstantAlpha = 19, - OneMinusConstantAlpha = 20, - }; - - enum class BlendFunc : u32 { - Add = 0, - Subtract = 1, - Min = 2, - Max = 3, - ReverseSubtract = 4, - }; - - u32 raw; - BitField<0, 5, BlendFactor> color_src_factor; - BitField<5, 3, BlendFunc> color_func; - BitField<8, 5, BlendFactor> color_dst_factor; - BitField<16, 5, BlendFactor> alpha_src_factor; - BitField<21, 3, BlendFunc> alpha_func; - BitField<24, 5, BlendFactor> alpha_dst_factor; - BitField<29, 1, u32> separate_alpha_blend; - BitField<30, 1, u32> enable; - BitField<31, 1, u32> disable_rop3; - - bool operator==(const BlendControl& other) const { - return raw == other.raw; - } - }; - - union ColorControl { - enum class OperationMode : u32 { - Disable = 0u, - Normal = 1u, - EliminateFastClear = 2u, - Resolve = 3u, - Err = 4u, - FmaskDecompress = 5u, - }; - enum class LogicOp : u32 { - Clear = 0x00, - Nor = 0x11, - AndInverted = 0x22, - CopyInverted = 0x33, - AndReverse = 0x44, - Invert = 0x55, - Xor = 0x66, - Nand = 0x77, - And = 0x88, - Equiv = 0x99, - Noop = 0xAA, - OrInverted = 0xBB, - Copy = 0xCC, - OrReverse = 0xDD, - Or = 0xEE, - Set = 0xFF, - }; - - BitField<0, 1, u32> disable_dual_quad; - BitField<3, 1, u32> degamma_enable; - BitField<4, 3, OperationMode> mode; - BitField<16, 8, LogicOp> rop3; - }; - - struct ColorBuffer { - enum class EndianSwap : u32 { - None = 0, - Swap8In16 = 1, - Swap8In32 = 2, - Swap8In64 = 3, - }; - - enum class SwapMode : u32 { - Standard = 0, - Alternate = 1, - StandardReverse = 2, - AlternateReverse = 3, - }; - - enum class RoundMode : u32 { - 
ByHalf = 0, - Truncate = 1, - }; - - u32 base_address; - union { - BitField<0, 11, u32> tile_max; - BitField<20, 11, u32> fmask_tile_max; - } pitch; - union { - BitField<0, 22, u32> tile_max; - } slice; - union { - BitField<0, 11, u32> slice_start; - BitField<13, 11, u32> slice_max; - } view; - union Color0Info { - BitField<0, 2, EndianSwap> endian; - BitField<2, 5, DataFormat> format; - BitField<7, 1, u32> linear_general; - BitField<8, 3, NumberFormat> number_type; - BitField<11, 2, SwapMode> comp_swap; - BitField<13, 1, u32> fast_clear; - BitField<14, 1, u32> compression; - BitField<15, 1, u32> blend_clamp; - BitField<16, 1, u32> blend_bypass; - BitField<17, 1, u32> simple_float; - BitField<18, 1, RoundMode> round_mode; - BitField<19, 1, u32> cmask_is_linear; - BitField<20, 3, u32> blend_opt_dont_rd_dst; - BitField<23, 3, u32> blend_opt_discard_pixel; - BitField<26, 1, u32> fmask_compression_disable_ci; - BitField<27, 1, u32> fmask_compress_1frag_only; - BitField<28, 1, u32> dcc_enable; - BitField<29, 2, u32> cmask_addr_type; - /// Neo-mode only - BitField<31, 1, u32> alt_tile_mode; - - u32 u32all; - } info; - union Color0Attrib { - BitField<0, 5, TileMode> tile_mode_index; - BitField<5, 5, u32> fmask_tile_mode_index; - BitField<10, 2, u32> fmask_bank_height; - BitField<12, 3, u32> num_samples_log2; - BitField<15, 2, u32> num_fragments_log2; - BitField<17, 1, u32> force_dst_alpha_1; - - u32 u32all; - } attrib; - INSERT_PADDING_WORDS(1); - u32 cmask_base_address; - union { - BitField<0, 14, u32> tile_max; - } cmask_slice; - u32 fmask_base_address; - union { - BitField<0, 14, u32> tile_max; - } fmask_slice; - u32 clear_word0; - u32 clear_word1; - INSERT_PADDING_WORDS(2); - - operator bool() const { - return base_address && info.format != DataFormat::FormatInvalid; - } - - u32 Pitch() const { - return (pitch.tile_max + 1) << 3; - } - - u32 Height() const { - return (slice.tile_max + 1) * 64 / Pitch(); - } - - u64 Address() const { - return u64(base_address) << 8; - 
} - - VAddr CmaskAddress() const { - return VAddr(cmask_base_address) << 8; - } - - VAddr FmaskAddress() const { - return VAddr(fmask_base_address) << 8; - } - - u32 NumSamples() const { - return 1 << attrib.num_fragments_log2; - } - - u32 NumSlices() const { - return view.slice_max + 1; - } - - size_t GetColorSliceSize() const { - const auto num_bytes_per_element = NumBitsPerBlock(info.format) / 8u; - const auto slice_size = - num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples(); - return slice_size; - } - - TileMode GetTileMode() const { - return info.linear_general ? TileMode::DisplayLinearAligned - : attrib.tile_mode_index.Value(); - } - - bool IsTiled() const { - return GetTileMode() != TileMode::DisplayLinearAligned; - } - - [[nodiscard]] DataFormat GetDataFmt() const { - return RemapDataFormat(info.format); - } - - [[nodiscard]] NumberFormat GetNumberFmt() const { - return RemapNumberFormat(GetFixedNumberFormat(), info.format); - } - - [[nodiscard]] NumberConversion GetNumberConversion() const { - return MapNumberConversion(GetFixedNumberFormat(), info.format); - } - - [[nodiscard]] CompMapping Swizzle() const { - // clang-format off - static constexpr std::array, 4> mrt_swizzles{{ - // Standard - std::array{{ - {.r = CompSwizzle::Red, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, - {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, - {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Zero}, - {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Alpha}, - }}, - // Alternate - std::array{{ - {.r = CompSwizzle::Green, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, - {.r = CompSwizzle::Red, .g = CompSwizzle::Alpha, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, - {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Alpha, .a = CompSwizzle::Zero}, - {.r = 
CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Alpha}, - }}, - // StandardReverse - std::array{{ - {.r = CompSwizzle::Blue, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, - {.r = CompSwizzle::Green, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, - {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, - {.r = CompSwizzle::Alpha, .g = CompSwizzle::Blue, .b = CompSwizzle::Green, .a = CompSwizzle::Red}, - }}, - // AlternateReverse - std::array{{ - {.r = CompSwizzle::Alpha, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, - {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, - {.r = CompSwizzle::Alpha, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, - {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Green, .a = CompSwizzle::Blue}, - }}, - }}; - // clang-format on - const auto swap_idx = static_cast(info.comp_swap.Value()); - const auto components_idx = NumComponents(info.format) - 1; - const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx]; - return RemapSwizzle(info.format, mrt_swizzle); - } - - [[nodiscard]] NumberFormat GetFixedNumberFormat() const { - // There is a small difference between T# and CB number types, account for it. - return info.number_type == NumberFormat::SnormNz ? 
NumberFormat::Srgb - : info.number_type.Value(); - } - }; - enum ContextRegs : u32 { DbZInfo = 0xA010, CbColor0Base = 0xA318, @@ -1055,462 +59,12 @@ struct Liverpool { CbColor7Cmask = 0xA388, }; - struct PolygonOffset { - float depth_bias; - float front_scale; - float front_offset; - float back_scale; - float back_offset; - }; - - struct Address { - u32 address; - - VAddr GetAddress() const { - return u64(address) << 8; - } - }; - - union DepthRenderControl { - u32 raw; - BitField<0, 1, u32> depth_clear_enable; - BitField<1, 1, u32> stencil_clear_enable; - BitField<2, 1, u32> depth_copy; - BitField<3, 1, u32> stencil_copy; - BitField<4, 1, u32> resummarize_enable; - BitField<5, 1, u32> stencil_compress_disable; - BitField<6, 1, u32> depth_compress_disable; - BitField<7, 1, u32> copy_centroid; - BitField<8, 1, u32> copy_sample; - BitField<9, 1, u32> decompress_enable; - }; - - union DepthView { - BitField<0, 11, u32> slice_start; - BitField<13, 11, u32> slice_max; - BitField<24, 1, u32> z_read_only; - BitField<25, 1, u32> stencil_read_only; - - u32 NumSlices() const { - return slice_max + 1u; - } - }; - - enum class ForceEnable : u32 { - Off = 0, - Enable = 1, - Disable = 2, - }; - - enum class ForceSumm : u32 { - Off = 0, - MinZ = 1, - MaxZ = 2, - Both = 3, - }; - - union DepthRenderOverride { - u32 raw; - BitField<0, 2, ForceEnable> force_hiz_enable; - BitField<2, 2, ForceEnable> force_his_enable0; - BitField<4, 2, ForceEnable> force_his_enable1; - BitField<6, 1, u32> force_shader_z_order; - BitField<7, 1, u32> fast_z_disable; - BitField<8, 1, u32> fast_stencil_disable; - BitField<9, 1, u32> noop_cull_disable; - BitField<10, 1, u32> force_color_kill; - BitField<11, 1, u32> force_z_read; - BitField<12, 1, u32> force_stencil_read; - BitField<13, 2, ForceEnable> force_full_z_range; - BitField<15, 1, u32> force_qc_smask_conflict; - BitField<16, 1, u32> disable_viewport_clamp; - BitField<17, 1, u32> ignore_sc_zrange; - BitField<18, 1, u32> disable_fully_covered; - 
BitField<19, 2, ForceSumm> force_z_limit_summ; - BitField<21, 5, u32> max_tiles_in_dtt; - BitField<26, 1, u32> disable_tile_rate_tiles; - BitField<27, 1, u32> force_z_dirty; - BitField<28, 1, u32> force_stencil_dirty; - BitField<29, 1, u32> force_z_valid; - BitField<30, 1, u32> force_stencil_valid; - BitField<31, 1, u32> preserve_compression; - }; - - union AaConfig { - BitField<0, 3, u32> msaa_num_samples; - BitField<4, 1, u32> aa_mask_centroid_dtmn; - BitField<13, 4, u32> max_sample_dst; - BitField<20, 3, u32> msaa_exposed_samples; - BitField<24, 2, u32> detail_to_exposed_mode; - - u32 NumSamples() const { - return 1 << msaa_num_samples; - } - }; - - union ShaderStageEnable { - enum VgtStages : u32 { - Vs = 0u, // always enabled - EsGs = 0xB0u, - LsHs = 0x45u, - }; - - VgtStages raw; - BitField<0, 2, u32> ls_en; - BitField<2, 1, u32> hs_en; - BitField<3, 2, u32> es_en; - BitField<5, 1, u32> gs_en; - BitField<6, 2, u32> vs_en; - BitField<8, 1, u32> dynamic_hs; - - bool IsStageEnabled(u32 stage) const { - switch (stage) { - case 0: - case 1: - return true; - case 2: - return gs_en.Value(); - case 3: - return es_en.Value(); - case 4: - return hs_en.Value(); - case 5: - return ls_en.Value(); - default: - UNREACHABLE(); - } - } - }; - - union GsInstances { - u32 raw; - struct { - u32 enable : 2; - u32 count : 6; - }; - - bool IsEnabled() const { - return enable && count > 0; - } - }; - - union GsOutPrimitiveType { - u32 raw; - struct { - GsOutputPrimitiveType outprim_type : 6; - GsOutputPrimitiveType outprim_type1 : 6; - GsOutputPrimitiveType outprim_type2 : 6; - GsOutputPrimitiveType outprim_type3 : 6; - u32 reserved : 3; - u32 unique_type_per_stream : 1; - }; - - GsOutputPrimitiveType GetPrimitiveType(u32 stream) const { - if (unique_type_per_stream == 0) { - return outprim_type; - } - - switch (stream) { - case 0: - return outprim_type; - case 1: - return outprim_type1; - case 2: - return outprim_type2; - case 3: - return outprim_type3; - default: - UNREACHABLE(); 
- } - } - }; - - union GsMode { - enum class Mode : u32 { - Off = 0, - ScenarioA = 1, - ScenarioB = 2, - ScenarioG = 3, - ScenarioC = 4, - }; - - u32 raw; - BitField<0, 3, Mode> mode; - BitField<3, 2, u32> cut_mode; - BitField<22, 2, u32> onchip; - }; - - union StreamOutControl { - u32 raw; - struct { - u32 offset_update_done : 1; - u32 : 31; - }; - }; - - union StreamOutConfig { - u32 raw; - struct { - u32 streamout_0_en : 1; - u32 streamout_1_en : 1; - u32 streamout_2_en : 1; - u32 streamout_3_en : 1; - u32 rast_stream : 3; - u32 : 1; - u32 rast_stream_mask : 4; - u32 : 19; - u32 use_rast_stream_mask : 1; - }; - }; - - union StreamOutBufferConfig { - u32 raw; - struct { - u32 stream_0_buf_en : 4; - u32 stream_1_buf_en : 4; - u32 stream_2_buf_en : 4; - u32 stream_3_buf_en : 4; - }; - }; - - union LsHsConfig { - u32 raw; - BitField<0, 8, u32> num_patches; - BitField<8, 6, u32> hs_input_control_points; - BitField<14, 6, u32> hs_output_control_points; - }; - - union TessellationConfig { - u32 raw; - BitField<0, 2, TessellationType> type; - BitField<2, 3, TessellationPartitioning> partitioning; - BitField<5, 3, TessellationTopology> topology; - }; - - union TessFactorMemoryBase { - u32 base; - - u64 MemoryBase() const { - return static_cast(base) << 8; - } - }; - - union Eqaa { - u32 raw; - BitField<0, 1, u32> max_anchor_samples; - BitField<4, 3, u32> ps_iter_samples; - BitField<8, 3, u32> mask_export_num_samples; - BitField<12, 3, u32> alpha_to_mask_num_samples; - BitField<16, 1, u32> high_quality_intersections; - BitField<17, 1, u32> incoherent_eqaa_reads; - BitField<18, 1, u32> interpolate_comp_z; - BitField<19, 1, u32> interpolate_src_z; - BitField<20, 1, u32> static_anchor_associations; - BitField<21, 1, u32> alpha_to_mask_eqaa_disable; - BitField<24, 3, u32> overrasterization_amount; - BitField<27, 1, u32> enable_postz_overrasterization; - }; - - union PsInput { - u32 raw; - struct { - u32 persp_sample_ena : 1; - u32 persp_center_ena : 1; - u32 
persp_centroid_ena : 1; - u32 persp_pull_model_ena : 1; - u32 linear_sample_ena : 1; - u32 linear_center_ena : 1; - u32 linear_centroid_ena : 1; - u32 line_stipple_tex_ena : 1; - u32 pos_x_float_ena : 1; - u32 pos_y_float_ena : 1; - u32 pos_z_float_ena : 1; - u32 pos_w_float_ena : 1; - u32 front_face_ena : 1; - u32 ancillary_ena : 1; - u32 sample_coverage_ena : 1; - u32 pos_fixed_pt_ena : 1; - }; - }; - - union Regs { - struct { - INSERT_PADDING_WORDS(0x2C08); - ShaderProgram ps_program; - INSERT_PADDING_WORDS(0x2C); - ShaderProgram vs_program; - INSERT_PADDING_WORDS(0x2C); - ShaderProgram gs_program; - INSERT_PADDING_WORDS(0x2C); - ShaderProgram es_program; - INSERT_PADDING_WORDS(0x2C); - ShaderProgram hs_program; - INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20); - ShaderProgram ls_program; - INSERT_PADDING_WORDS(0xA4); - ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues` - INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5); - DepthRenderControl depth_render_control; - INSERT_PADDING_WORDS(1); - DepthView depth_view; - DepthRenderOverride depth_render_override; - INSERT_PADDING_WORDS(1); - Address depth_htile_data_base; - INSERT_PADDING_WORDS(2); - float depth_bounds_min; - float depth_bounds_max; - u32 stencil_clear; - float depth_clear; - Scissor screen_scissor; - INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2); - DepthBuffer depth_buffer; - INSERT_PADDING_WORDS(8); - BorderColorBufferBase ta_bc_base; - INSERT_PADDING_WORDS(0xA080 - 0xA020 - 2); - WindowOffset window_offset; - ViewportScissor window_scissor; - INSERT_PADDING_WORDS(0xA08E - 0xA081 - 2); - ColorBufferMask color_target_mask; - ColorBufferMask color_shader_mask; - ViewportScissor generic_scissor; - INSERT_PADDING_WORDS(2); - std::array viewport_scissors; - std::array viewport_depths; - INSERT_PADDING_WORDS(0xA102 - 0xA0D4); - u32 index_offset; - u32 primitive_restart_index; - INSERT_PADDING_WORDS(1); - BlendConstants blend_constants; - INSERT_PADDING_WORDS(0xA10B - 0xA105 - 4); - 
StencilControl stencil_control; - StencilRefMask stencil_ref_front; - StencilRefMask stencil_ref_back; - INSERT_PADDING_WORDS(1); - std::array viewports; - std::array clip_user_data; - INSERT_PADDING_WORDS(0xA191 - 0xA187); - std::array ps_inputs; - VsOutputConfig vs_output_config; - INSERT_PADDING_WORDS(1); - PsInput ps_input_ena; - PsInput ps_input_addr; - INSERT_PADDING_WORDS(1); - BitField<0, 6, u32> num_interp; - INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1); - ShaderPosFormat shader_pos_format; - ShaderExportFormat z_export_format; - ColorExportFormat color_export_format; - INSERT_PADDING_WORDS(0xA1E0 - 0xA1C3 - 3); - std::array blend_control; - INSERT_PADDING_WORDS(0xA1F9 - 0xA1E0 - 8); - IndexBufferBase index_base_address; - INSERT_PADDING_WORDS(1); - u32 draw_initiator; - INSERT_PADDING_WORDS(0xA200 - 0xA1F9 - 4); - DepthControl depth_control; - INSERT_PADDING_WORDS(1); - ColorControl color_control; - DepthShaderControl depth_shader_control; - ClipperControl clipper_control; - PolygonControl polygon_control; - ViewportControl viewport_control; - VsOutputControl vs_output_control; - INSERT_PADDING_WORDS(0xA287 - 0xA207 - 6); - LineControl line_control; - INSERT_PADDING_WORDS(4); - HsTessFactorClamp hs_clamp; - INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2); - GsMode vgt_gs_mode; - INSERT_PADDING_WORDS(1); - ModeControl mode_control; - INSERT_PADDING_WORDS(8); - GsOutPrimitiveType vgt_gs_out_prim_type; - INSERT_PADDING_WORDS(1); - u32 index_size; - u32 max_index_size; - IndexBufferType index_buffer_type; - INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2); - u32 enable_primitive_id; - INSERT_PADDING_WORDS(3); - u32 enable_primitive_restart; - INSERT_PADDING_WORDS(0xA2A8 - 0xA2A5 - 1); - u32 vgt_instance_step_rate_0; - u32 vgt_instance_step_rate_1; - INSERT_PADDING_WORDS(0xA2AB - 0xA2A9 - 1); - u32 vgt_esgs_ring_itemsize; - u32 vgt_gsvs_ring_itemsize; - INSERT_PADDING_WORDS(0xA2CE - 0xA2AC - 1); - BitField<0, 11, u32> vgt_gs_max_vert_out; - INSERT_PADDING_WORDS(0xA2D5 - 
0xA2CE - 1); - ShaderStageEnable stage_enable; - LsHsConfig ls_hs_config; - u32 vgt_gs_vert_itemsize[4]; - TessellationConfig tess_config; - INSERT_PADDING_WORDS(3); - PolygonOffset poly_offset; - GsInstances vgt_gs_instance_cnt; - StreamOutConfig vgt_strmout_config; - StreamOutBufferConfig vgt_strmout_buffer_config; - INSERT_PADDING_WORDS(0xA2F8 - 0xA2E6 - 1); - AaConfig aa_config; - INSERT_PADDING_WORDS(0xA318 - 0xA2F8 - 1); - ColorBuffer color_buffers[NumColorBuffers]; - INSERT_PADDING_WORDS(0xC03F - 0xA390); - StreamOutControl cp_strmout_cntl; - INSERT_PADDING_WORDS(0xC242 - 0xC040); - PrimitiveType primitive_type; - INSERT_PADDING_WORDS(0xC24C - 0xC243); - u32 num_indices; - VgtNumInstances num_instances; - INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1); - TessFactorMemoryBase vgt_tf_memory_base; - }; - std::array reg_array{}; - - const ShaderProgram* ProgramForStage(u32 index) const { - switch (index) { - case 0: - return &ps_program; - case 1: - return &vs_program; - case 2: - return &gs_program; - case 3: - return &es_program; - case 4: - return &hs_program; - case 5: - return &ls_program; - } - return nullptr; - } - - bool IsClipDisabled() const { - return clipper_control.clip_disable || primitive_type == PrimitiveType::RectList; - } - - void SetDefaults(); - }; - Regs regs{}; - - // See for a comment in context reg parsing code - union CbDbExtent { - struct { - u16 width; - u16 height; - }; - u32 raw{0u}; - - [[nodiscard]] bool Valid() const { - return raw != 0; - } - }; - std::array last_cb_extent{}; + std::array last_cb_extent{}; CbDbExtent last_db_extent{}; public: - Liverpool(); + explicit Liverpool(); ~Liverpool(); void SubmitGfx(std::span dcb, std::span ccb); @@ -1568,8 +122,7 @@ public: void ReserveCopyBufferSpace() { GpuQueue& gfx_queue = mapped_queues[GfxQueueId]; - std::scoped_lock lk(gfx_queue.m_access); - + std::scoped_lock lk(gfx_queue.m_access); constexpr size_t GfxReservedSize = 2_MB >> 2; gfx_queue.ccb_buffer.reserve(GfxReservedSize); 
gfx_queue.dcb_buffer.reserve(GfxReservedSize); @@ -1623,8 +176,8 @@ private: Handle handle; }; - std::pair, std::span> CopyCmdBuffers(std::span dcb, - std::span ccb); + using CmdBuffer = std::pair, std::span>; + CmdBuffer CopyCmdBuffers(std::span dcb, std::span ccb); Task ProcessGraphics(std::span dcb, std::span ccb); Task ProcessCeUpdate(std::span ccb); template @@ -1677,85 +230,7 @@ private: std::condition_variable_any submit_cv; std::queue> command_queue{}; std::thread::id gpu_id; - int curr_qid{-1}; + s32 curr_qid{-1}; }; -static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); -static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48); -static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); -static_assert(GFX6_3D_REG_INDEX(gs_program) == 0x2C88); -static_assert(GFX6_3D_REG_INDEX(es_program) == 0x2CC8); -static_assert(GFX6_3D_REG_INDEX(hs_program) == 0x2D08); -static_assert(GFX6_3D_REG_INDEX(ls_program) == 0x2D48); -static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00); -static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03); -static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C); -static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40); -static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000); -static_assert(GFX6_3D_REG_INDEX(depth_view) == 0xA002); -static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005); -static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); -static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010); -static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017); -static_assert(GFX6_3D_REG_INDEX(ta_bc_base) == 0xA020); -static_assert(GFX6_3D_REG_INDEX(window_offset) == 0xA080); -static_assert(GFX6_3D_REG_INDEX(window_scissor) == 0xA081); -static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); -static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); -static_assert(GFX6_3D_REG_INDEX(generic_scissor) == 0xA090); 
-static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); -static_assert(GFX6_3D_REG_INDEX(index_offset) == 0xA102); -static_assert(GFX6_3D_REG_INDEX(primitive_restart_index) == 0xA103); -static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B); -static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F); -static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F); -static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191); -static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1); -static_assert(GFX6_3D_REG_INDEX(ps_input_ena) == 0xA1B3); -static_assert(GFX6_3D_REG_INDEX(ps_input_addr) == 0xA1B4); -static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6); -static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3); -static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4); -static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5); -static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0); -static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9); -static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC); -static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200); -static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202); -static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); -static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); -static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); -static_assert(GFX6_3D_REG_INDEX(line_control) == 0xA282); -static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287); -static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290); -static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292); -static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B); -static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D); -static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F); -static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); -static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5); -static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8); 
-static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9); -static_assert(GFX6_3D_REG_INDEX(vgt_esgs_ring_itemsize) == 0xA2AB); -static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC); -static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE); -static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5); -static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7); -static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB); -static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); -static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4); -static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5); -static_assert(GFX6_3D_REG_INDEX(vgt_strmout_buffer_config) == 0xA2E6); -static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8); -static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); -static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319); -static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A); -static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381); -static_assert(GFX6_3D_REG_INDEX(cp_strmout_cntl) == 0xC03F); -static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242); -static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D); -static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250); - -#undef GFX6_3D_REG_INDEX - } // namespace AmdGpu diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index d6bb9748c..eb48f3568 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -4,26 +4,24 @@ #pragma once #include +#include "common/assert.h" #include "common/bit_field.h" #include "common/types.h" #include "common/uint128.h" #include "core/libraries/gnmdriver/gnmdriver.h" #include "core/libraries/kernel/time.h" -#include "core/platform.h" #include "video_core/amdgpu/pm4_opcodes.h" namespace AmdGpu { -/// This enum defines the Shader types supported in PM4 type 3 header enum class PM4ShaderType : u32 { - 
ShaderGraphics = 0, ///< Graphics shader - ShaderCompute = 1 ///< Compute shader + ShaderGraphics = 0, + ShaderCompute = 1, }; -/// This enum defines the predicate value supported in PM4 type 3 header enum class PM4Predicate : u32 { - PredDisable = 0, ///< Predicate disabled - PredEnable = 1 ///< Predicate enabled + PredDisable = 0, + PredEnable = 1, }; union PM4Type0Header { @@ -466,7 +464,7 @@ struct PM4CmdEventWriteEop { return data_lo | u64(data_hi) << 32; } - void SignalFence(auto&& write_mem) const { + void SignalFence(auto&& write_mem, auto&& signal_irq) const { u32* address = Address(); switch (data_sel.Value()) { case DataSelect::None: { @@ -502,7 +500,7 @@ struct PM4CmdEventWriteEop { ASSERT(data_sel == DataSelect::None); [[fallthrough]]; case InterruptSelect::IrqWhenWriteConfirm: { - Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop); + signal_irq(); break; } default: { @@ -682,7 +680,7 @@ struct PM4CmdWaitRegMem { return reg.Value(); } - bool Test(const std::array& regs) const { + bool Test(std::span regs) const { u32 value = mem_space.Value() == MemSpace::Memory ? 
*Address() : regs[Reg()]; switch (function.Value()) { case Function::Always: { @@ -934,7 +932,7 @@ struct PM4CmdReleaseMem { return data_lo | u64(data_hi) << 32; } - void SignalFence(Platform::InterruptId irq_id) const { + void SignalFence(auto&& signal_irq) const { switch (data_sel.Value()) { case DataSelect::Data32Low: { *Address() = DataDWord(); @@ -965,7 +963,7 @@ struct PM4CmdReleaseMem { case InterruptSelect::IrqUndocumented: [[fallthrough]]; case InterruptSelect::IrqWhenWriteConfirm: { - Platform::IrqC::Instance()->Signal(irq_id); + signal_irq(); break; } default: { diff --git a/src/video_core/amdgpu/regs.cpp b/src/video_core/amdgpu/regs.cpp new file mode 100644 index 000000000..17fd80312 --- /dev/null +++ b/src/video_core/amdgpu/regs.cpp @@ -0,0 +1,128 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/amdgpu/regs.h" + +namespace AmdGpu { + +// The following values are taken from fpPS4: +// https://github.com/red-prig/fpPS4/blob/436b43064be4c78229500f3d3c054fc76639247d/chip/pm4_pfp.pas#L410 +static constexpr std::array REG_ARRAY_DEFAULT = { + 0x00000000u, 0x80000000u, 0x40004000u, 0xdeadbeefu, 0x00000000u, 0x40004000u, 0x00000000u, + 0x40004000u, 0x00000000u, 0x40004000u, 0x00000000u, 0x40004000u, 0xaa99aaaau, 0x00000000u, + 0xdeadbeefu, 0xdeadbeefu, 0x80000000u, 0x40004000u, 0x00000000u, 0x00000000u, 0x80000000u, + 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, + 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, + 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, + 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, 0x40004000u, 0x80000000u, + 0x40004000u, 0x80000000u, 0x40004000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, + 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, + 
0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, + 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, + 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, 0x00000000u, 0x3f800000u, + 0x2a00161au, +}; + +void Regs::SetDefaults() { + std::memset(reg_array.data(), 0, reg_array.size() * sizeof(u32)); + + std::memcpy(&reg_array[ContextRegWordOffset + 0x80], REG_ARRAY_DEFAULT.data(), + REG_ARRAY_DEFAULT.size() * sizeof(u32)); + + // Individual context regs values + reg_array[ContextRegWordOffset + 0x000d] = 0x40004000u; + reg_array[ContextRegWordOffset + 0x01b6] = 0x00000002u; + reg_array[ContextRegWordOffset + 0x0204] = 0x00090000u; + reg_array[ContextRegWordOffset + 0x0205] = 0x00000004u; + reg_array[ContextRegWordOffset + 0x0295] = 0x00000100u; + reg_array[ContextRegWordOffset + 0x0296] = 0x00000080u; + reg_array[ContextRegWordOffset + 0x0297] = 0x00000002u; + reg_array[ContextRegWordOffset + 0x02aa] = 0x00001000u; + reg_array[ContextRegWordOffset + 0x02f7] = 0x00001000u; + reg_array[ContextRegWordOffset + 0x02f9] = 0x00000005u; + reg_array[ContextRegWordOffset + 0x02fa] = 0x3f800000u; + reg_array[ContextRegWordOffset + 0x02fb] = 0x3f800000u; + reg_array[ContextRegWordOffset + 0x02fc] = 0x3f800000u; + reg_array[ContextRegWordOffset + 0x02fd] = 0x3f800000u; + reg_array[ContextRegWordOffset + 0x0316] = 0x0000000eu; + reg_array[ContextRegWordOffset + 0x0317] = 0x00000010u; +} + +#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Regs, field_name) / sizeof(u32)) + +static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); +static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48); +static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); +static_assert(GFX6_3D_REG_INDEX(gs_program) == 0x2C88); +static_assert(GFX6_3D_REG_INDEX(es_program) == 0x2CC8); +static_assert(GFX6_3D_REG_INDEX(hs_program) == 0x2D08); +static_assert(GFX6_3D_REG_INDEX(ls_program) == 0x2D48);
+static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00); +static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03); +static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40); +static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000); +static_assert(GFX6_3D_REG_INDEX(depth_view) == 0xA002); +static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005); +static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); +static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010); +static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017); +static_assert(GFX6_3D_REG_INDEX(ta_bc_base) == 0xA020); +static_assert(GFX6_3D_REG_INDEX(window_offset) == 0xA080); +static_assert(GFX6_3D_REG_INDEX(window_scissor) == 0xA081); +static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); +static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); +static_assert(GFX6_3D_REG_INDEX(generic_scissor) == 0xA090); +static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); +static_assert(GFX6_3D_REG_INDEX(index_offset) == 0xA102); +static_assert(GFX6_3D_REG_INDEX(primitive_restart_index) == 0xA103); +static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B); +static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F); +static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F); +static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191); +static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1); +static_assert(GFX6_3D_REG_INDEX(ps_input_ena) == 0xA1B3); +static_assert(GFX6_3D_REG_INDEX(ps_input_addr) == 0xA1B4); +static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3); +static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4); +static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5); +static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0); +static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9); +static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC); +static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200); 
+static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202); +static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); +static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); +static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); +static_assert(GFX6_3D_REG_INDEX(line_control) == 0xA282); +static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287); +static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290); +static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292); +static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B); +static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D); +static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F); +static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); +static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5); +static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8); +static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9); +static_assert(GFX6_3D_REG_INDEX(vgt_esgs_ring_itemsize) == 0xA2AB); +static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC); +static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5); +static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7); +static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB); +static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); +static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4); +static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5); +static_assert(GFX6_3D_REG_INDEX(vgt_strmout_buffer_config) == 0xA2E6); +static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8); +static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); +static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319); +static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A); +static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381); +static_assert(GFX6_3D_REG_INDEX(cp_strmout_cntl) == 0xC03F); +static_assert(GFX6_3D_REG_INDEX(primitive_type) == 
0xC242); +static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D); +static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250); + +#undef GFX6_3D_REG_INDEX + +} // namespace AmdGpu diff --git a/src/video_core/amdgpu/regs.h b/src/video_core/amdgpu/regs.h new file mode 100644 index 000000000..a740c38c2 --- /dev/null +++ b/src/video_core/amdgpu/regs.h @@ -0,0 +1,189 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/amdgpu/regs_color.h" +#include "video_core/amdgpu/regs_depth.h" +#include "video_core/amdgpu/regs_primitive.h" +#include "video_core/amdgpu/regs_shader.h" +#include "video_core/amdgpu/regs_texture.h" +#include "video_core/amdgpu/regs_vertex.h" + +namespace AmdGpu { + +#define DO_CONCAT2(x, y) x##y +#define CONCAT2(x, y) DO_CONCAT2(x, y) +#define INSERT_PADDING_WORDS(num_words) \ + [[maybe_unused]] std::array CONCAT2(pad, __LINE__) + +union Regs { + static constexpr u32 NumRegs = 0xD000; + static constexpr u32 UconfigRegWordOffset = 0xC000; + static constexpr u32 ContextRegWordOffset = 0xA000; + static constexpr u32 ConfigRegWordOffset = 0x2000; + static constexpr u32 ShRegWordOffset = 0x2C00; + + struct { + INSERT_PADDING_WORDS(11272); + ShaderProgram ps_program; + INSERT_PADDING_WORDS(44); + ShaderProgram vs_program; + INSERT_PADDING_WORDS(44); + ShaderProgram gs_program; + INSERT_PADDING_WORDS(44); + ShaderProgram es_program; + INSERT_PADDING_WORDS(44); + ShaderProgram hs_program; + INSERT_PADDING_WORDS(44); + ShaderProgram ls_program; + INSERT_PADDING_WORDS(164); + ComputeProgram cs_program; + INSERT_PADDING_WORDS(29104); + DepthRenderControl depth_render_control; + INSERT_PADDING_WORDS(1); + DepthView depth_view; + DepthRenderOverride depth_render_override; + INSERT_PADDING_WORDS(1); + Address depth_htile_data_base; + INSERT_PADDING_WORDS(2); + float depth_bounds_min; + float depth_bounds_max; + u32 stencil_clear; + float depth_clear; + 
Scissor screen_scissor; + INSERT_PADDING_WORDS(2); + DepthBuffer depth_buffer; + INSERT_PADDING_WORDS(8); + BorderColorBuffer ta_bc_base; + INSERT_PADDING_WORDS(94); + WindowOffset window_offset; + ViewportScissor window_scissor; + INSERT_PADDING_WORDS(11); + ColorBufferMask color_target_mask; + ColorBufferMask color_shader_mask; + ViewportScissor generic_scissor; + INSERT_PADDING_WORDS(2); + std::array viewport_scissors; + std::array viewport_depths; + INSERT_PADDING_WORDS(46); + u32 index_offset; + u32 primitive_restart_index; + INSERT_PADDING_WORDS(1); + BlendConstants blend_constants; + INSERT_PADDING_WORDS(2); + StencilControl stencil_control; + StencilRefMask stencil_ref_front; + StencilRefMask stencil_ref_back; + INSERT_PADDING_WORDS(1); + std::array viewports; + std::array clip_user_data; + INSERT_PADDING_WORDS(10); + std::array ps_inputs; + VsOutputConfig vs_output_config; + INSERT_PADDING_WORDS(1); + PsInput ps_input_ena; + PsInput ps_input_addr; + INSERT_PADDING_WORDS(1); + u32 num_interp : 6; + INSERT_PADDING_WORDS(12); + ShaderPosFormat shader_pos_format; + ShaderExportFormat z_export_format; + ColorExportFormat color_export_format; + INSERT_PADDING_WORDS(26); + std::array blend_control; + INSERT_PADDING_WORDS(17); + IndexBufferBase index_base_address; + INSERT_PADDING_WORDS(1); + u32 draw_initiator; + INSERT_PADDING_WORDS(3); + DepthControl depth_control; + INSERT_PADDING_WORDS(1); + ColorControl color_control; + DepthShaderControl depth_shader_control; + ClipperControl clipper_control; + PolygonControl polygon_control; + ViewportControl viewport_control; + VsOutputControl vs_output_control; + INSERT_PADDING_WORDS(122); + LineControl line_control; + INSERT_PADDING_WORDS(4); + TessFactorClamp hs_clamp; + INSERT_PADDING_WORDS(7); + GsMode vgt_gs_mode; + INSERT_PADDING_WORDS(1); + ModeControl mode_control; + INSERT_PADDING_WORDS(8); + GsOutPrimitiveType vgt_gs_out_prim_type; + INSERT_PADDING_WORDS(1); + u32 index_size; + u32 max_index_size; + 
IndexBufferType index_buffer_type; + INSERT_PADDING_WORDS(1); + u32 enable_primitive_id; + INSERT_PADDING_WORDS(3); + u32 enable_primitive_restart; + INSERT_PADDING_WORDS(2); + u32 vgt_instance_step_rate_0; + u32 vgt_instance_step_rate_1; + INSERT_PADDING_WORDS(1); + u32 vgt_esgs_ring_itemsize; + u32 vgt_gsvs_ring_itemsize; + INSERT_PADDING_WORDS(33); + u32 vgt_gs_max_vert_out : 11; + INSERT_PADDING_WORDS(6); + ShaderStageEnable stage_enable; + LsHsConfig ls_hs_config; + u32 vgt_gs_vert_itemsize[4]; + TessellationConfig tess_config; + INSERT_PADDING_WORDS(3); + PolygonOffset poly_offset; + GsInstances vgt_gs_instance_cnt; + StreamOutConfig vgt_strmout_config; + StreamOutBufferConfig vgt_strmout_buffer_config; + INSERT_PADDING_WORDS(17); + AaConfig aa_config; + INSERT_PADDING_WORDS(31); + ColorBuffer color_buffers[NUM_COLOR_BUFFERS]; + INSERT_PADDING_WORDS(7343); + StreamOutControl cp_strmout_cntl; + INSERT_PADDING_WORDS(514); + PrimitiveType primitive_type; + INSERT_PADDING_WORDS(9); + u32 num_indices; + VgtNumInstances num_instances; + INSERT_PADDING_WORDS(2); + TessFactorMemoryBase vgt_tf_memory_base; + }; + std::array reg_array; + + const ShaderProgram* ProgramForStage(u32 index) const { + switch (index) { + case 0: + return &ps_program; + case 1: + return &vs_program; + case 2: + return &gs_program; + case 3: + return &es_program; + case 4: + return &hs_program; + case 5: + return &ls_program; + } + return nullptr; + } + + bool IsClipDisabled() const { + return clipper_control.clip_disable || primitive_type == PrimitiveType::RectList; + } + + void SetDefaults(); +}; + +#undef DO_CONCAT2 +#undef CONCAT2 +#undef INSERT_PADDING_WORDS + +} // namespace AmdGpu diff --git a/src/video_core/amdgpu/regs_color.h b/src/video_core/amdgpu/regs_color.h new file mode 100644 index 000000000..94b3a51b0 --- /dev/null +++ b/src/video_core/amdgpu/regs_color.h @@ -0,0 +1,307 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: 
GPL-2.0-or-later + +#pragma once + +#include "video_core/amdgpu/pixel_format.h" +#include "video_core/amdgpu/tiling.h" + +namespace AmdGpu { + +static constexpr u32 NUM_COLOR_BUFFERS = 8; + +using BlendConstants = std::array; + +struct BlendControl { + enum class BlendFactor : u32 { + Zero = 0, + One = 1, + SrcColor = 2, + OneMinusSrcColor = 3, + SrcAlpha = 4, + OneMinusSrcAlpha = 5, + DstAlpha = 6, + OneMinusDstAlpha = 7, + DstColor = 8, + OneMinusDstColor = 9, + SrcAlphaSaturate = 10, + ConstantColor = 13, + OneMinusConstantColor = 14, + Src1Color = 15, + InvSrc1Color = 16, + Src1Alpha = 17, + InvSrc1Alpha = 18, + ConstantAlpha = 19, + OneMinusConstantAlpha = 20, + }; + + enum class BlendFunc : u32 { + Add = 0, + Subtract = 1, + Min = 2, + Max = 3, + ReverseSubtract = 4, + }; + + BlendFactor color_src_factor : 5; + BlendFunc color_func : 3; + BlendFactor color_dst_factor : 5; + u32 : 3; + BlendFactor alpha_src_factor : 5; + BlendFunc alpha_func : 3; + BlendFactor alpha_dst_factor : 5; + u32 separate_alpha_blend : 1; + u32 enable : 1; + u32 disable_rop3 : 1; + + bool operator==(const BlendControl& other) const = default; +}; + +struct ColorControl { + enum class OperationMode : u32 { + Disable = 0u, + Normal = 1u, + EliminateFastClear = 2u, + Resolve = 3u, + Err = 4u, + FmaskDecompress = 5u, + }; + enum class LogicOp : u32 { + Clear = 0x00, + Nor = 0x11, + AndInverted = 0x22, + CopyInverted = 0x33, + AndReverse = 0x44, + Invert = 0x55, + Xor = 0x66, + Nand = 0x77, + And = 0x88, + Equiv = 0x99, + Noop = 0xAA, + OrInverted = 0xBB, + Copy = 0xCC, + OrReverse = 0xDD, + Or = 0xEE, + Set = 0xFF, + }; + + u32 disable_dual_quad : 1; + u32 : 2; + u32 degamma_enable : 1; + OperationMode mode : 3; + u32 : 9; + LogicOp rop3 : 8; +}; + +struct ColorBufferMask { + enum ColorComponent : u32 { + ComponentR = (1u << 0), + ComponentG = (1u << 1), + ComponentB = (1u << 2), + ComponentA = (1u << 3), + }; + + u32 raw; + + u32 GetMask(u32 buf_id) const { + return (raw >> (buf_id * 4)) 
& 0xfu; + } + + void SetMask(u32 buf_id, u32 mask) { + raw &= ~(0xf << (buf_id * 4)); + raw |= (mask << (buf_id * 4)); + } +}; + +struct ColorBuffer { + enum class EndianSwap : u32 { + None = 0, + Swap8In16 = 1, + Swap8In32 = 2, + Swap8In64 = 3, + }; + + enum class SwapMode : u32 { + Standard = 0, + Alternate = 1, + StandardReverse = 2, + AlternateReverse = 3, + }; + + enum class RoundMode : u32 { + ByHalf = 0, + Truncate = 1, + }; + + u32 base_address; + struct { + u32 tile_max : 11; + u32 : 9; + u32 fmask_tile_max : 11; + } pitch; + struct { + u32 tile_max : 22; + } slice; + struct { + u32 slice_start : 11; + u32 : 2; + u32 slice_max : 11; + } view; + union Color0Info { + u32 raw; + struct { + EndianSwap endian : 2; + u32 format : 5; + u32 linear_general : 1; + u32 number_type : 3; + SwapMode comp_swap : 2; + u32 fast_clear : 1; + u32 compression : 1; + u32 blend_clamp : 1; + u32 blend_bypass : 1; + u32 simple_float : 1; + RoundMode round_mode : 1; + u32 cmask_is_linear : 1; + u32 blend_opt_dont_rd_dst : 3; + u32 blend_opt_discard_pixel : 3; + u32 fmask_compression_disable_ci : 1; + u32 fmask_compress_1frag_only : 1; + u32 dcc_enable : 1; + u32 cmask_addr_type : 2; + u32 alt_tile_mode : 1; + }; + } info; + union Color0Attrib { + u32 raw; + struct { + TileMode tile_mode_index : 5; + u32 fmask_tile_mode_index : 5; + u32 fmask_bank_height : 2; + u32 num_samples_log2 : 3; + u32 num_fragments_log2 : 2; + u32 force_dst_alpha_1 : 1; + }; + } attrib; + u32 pad0; + u32 cmask_base_address; + struct { + u32 tile_max : 14; + } cmask_slice; + u32 fmask_base_address; + struct { + u32 tile_max : 14; + } fmask_slice; + u32 clear_word0; + u32 clear_word1; + std::array pad1; + + operator bool() const { + return base_address && info.format; + } + + u32 Pitch() const { + return (pitch.tile_max + 1) << 3; + } + + u32 Height() const { + return (slice.tile_max + 1) * 64 / Pitch(); + } + + u64 Address() const { + return u64(base_address) << 8 | (info.linear_general ? 
(view.slice_start & 0xff) : 0); + } + + VAddr CmaskAddress() const { + return VAddr(cmask_base_address) << 8; + } + + VAddr FmaskAddress() const { + return VAddr(fmask_base_address) << 8; + } + + u32 NumSamples() const { + return 1 << attrib.num_fragments_log2; + } + + u32 BaseSlice() const { + return info.linear_general ? 0 : view.slice_start; + } + + u32 NumSlices() const { + return view.slice_max + 1; + } + + u32 GetColorSliceSize() const { + const auto num_bytes_per_element = NumBitsPerBlock(DataFormat(info.format)) / 8u; + const auto slice_size = num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples(); + return slice_size; + } + + TileMode GetTileMode() const { + return info.linear_general ? TileMode::DisplayLinearGeneral : attrib.tile_mode_index; + } + + bool IsTiled() const { + return GetTileMode() != TileMode::DisplayLinearAligned && + GetTileMode() != TileMode::DisplayLinearGeneral; + } + + DataFormat GetDataFmt() const { + return RemapDataFormat(DataFormat(info.format)); + } + + NumberFormat GetNumberFmt() const { + return RemapNumberFormat(GetFixedNumberFormat(), DataFormat(info.format)); + } + + NumberConversion GetNumberConversion() const { + return MapNumberConversion(GetFixedNumberFormat(), DataFormat(info.format)); + } + + CompMapping Swizzle() const { + // clang-format off + static constexpr std::array<std::array<CompMapping, 4>, 4> mrt_swizzles{{ + // Standard + std::array<CompMapping, 4>{{ + {.r = CompSwizzle::Red, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Alpha}, + }}, + // Alternate + std::array<CompMapping, 4>{{ + {.r = CompSwizzle::Green, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Alpha, .b =
CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Alpha, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Alpha}, + }}, + // StandardReverse + std::array<CompMapping, 4>{{ + {.r = CompSwizzle::Blue, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Green, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Blue, .b = CompSwizzle::Green, .a = CompSwizzle::Red}, + }}, + // AlternateReverse + std::array<CompMapping, 4>{{ + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Green, .a = CompSwizzle::Blue}, + }}, + }}; + // clang-format on + const auto swap_idx = static_cast<u32>(info.comp_swap); + const auto components_idx = NumComponents(DataFormat(info.format)) - 1; + const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx]; + return RemapSwizzle(DataFormat(info.format), mrt_swizzle); + } + + NumberFormat GetFixedNumberFormat() const { + // There is a small difference between T# and CB number types, account for it. + const auto number_fmt = NumberFormat(info.number_type); + return number_fmt == NumberFormat::SnormNz ?
NumberFormat::Srgb : number_fmt; + } +}; + +} // namespace AmdGpu diff --git a/src/video_core/amdgpu/regs_depth.h b/src/video_core/amdgpu/regs_depth.h new file mode 100644 index 000000000..5cab35cda --- /dev/null +++ b/src/video_core/amdgpu/regs_depth.h @@ -0,0 +1,291 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/assert.h" +#include "common/types.h" +#include "video_core/amdgpu/tiling.h" + +namespace AmdGpu { + +enum class ZOrder : u32 { + LateZ = 0, + EarlyZLateZ = 1, + ReZ = 2, + EarlyZReZ = 3, +}; + +enum class ConservativeDepth : u32 { + Any = 0, + LessThanZ = 1, + GreaterThanZ = 2, +}; + +struct DepthShaderControl { + u32 z_export_enable : 1; + u32 stencil_test_val_export_enable : 1; + u32 stencil_op_val_export_enable : 1; + u32 : 1; + ZOrder z_order : 2; + u32 kill_enable : 1; + u32 coverage_to_mask_enable : 1; + u32 mask_export_enable : 1; + u32 exec_on_hier_fail : 1; + u32 exec_on_noop : 1; + u32 alpha_to_mask_disable : 1; + u32 depth_before_shader : 1; + ConservativeDepth conservative_z_export : 2; +}; + +enum class CompareFunc : u32 { + Never = 0, + Less = 1, + Equal = 2, + LessEqual = 3, + Greater = 4, + NotEqual = 5, + GreaterEqual = 6, + Always = 7, +}; + +struct DepthControl { + u32 stencil_enable : 1; + u32 depth_enable : 1; + u32 depth_write_enable : 1; + u32 depth_bounds_enable : 1; + CompareFunc depth_func : 3; + u32 backface_enable : 1; + CompareFunc stencil_ref_func : 3; + u32 : 9; + CompareFunc stencil_bf_func : 3; + u32 : 7; + u32 enable_color_writes_on_depth_fail : 1; + u32 disable_color_writes_on_depth_pass : 1; +}; + +enum class StencilFunc : u32 { + Keep = 0, + Zero = 1, + Ones = 2, + ReplaceTest = 3, + ReplaceOp = 4, + AddClamp = 5, + SubClamp = 6, + Invert = 7, + AddWrap = 8, + SubWrap = 9, + And = 10, + Or = 11, + Xor = 12, + Nand = 13, + Nor = 14, + Xnor = 15, +}; + +struct StencilControl { + StencilFunc stencil_fail_front : 4; + 
StencilFunc stencil_zpass_front : 4; + StencilFunc stencil_zfail_front : 4; + StencilFunc stencil_fail_back : 4; + StencilFunc stencil_zpass_back : 4; + StencilFunc stencil_zfail_back : 4; +}; + +struct StencilRefMask { + u8 stencil_test_val; + u8 stencil_mask; + u8 stencil_write_mask; + u8 stencil_op_val; +}; + +struct DepthRenderControl { + u32 depth_clear_enable : 1; + u32 stencil_clear_enable : 1; + u32 depth_copy : 1; + u32 stencil_copy : 1; + u32 resummarize_enable : 1; + u32 stencil_compress_disable : 1; + u32 depth_compress_disable : 1; + u32 copy_centroid : 1; + u32 copy_sample : 1; + u32 decompress_enable : 1; +}; + +struct DepthView { + u32 slice_start : 11; + u32 : 2; + u32 slice_max : 11; + u32 z_read_only : 1; + u32 stencil_read_only : 1; + + u32 NumSlices() const { + return slice_max + 1u; + } +}; + +enum class ForceEnable : u32 { + Off = 0, + Enable = 1, + Disable = 2, +}; + +enum class ForceSumm : u32 { + Off = 0, + MinZ = 1, + MaxZ = 2, + Both = 3, +}; + +struct DepthRenderOverride { + ForceEnable force_hiz_enable : 2; + ForceEnable force_his_enable0 : 2; + ForceEnable force_his_enable1 : 2; + u32 force_shader_z_order : 1; + u32 fast_z_disable : 1; + u32 fast_stencil_disable : 1; + u32 noop_cull_disable : 1; + u32 force_color_kill : 1; + u32 force_z_read : 1; + u32 force_stencil_read : 1; + ForceEnable force_full_z_range : 2; + u32 force_qc_smask_conflict : 1; + u32 disable_viewport_clamp : 1; + u32 ignore_sc_zrange : 1; + u32 disable_fully_covered : 1; + ForceSumm force_z_limit_summ : 2; + u32 max_tiles_in_dtt : 5; + u32 disable_tile_rate_tiles : 1; + u32 force_z_dirty : 1; + u32 force_stencil_dirty : 1; + u32 force_z_valid : 1; + u32 force_stencil_valid : 1; + u32 preserve_compression : 1; +}; + +struct Eqaa { + u32 max_anchor_samples : 1; + u32 : 3; + u32 ps_iter_samples : 3; + u32 : 1; + u32 mask_export_num_samples : 3; + u32 : 1; + u32 alpha_to_mask_num_samples : 3; + u32 : 1; + u32 high_quality_intersections : 1; + u32 incoherent_eqaa_reads 
: 1; + u32 interpolate_comp_z : 1; + u32 interpolate_src_z : 1; + u32 static_anchor_associations : 1; + u32 alpha_to_mask_eqaa_disable : 1; + u32 : 2; + u32 overrasterization_amount : 3; + u32 enable_postz_overrasterization : 1; +}; + +struct DepthBuffer { + enum class ZFormat : u32 { + Invalid = 0, + Z16 = 1, + Z32Float = 3, + }; + + enum class StencilFormat : u32 { + Invalid = 0, + Stencil8 = 1, + }; + + struct ZInfo { + ZFormat format : 2; + u32 num_samples : 2; + u32 : 9; + u32 tile_split : 3; + u32 : 4; + u32 tile_mode_index : 3; + u32 decompress_on_n_zplanes : 4; + u32 allow_expclear : 1; + u32 read_size : 1; + u32 tile_surface_enable : 1; + u32 clear_disallowed : 1; + u32 zrange_precision : 1; + } z_info; + struct { + StencilFormat format : 1; + } stencil_info; + u32 z_read_base; + u32 stencil_read_base; + u32 z_write_base; + u32 stencil_write_base; + struct { + u32 pitch_tile_max : 11; + u32 height_tile_max : 11; + } depth_size; + struct { + u32 tile_max : 22; + } depth_slice; + + bool DepthValid() const { + return DepthAddress() != 0 && z_info.format != ZFormat::Invalid; + } + + bool StencilValid() const { + return StencilAddress() != 0 && stencil_info.format != StencilFormat::Invalid; + } + + bool DepthWriteValid() const { + return DepthWriteAddress() != 0 && z_info.format != ZFormat::Invalid; + } + + bool StencilWriteValid() const { + return StencilWriteAddress() != 0 && stencil_info.format != StencilFormat::Invalid; + } + + u32 Pitch() const { + return (depth_size.pitch_tile_max + 1) << 3; + } + + u32 Height() const { + return (depth_size.height_tile_max + 1) << 3; + } + + u64 DepthAddress() const { + return u64(z_read_base) << 8; + } + + u64 StencilAddress() const { + return u64(stencil_read_base) << 8; + } + + u64 DepthWriteAddress() const { + return u64(z_write_base) << 8; + } + + u64 StencilWriteAddress() const { + return u64(stencil_write_base) << 8; + } + + u32 NumSamples() const { + return 1u << z_info.num_samples; // spec doesn't say it is a 
log2 + } + + u32 NumBits() const { + return z_info.format == ZFormat::Z32Float ? 32 : 16; + } + + u32 GetDepthSliceSize() const { + ASSERT(z_info.format != ZFormat::Invalid); + const auto bpe = NumBits() >> 3; // in bytes + return (depth_slice.tile_max + 1) * 64 * bpe * NumSamples(); + } + + TileMode GetTileMode() const { + return static_cast(z_info.tile_mode_index); + } + + bool IsTiled() const { + return GetTileMode() != TileMode::DisplayLinearAligned && + GetTileMode() != TileMode::DisplayLinearGeneral; + } +}; + +} // namespace AmdGpu diff --git a/src/video_core/amdgpu/regs_primitive.h b/src/video_core/amdgpu/regs_primitive.h new file mode 100644 index 000000000..562ae656c --- /dev/null +++ b/src/video_core/amdgpu/regs_primitive.h @@ -0,0 +1,237 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace AmdGpu { + +static constexpr u32 NUM_VIEWPORTS = 16; +static constexpr u32 NUM_CLIP_PLANES = 6; + +enum class ClipSpace : u32 { + MinusWToW = 0, + ZeroToW = 1, +}; + +enum class PrimKillCond : u32 { + AllVtx = 0, + AnyVtx = 1, +}; + +struct ClipperControl { + u32 user_clip_plane_enable : 6; + u32 : 10; + u32 clip_disable : 1; + u32 : 2; + ClipSpace clip_space : 1; + u32 : 1; + PrimKillCond vtx_kill_or : 1; + u32 dx_rasterization_kill : 1; + u32 : 1; + u32 dx_linear_attr_clip_enable : 1; + u32 : 1; + u32 zclip_near_disable : 1; + u32 zclip_far_disable : 1; + + bool ZclipEnable() const { + if (zclip_near_disable != zclip_far_disable) { + return false; + } + return !zclip_near_disable; + } +}; + +enum class PolygonMode : u32 { + Point = 0, + Line = 1, + Fill = 2, +}; + +enum class ProvokingVtxLast : u32 { + First = 0, + Last = 1, +}; + +enum class CullMode : u32 { + None = 0, + Front = 1, + Back = 2, + FrontAndBack = 3, +}; + +enum class FrontFace : u32 { + CounterClockwise = 0, + Clockwise = 1, +}; + +struct PolygonControl { + u32 cull_front : 1; + 
u32 cull_back : 1; + FrontFace front_face : 1; + u32 enable_polygon_mode : 2; + PolygonMode polygon_mode_front : 3; + PolygonMode polygon_mode_back : 3; + u32 enable_polygon_offset_front : 1; + u32 enable_polygon_offset_back : 1; + u32 enable_polygon_offset_para : 1; + u32 : 2; + u32 enable_window_offset : 1; + u32 : 2; + ProvokingVtxLast provoking_vtx_last : 1; + u32 persp_corr_dis : 1; + u32 multi_prim_ib_ena : 1; + + PolygonMode PolyMode() const { + return enable_polygon_mode ? polygon_mode_front : PolygonMode::Fill; + } + + CullMode CullingMode() const { + return static_cast(cull_front | cull_back << 1); + } + + bool NeedsBias() const { + return enable_polygon_offset_back || enable_polygon_offset_front || + enable_polygon_offset_para; + } +}; + +struct VsOutputControl { + u32 clip_distance_enable : 8; + u32 cull_distance_enable : 8; + u32 use_vtx_point_size : 1; + u32 use_vtx_edge_flag : 1; + u32 use_vtx_render_target_idx : 1; + u32 use_vtx_viewport_idx : 1; + u32 use_vtx_kill_flag : 1; + u32 vs_out_misc_enable : 1; + u32 vs_out_ccdist0_enable : 1; + u32 vs_out_ccdist1_enable : 1; + u32 vs_out_misc_side_bus_ena : 1; + u32 use_vtx_gs_cut_flag : 1; + + bool IsClipDistEnabled(u32 index) const { + return (clip_distance_enable >> index) & 1; + } + + bool IsCullDistEnabled(u32 index) const { + return (cull_distance_enable >> index) & 1; + } +}; + +struct LineControl { + u32 width_fixed_point; + + float Width() const { + return static_cast(width_fixed_point) / 8.0; + } +}; + +struct ModeControl { + u32 msaa_enable : 1; + u32 vport_scissor_enable : 1; + u32 line_stripple_enable : 1; + u32 send_unlit_stiles_to_pkr : 1; +}; + +struct Scissor { + struct { + s16 top_left_x; + s16 top_left_y; + }; + struct { + s16 bottom_right_x; + s16 bottom_right_y; + }; + + static u16 Clamp(s16 value) { + return std::max(s16(0), value); + } + + u32 GetWidth() const { + return static_cast(Clamp(bottom_right_x) - Clamp(top_left_x)); + } + + u32 GetHeight() const { + return 
static_cast(Clamp(bottom_right_y) - Clamp(top_left_y)); + } +}; + +struct WindowOffset { + s32 window_x_offset : 16; + s32 window_y_offset : 16; +}; + +struct ViewportScissor { + struct { + u16 top_left_x : 15; + u16 top_left_y : 15; + u16 window_offset_disable : 1; + }; + struct { + u16 bottom_right_x : 15; + u16 bottom_right_y : 15; + }; + + u32 GetWidth() const { + return bottom_right_x - top_left_x; + } + + u32 GetHeight() const { + return bottom_right_y - top_left_y; + } +}; + +struct ViewportDepth { + float zmin; + float zmax; +}; + +struct ViewportBounds { + float xscale; + float xoffset; + float yscale; + float yoffset; + float zscale; + float zoffset; +}; + +struct ViewportControl { + u32 xscale_enable : 1; + u32 xoffset_enable : 1; + u32 yscale_enable : 1; + u32 yoffset_enable : 1; + u32 zscale_enable : 1; + u32 zoffset_enable : 1; + u32 : 2; + u32 xy_transformed : 1; + u32 z_transformed : 1; + u32 w_transformed : 1; + u32 perfcounter_ref : 1; +}; + +struct ClipUserData { + u32 data_x; + u32 data_y; + u32 data_z; + u32 data_w; +}; + +struct AaConfig { + u32 msaa_num_samples : 3; + u32 : 1; + u32 aa_mask_centroid_dtmn : 1; + u32 : 8; + u32 max_sample_dst : 4; + u32 : 3; + u32 msaa_exposed_samples : 3; + u32 : 1; + u32 detail_to_exposed_mode : 2; + + u32 NumSamples() const { + return 1 << msaa_num_samples; + } +}; + +} // namespace AmdGpu diff --git a/src/video_core/amdgpu/regs_shader.h b/src/video_core/amdgpu/regs_shader.h new file mode 100644 index 000000000..bc20c9a71 --- /dev/null +++ b/src/video_core/amdgpu/regs_shader.h @@ -0,0 +1,241 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/assert.h" +#include "common/types.h" +#include "shader_recompiler/params.h" + +namespace AmdGpu { + +static constexpr u32 NUM_USER_DATA = 16; + +using UserData = std::array; + +struct BinaryInfo { + static constexpr std::array signature_ref = {0x4f, 0x72, 0x62, 0x53, + 
0x68, 0x64, 0x72}; // OrbShdr + + std::array signature; + u8 version; + u32 pssl_or_cg : 1; + u32 cached : 1; + u32 type : 4; + u32 source_type : 2; + u32 length : 24; + u8 chunk_usage_base_offset_in_dw; + u8 num_input_usage_slots; + u8 is_srt : 1; + u8 is_srt_used_info_valid : 1; + u8 is_extended_usage_info : 1; + u8 reserved2 : 5; + u8 reserved3; + u64 shader_hash; + u32 crc32; + + bool Valid() const { + return signature == signature_ref; + } +}; + +enum class FpRoundMode : u32 { + NearestEven = 0, + PlusInf = 1, + MinInf = 2, + ToZero = 3, +}; + +enum class FpDenormMode : u32 { + InOutFlush = 0, + InAllowOutFlush = 1, + InFlushOutAllow = 2, + InOutAllow = 3, +}; + +struct ShaderProgram { + u64 address : 40; + struct { + u32 num_vgprs : 6; + u32 num_sgprs : 4; + u32 priority : 2; + FpRoundMode fp_round_mode32 : 2; + FpRoundMode fp_round_mode64 : 2; + FpDenormMode fp_denorm_mode32 : 2; + FpDenormMode fp_denorm_mode64 : 2; + u32 : 4; + u32 vgpr_comp_cnt : 2; + u32 : 6; + u32 scratch_en : 1; + u32 num_user_regs : 5; + u32 : 1; + u32 oc_lds_en : 1; + } settings; + UserData user_data; + + template + const T Address() const { + return std::bit_cast(address << 8); + } + + [[nodiscard]] u32 NumVgprs() const { + // Each increment allocates 4 registers, where 0 = 4 registers. 
+ return (settings.num_vgprs + 1) * 4; + } +}; + +struct VsOutputConfig { + u32 : 1; + u32 export_count_min_one : 5; + u32 half_pack : 1; + + u32 NumExports() const { + return export_count_min_one + 1; + } +}; + +struct PsInputControl { + u32 input_offset : 5; + u32 use_default : 1; + u32 : 2; + u32 default_value : 2; + u32 flat_shade : 1; +}; + +struct PsInput { + u32 persp_sample_ena : 1; + u32 persp_center_ena : 1; + u32 persp_centroid_ena : 1; + u32 persp_pull_model_ena : 1; + u32 linear_sample_ena : 1; + u32 linear_center_ena : 1; + u32 linear_centroid_ena : 1; + u32 line_stipple_tex_ena : 1; + u32 pos_x_float_ena : 1; + u32 pos_y_float_ena : 1; + u32 pos_z_float_ena : 1; + u32 pos_w_float_ena : 1; + u32 front_face_ena : 1; + u32 ancillary_ena : 1; + u32 sample_coverage_ena : 1; + u32 pos_fixed_pt_ena : 1; + + bool operator==(const PsInput&) const = default; +}; + +enum class ShaderExportComp : u32 { + None = 0, + OneComp = 1, + TwoComp = 2, + FourCompCompressed = 3, + FourComp = 4, +}; + +struct ShaderPosFormat { + ShaderExportComp pos0 : 4; + ShaderExportComp pos1 : 4; + ShaderExportComp pos2 : 4; + ShaderExportComp pos3 : 4; +}; + +enum class ShaderExportFormat : u32 { + Zero = 0, + R_32 = 1, + GR_32 = 2, + AR_32 = 3, + ABGR_FP16 = 4, + ABGR_UNORM16 = 5, + ABGR_SNORM16 = 6, + ABGR_UINT16 = 7, + ABGR_SINT16 = 8, + ABGR_32 = 9, +}; + +struct ColorExportFormat { + u32 raw; + + [[nodiscard]] ShaderExportFormat GetFormat(const u32 buf_idx) const { + return static_cast((raw >> (buf_idx * 4)) & 0xfu); + } +}; + +struct ComputeProgram { + u32 dispatch_initiator; + u32 dim_x; + u32 dim_y; + u32 dim_z; + u32 start_x; + u32 start_y; + u32 start_z; + struct { + u16 full; + u16 partial; + } num_thread_x, num_thread_y, num_thread_z; + u32 pad0; + u32 max_wave_id : 12; + u64 address : 40; + std::array pad1; + struct { + u64 num_vgprs : 6; + u64 num_sgprs : 4; + u64 : 23; + u64 num_user_regs : 5; + u64 : 1; + u64 tgid_enable : 3; + u64 : 5; + u64 lds_dwords : 9; + } 
settings; + u32 pad2; + u32 resource_limits; + std::array pad3; + UserData user_data; + + template + const T Address() const { + return std::bit_cast(address << 8); + } + + u32 SharedMemSize() const noexcept { + // lds_dwords is in units of 128 dwords. We return bytes. + return settings.lds_dwords * 128 * 4; + } + + u32 NumWorkgroups() const noexcept { + return dim_x * dim_y * dim_z; + } + + bool IsTgidEnabled(u32 i) const noexcept { + return (settings.tgid_enable >> i) & 1; + } +}; + +static constexpr const BinaryInfo& SearchBinaryInfo(const u32* code) { + constexpr u32 token_mov_vcchi = 0xBEEB03FF; + if (code[0] == token_mov_vcchi) { + const auto* info = std::bit_cast(code + (code[1] + 1) * 2); + if (info->Valid()) { + return *info; + } + } + constexpr u32 signature_size = sizeof(BinaryInfo::signature_ref) / sizeof(u8); + constexpr u32 search_limit = 0x4000; + const u32* end = code + search_limit; + for (const u32* it = code; it < end; ++it) { + if (const BinaryInfo* info = std::bit_cast(it); info->Valid()) { + return *info; + } + } + UNREACHABLE_MSG("Shader binary info not found."); +} + +static constexpr Shader::ShaderParams GetParams(const auto& sh) { + const auto* code = sh.template Address(); + const auto& bininfo = SearchBinaryInfo(code); + return { + .user_data = sh.user_data, + .code = std::span{code, bininfo.length / sizeof(u32)}, + .hash = bininfo.shader_hash, + }; +} + +} // namespace AmdGpu diff --git a/src/video_core/amdgpu/regs_texture.h b/src/video_core/amdgpu/regs_texture.h new file mode 100644 index 000000000..503cecf60 --- /dev/null +++ b/src/video_core/amdgpu/regs_texture.h @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" + +namespace AmdGpu { + +struct BorderColorBuffer { + u64 base_addr : 40; + + template + const T Address() const { + return std::bit_cast(base_addr << 8); + } +}; + +} // namespace AmdGpu 
diff --git a/src/video_core/amdgpu/regs_vertex.h b/src/video_core/amdgpu/regs_vertex.h new file mode 100644 index 000000000..5422b92c1 --- /dev/null +++ b/src/video_core/amdgpu/regs_vertex.h @@ -0,0 +1,257 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/assert.h" +#include "common/types.h" + +namespace AmdGpu { + +enum class PrimitiveType : u32 { + None = 0, + PointList = 1, + LineList = 2, + LineStrip = 3, + TriangleList = 4, + TriangleFan = 5, + TriangleStrip = 6, + PatchPrimitive = 9, + AdjLineList = 10, + AdjLineStrip = 11, + AdjTriangleList = 12, + AdjTriangleStrip = 13, + RectList = 17, + LineLoop = 18, + QuadList = 19, + QuadStrip = 20, + Polygon = 21, +}; + +struct IndexBufferBase { + u32 base_addr_hi : 8; + u32 base_addr_lo; + + template + T Address() const { + return std::bit_cast((base_addr_lo & ~1U) | u64(base_addr_hi) << 32); + } +}; + +enum class IndexType : u32 { + Index16 = 0, + Index32 = 1, +}; + +enum class IndexSwapMode : u32 { + None = 0, + Swap16 = 1, + Swap32 = 2, + SwapWord = 3, +}; + +union IndexBufferType { + u32 raw; + struct { + IndexType index_type : 2; + IndexSwapMode swap_mode : 2; + }; +}; + +struct VgtNumInstances { + u32 num_instances; + + u32 NumInstances() const { + return num_instances == 0 ? 
1 : num_instances; + } +}; + +struct PolygonOffset { + float depth_bias; + float front_scale; + float front_offset; + float back_scale; + float back_offset; +}; + +struct Address { + u32 address; + + VAddr GetAddress() const { + return u64(address) << 8; + } +}; + +union ShaderStageEnable { + enum VgtStages : u32 { + Vs = 0u, // always enabled + EsGs = 0xB0u, + LsHs = 0x45u, + }; + + VgtStages raw; + struct { + u32 ls_en : 2; + u32 hs_en : 1; + u32 es_en : 2; + u32 gs_en : 1; + u32 vs_en : 2; + u32 dynamic_hs : 1; + }; + + bool IsStageEnabled(u32 stage) const { + switch (stage) { + case 0: + case 1: + return true; + case 2: + return gs_en; + case 3: + return es_en; + case 4: + return hs_en; + case 5: + return ls_en; + default: + UNREACHABLE(); + } + } +}; + +union GsInstances { + u32 raw; + struct { + u32 enable : 2; + u32 count : 6; + }; + + bool IsEnabled() const { + return enable && count > 0; + } +}; + +enum class GsOutputPrimitiveType : u32 { + PointList = 0, + LineStrip = 1, + TriangleStrip = 2, +}; + +union GsOutPrimitiveType { + u32 raw; + struct { + GsOutputPrimitiveType outprim_type : 6; + GsOutputPrimitiveType outprim_type1 : 6; + GsOutputPrimitiveType outprim_type2 : 6; + GsOutputPrimitiveType outprim_type3 : 6; + u32 reserved : 3; + u32 unique_type_per_stream : 1; + }; + + GsOutputPrimitiveType GetPrimitiveType(u32 stream) const { + if (unique_type_per_stream == 0) { + return outprim_type; + } + + switch (stream) { + case 0: + return outprim_type; + case 1: + return outprim_type1; + case 2: + return outprim_type2; + case 3: + return outprim_type3; + default: + UNREACHABLE(); + } + } +}; + +enum class GsScenario : u32 { + Off = 0, + ScenarioA = 1, + ScenarioB = 2, + ScenarioG = 3, + ScenarioC = 4, +}; + +struct GsMode { + GsScenario mode : 3; + u32 cut_mode : 2; + u32 : 17; + u32 onchip : 2; +}; + +struct StreamOutControl { + u32 offset_update_done : 1; + u32 : 31; +}; + +union StreamOutConfig { + u32 raw; + struct { + u32 streamout_0_en : 1; + u32 
streamout_1_en : 1; + u32 streamout_2_en : 1; + u32 streamout_3_en : 1; + u32 rast_stream : 3; + u32 : 1; + u32 rast_stream_mask : 4; + u32 : 19; + u32 use_rast_stream_mask : 1; + }; +}; + +struct StreamOutBufferConfig { + u32 stream_0_buf_en : 4; + u32 stream_1_buf_en : 4; + u32 stream_2_buf_en : 4; + u32 stream_3_buf_en : 4; +}; + +struct LsHsConfig { + u32 num_patches : 8; + u32 hs_input_control_points : 6; + u32 hs_output_control_points : 6; +}; + +enum class TessellationType : u32 { + Isoline = 0, + Triangle = 1, + Quad = 2, +}; + +enum class TessellationPartitioning : u32 { + Integer = 0, + Pow2 = 1, + FracOdd = 2, + FracEven = 3, +}; + +enum class TessellationTopology : u32 { + Point = 0, + Line = 1, + TriangleCw = 2, + TriangleCcw = 3, +}; + +struct TessellationConfig { + TessellationType type : 2; + TessellationPartitioning partitioning : 3; + TessellationTopology topology : 3; +}; + +struct TessFactorMemoryBase { + u32 base; + + u64 MemoryBase() const { + return static_cast(base) << 8; + } +}; + +struct TessFactorClamp { + float hs_max_tess; + float hs_min_tess; +}; + +} // namespace AmdGpu diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h deleted file mode 100644 index 009fbbbb2..000000000 --- a/src/video_core/amdgpu/types.h +++ /dev/null @@ -1,146 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include "common/types.h" - -namespace AmdGpu { - -enum class FpRoundMode : u32 { - NearestEven = 0, - PlusInf = 1, - MinInf = 2, - ToZero = 3, -}; - -enum class FpDenormMode : u32 { - InOutFlush = 0, - InAllowOutFlush = 1, - InFlushOutAllow = 2, - InOutAllow = 3, -}; - -enum class TessellationType : u32 { - Isoline = 0, - Triangle = 1, - Quad = 2, -}; - -constexpr std::string_view NameOf(TessellationType type) { - switch (type) { - case TessellationType::Isoline: - return "Isoline"; - case TessellationType::Triangle: - return 
"Triangle"; - case TessellationType::Quad: - return "Quad"; - default: - return "Unknown"; - } -} - -enum class TessellationPartitioning : u32 { - Integer = 0, - Pow2 = 1, - FracOdd = 2, - FracEven = 3, -}; - -constexpr std::string_view NameOf(TessellationPartitioning partitioning) { - switch (partitioning) { - case TessellationPartitioning::Integer: - return "Integer"; - case TessellationPartitioning::Pow2: - return "Pow2"; - case TessellationPartitioning::FracOdd: - return "FracOdd"; - case TessellationPartitioning::FracEven: - return "FracEven"; - default: - return "Unknown"; - } -} - -enum class TessellationTopology : u32 { - Point = 0, - Line = 1, - TriangleCw = 2, - TriangleCcw = 3, -}; - -constexpr std::string_view NameOf(TessellationTopology topology) { - switch (topology) { - case TessellationTopology::Point: - return "Point"; - case TessellationTopology::Line: - return "Line"; - case TessellationTopology::TriangleCw: - return "TriangleCw"; - case TessellationTopology::TriangleCcw: - return "TriangleCcw"; - default: - return "Unknown"; - } -} - -// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide] -enum class PrimitiveType : u32 { - None = 0, - PointList = 1, - LineList = 2, - LineStrip = 3, - TriangleList = 4, - TriangleFan = 5, - TriangleStrip = 6, - PatchPrimitive = 9, - AdjLineList = 10, - AdjLineStrip = 11, - AdjTriangleList = 12, - AdjTriangleStrip = 13, - RectList = 17, - LineLoop = 18, - QuadList = 19, - QuadStrip = 20, - Polygon = 21, -}; - -enum class GsOutputPrimitiveType : u32 { - PointList = 0, - LineStrip = 1, - TriangleStrip = 2, -}; - -} // namespace AmdGpu - -template <> -struct fmt::formatter { - constexpr auto parse(format_parse_context& ctx) { - return ctx.begin(); - } - auto format(AmdGpu::TessellationType type, format_context& ctx) const { - return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); - } -}; - -template <> -struct fmt::formatter { - constexpr auto 
parse(format_parse_context& ctx) { - return ctx.begin(); - } - auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const { - return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); - } -}; - -template <> -struct fmt::formatter { - constexpr auto parse(format_parse_context& ctx) { - return ctx.begin(); - } - auto format(AmdGpu::TessellationTopology type, format_context& ctx) const { - return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); - } -}; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 9674acf26..04c473f1b 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -23,7 +23,7 @@ namespace VideoCore { static constexpr size_t DataShareBufferSize = 64_KB; static constexpr size_t StagingBufferSize = 512_MB; -static constexpr size_t UboStreamBufferSize = 128_MB; +static constexpr size_t UboStreamBufferSize = 64_MB; static constexpr size_t DownloadBufferSize = 128_MB; static constexpr size_t DeviceBufferSize = 128_MB; static constexpr size_t MaxPageFaults = 1024; @@ -329,8 +329,7 @@ void BufferCache::BindIndexBuffer(u32 index_offset) { const auto& regs = liverpool->regs; // Figure out index type and size. - const bool is_index16 = - regs.index_buffer_type.index_type == AmdGpu::Liverpool::IndexType::Index16; + const bool is_index16 = regs.index_buffer_type.index_type == AmdGpu::IndexType::Index16; const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32; const u32 index_size = is_index16 ? 
sizeof(u16) : sizeof(u32); const VAddr index_address = diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 1c51f9e80..9a631b9a7 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -13,27 +13,27 @@ namespace Vulkan::LiverpoolToVK { -using DepthBuffer = Liverpool::DepthBuffer; +using DepthBuffer = AmdGpu::DepthBuffer; -vk::StencilOp StencilOp(Liverpool::StencilFunc op) { +vk::StencilOp StencilOp(AmdGpu::StencilFunc op) { switch (op) { - case Liverpool::StencilFunc::Keep: + case AmdGpu::StencilFunc::Keep: return vk::StencilOp::eKeep; - case Liverpool::StencilFunc::Zero: + case AmdGpu::StencilFunc::Zero: return vk::StencilOp::eZero; - case Liverpool::StencilFunc::ReplaceTest: + case AmdGpu::StencilFunc::ReplaceTest: return vk::StencilOp::eReplace; - case Liverpool::StencilFunc::AddClamp: + case AmdGpu::StencilFunc::AddClamp: return vk::StencilOp::eIncrementAndClamp; - case Liverpool::StencilFunc::SubClamp: + case AmdGpu::StencilFunc::SubClamp: return vk::StencilOp::eDecrementAndClamp; - case Liverpool::StencilFunc::Invert: + case AmdGpu::StencilFunc::Invert: return vk::StencilOp::eInvert; - case Liverpool::StencilFunc::AddWrap: + case AmdGpu::StencilFunc::AddWrap: return vk::StencilOp::eIncrementAndWrap; - case Liverpool::StencilFunc::SubWrap: + case AmdGpu::StencilFunc::SubWrap: return vk::StencilOp::eDecrementAndWrap; - case Liverpool::StencilFunc::ReplaceOp: + case AmdGpu::StencilFunc::ReplaceOp: return vk::StencilOp::eReplace; default: UNREACHABLE(); @@ -41,23 +41,23 @@ vk::StencilOp StencilOp(Liverpool::StencilFunc op) { } } -vk::CompareOp CompareOp(Liverpool::CompareFunc func) { +vk::CompareOp CompareOp(AmdGpu::CompareFunc func) { switch (func) { - case Liverpool::CompareFunc::Always: + case AmdGpu::CompareFunc::Always: return vk::CompareOp::eAlways; - case Liverpool::CompareFunc::Equal: + case AmdGpu::CompareFunc::Equal: 
return vk::CompareOp::eEqual; - case Liverpool::CompareFunc::GreaterEqual: + case AmdGpu::CompareFunc::GreaterEqual: return vk::CompareOp::eGreaterOrEqual; - case Liverpool::CompareFunc::Greater: + case AmdGpu::CompareFunc::Greater: return vk::CompareOp::eGreater; - case Liverpool::CompareFunc::LessEqual: + case AmdGpu::CompareFunc::LessEqual: return vk::CompareOp::eLessOrEqual; - case Liverpool::CompareFunc::Less: + case AmdGpu::CompareFunc::Less: return vk::CompareOp::eLess; - case Liverpool::CompareFunc::NotEqual: + case AmdGpu::CompareFunc::NotEqual: return vk::CompareOp::eNotEqual; - case Liverpool::CompareFunc::Never: + case AmdGpu::CompareFunc::Never: return vk::CompareOp::eNever; default: UNREACHABLE(); @@ -126,13 +126,13 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) { } } -vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode) { +vk::PolygonMode PolygonMode(AmdGpu::PolygonMode mode) { switch (mode) { - case Liverpool::PolygonMode::Point: + case AmdGpu::PolygonMode::Point: return vk::PolygonMode::ePoint; - case Liverpool::PolygonMode::Line: + case AmdGpu::PolygonMode::Line: return vk::PolygonMode::eLine; - case Liverpool::PolygonMode::Fill: + case AmdGpu::PolygonMode::Fill: return vk::PolygonMode::eFill; default: UNREACHABLE(); @@ -140,15 +140,15 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode) { } } -vk::CullModeFlags CullMode(Liverpool::CullMode mode) { +vk::CullModeFlags CullMode(AmdGpu::CullMode mode) { switch (mode) { - case Liverpool::CullMode::None: + case AmdGpu::CullMode::None: return vk::CullModeFlagBits::eNone; - case Liverpool::CullMode::Front: + case AmdGpu::CullMode::Front: return vk::CullModeFlagBits::eFront; - case Liverpool::CullMode::Back: + case AmdGpu::CullMode::Back: return vk::CullModeFlagBits::eBack; - case Liverpool::CullMode::FrontAndBack: + case AmdGpu::CullMode::FrontAndBack: return vk::CullModeFlagBits::eFrontAndBack; default: UNREACHABLE(); @@ -156,11 +156,11 @@ vk::CullModeFlags 
CullMode(Liverpool::CullMode mode) { } } -vk::FrontFace FrontFace(Liverpool::FrontFace face) { +vk::FrontFace FrontFace(AmdGpu::FrontFace face) { switch (face) { - case Liverpool::FrontFace::Clockwise: + case AmdGpu::FrontFace::Clockwise: return vk::FrontFace::eClockwise; - case Liverpool::FrontFace::CounterClockwise: + case AmdGpu::FrontFace::CounterClockwise: return vk::FrontFace::eCounterClockwise; default: UNREACHABLE(); @@ -168,8 +168,8 @@ vk::FrontFace FrontFace(Liverpool::FrontFace face) { } } -vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) { - using BlendFactor = Liverpool::BlendControl::BlendFactor; +vk::BlendFactor BlendFactor(AmdGpu::BlendControl::BlendFactor factor) { + using BlendFactor = AmdGpu::BlendControl::BlendFactor; switch (factor) { case BlendFactor::Zero: return vk::BlendFactor::eZero; @@ -214,8 +214,8 @@ vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) { } } -bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) { - using BlendFactor = Liverpool::BlendControl::BlendFactor; +bool IsDualSourceBlendFactor(AmdGpu::BlendControl::BlendFactor factor) { + using BlendFactor = AmdGpu::BlendControl::BlendFactor; switch (factor) { case BlendFactor::Src1Color: case BlendFactor::Src1Alpha: @@ -227,8 +227,8 @@ bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) { } } -vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) { - using BlendFunc = Liverpool::BlendControl::BlendFunc; +vk::BlendOp BlendOp(AmdGpu::BlendControl::BlendFunc func) { + using BlendFunc = AmdGpu::BlendControl::BlendFunc; switch (func) { case BlendFunc::Add: return vk::BlendOp::eAdd; @@ -245,8 +245,8 @@ vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) { } } -vk::LogicOp LogicOp(Liverpool::ColorControl::LogicOp logic_op) { - using LogicOp = Liverpool::ColorControl::LogicOp; +vk::LogicOp LogicOp(AmdGpu::ColorControl::LogicOp logic_op) { + using LogicOp = 
AmdGpu::ColorControl::LogicOp; switch (logic_op) { case LogicOp::Clear: return vk::LogicOp::eClear; @@ -805,9 +805,9 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat return format->vk_format; } -vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { +vk::ClearValue ColorBufferClearValue(const AmdGpu::ColorBuffer& color_buffer) { const auto comp_swizzle = color_buffer.Swizzle(); - const auto format = color_buffer.info.format.Value(); + const auto format = AmdGpu::DataFormat(color_buffer.info.format); const auto number_type = color_buffer.GetFixedNumberFormat(); const auto& c0 = color_buffer.clear_word0; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 61b7ea0a9..2ca737ac2 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -5,36 +5,37 @@ #include #include "common/assert.h" -#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pixel_format.h" +#include "video_core/amdgpu/regs_color.h" +#include "video_core/amdgpu/regs_depth.h" +#include "video_core/amdgpu/regs_primitive.h" +#include "video_core/amdgpu/regs_vertex.h" #include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_common.h" namespace Vulkan::LiverpoolToVK { -using Liverpool = AmdGpu::Liverpool; +vk::StencilOp StencilOp(AmdGpu::StencilFunc op); -vk::StencilOp StencilOp(Liverpool::StencilFunc op); - -vk::CompareOp CompareOp(Liverpool::CompareFunc func); +vk::CompareOp CompareOp(AmdGpu::CompareFunc func); bool IsPrimitiveCulled(AmdGpu::PrimitiveType type); vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type); -vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode); +vk::PolygonMode PolygonMode(AmdGpu::PolygonMode mode); -vk::CullModeFlags CullMode(Liverpool::CullMode mode); +vk::CullModeFlags CullMode(AmdGpu::CullMode mode); -vk::FrontFace 
FrontFace(Liverpool::FrontFace mode); +vk::FrontFace FrontFace(AmdGpu::FrontFace mode); -vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor); +vk::BlendFactor BlendFactor(AmdGpu::BlendControl::BlendFactor factor); -bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor); +bool IsDualSourceBlendFactor(AmdGpu::BlendControl::BlendFactor factor); -vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func); +vk::BlendOp BlendOp(AmdGpu::BlendControl::BlendFunc func); -vk::LogicOp LogicOp(Liverpool::ColorControl::LogicOp logic_op); +vk::LogicOp LogicOp(AmdGpu::ColorControl::LogicOp logic_op); vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode); @@ -63,17 +64,17 @@ std::span SurfaceFormats(); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); struct DepthFormatInfo { - Liverpool::DepthBuffer::ZFormat z_format; - Liverpool::DepthBuffer::StencilFormat stencil_format; + AmdGpu::DepthBuffer::ZFormat z_format; + AmdGpu::DepthBuffer::StencilFormat stencil_format; vk::Format vk_format; vk::FormatFeatureFlags2 flags; }; std::span DepthFormats(); -vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, - Liverpool::DepthBuffer::StencilFormat stencil_format); +vk::Format DepthFormat(AmdGpu::DepthBuffer::ZFormat z_format, + AmdGpu::DepthBuffer::StencilFormat stencil_format); -vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer); +vk::ClearValue ColorBufferClearValue(const AmdGpu::ColorBuffer& color_buffer); vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index f6216f54f..2b93eb7f3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -3,6 +3,7 @@ #include +#include "shader_recompiler/info.h" #include 
"video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -31,8 +32,8 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler, const auto sharp = buffer.GetSharp(*info); bindings.push_back({ .binding = binding++, - .descriptorType = buffer.IsStorage(sharp, profile) ? vk::DescriptorType::eStorageBuffer - : vk::DescriptorType::eUniformBuffer, + .descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer + : vk::DescriptorType::eUniformBuffer, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eCompute, }); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2c910888b..9e2ce4848 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -4,12 +4,10 @@ #include #include #include -#include #include "common/assert.h" #include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h" -#include "shader_recompiler/frontend/fetch_shader.h" -#include "video_core/amdgpu/resource.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -118,7 +116,7 @@ GraphicsPipeline::GraphicsPipeline( .lineWidth = 1.0f, }, vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT{ - .provokingVertexMode = key.provoking_vtx_last == Liverpool::ProvokingVtxLast::First + .provokingVertexMode = key.provoking_vtx_last == AmdGpu::ProvokingVtxLast::First ? 
vk::ProvokingVertexModeEXT::eFirstVertex : vk::ProvokingVertexModeEXT::eLastVertex, }, @@ -142,7 +140,7 @@ GraphicsPipeline::GraphicsPipeline( }; const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = { - .negativeOneToOne = key.clip_space == Liverpool::ClipSpace::MinusWToW, + .negativeOneToOne = key.clip_space == AmdGpu::ClipSpace::MinusWToW, }; const vk::PipelineViewportStateCreateInfo viewport_info = { @@ -259,7 +257,7 @@ GraphicsPipeline::GraphicsPipeline( color_formats[i] = color_format; } - std::array color_samples; + std::array color_samples; std::ranges::transform(key.color_samples, color_samples.begin(), [&instance](u8 num_samples) { return num_samples ? LiverpoolToVK::NumSamples(num_samples, instance.GetColorSampleCounts()) : vk::SampleCountFlagBits::e1; @@ -275,16 +273,15 @@ GraphicsPipeline::GraphicsPipeline( .pNext = instance.IsMixedDepthSamplesSupported() ? &mixed_samples : nullptr, .colorAttachmentCount = key.num_color_attachments, .pColorAttachmentFormats = color_formats.data(), - .depthAttachmentFormat = key.z_format != Liverpool::DepthBuffer::ZFormat::Invalid + .depthAttachmentFormat = key.z_format != AmdGpu::DepthBuffer::ZFormat::Invalid ? depth_format : vk::Format::eUndefined, - .stencilAttachmentFormat = - key.stencil_format != Liverpool::DepthBuffer::StencilFormat::Invalid - ? depth_format - : vk::Format::eUndefined, + .stencilAttachmentFormat = key.stencil_format != AmdGpu::DepthBuffer::StencilFormat::Invalid + ? depth_format + : vk::Format::eUndefined, }; - std::array attachments; + std::array attachments; for (u32 i = 0; i < key.num_color_attachments; i++) { const auto& control = key.blend_controls[i]; @@ -335,7 +332,7 @@ GraphicsPipeline::GraphicsPipeline( // Unfortunatelly, Vulkan doesn't provide any control on blend inputs, so below we detecting // such cases and override alpha value in order to emulate HW behaviour. 
const auto has_alpha_masked_out = - (key.cb_shader_mask.GetMask(i) & Liverpool::ColorBufferMask::ComponentA) == 0; + (key.cb_shader_mask.GetMask(i) & AmdGpu::ColorBufferMask::ComponentA) == 0; const auto has_src_alpha_in_src_blend = src_color == vk::BlendFactor::eSrcAlpha || src_color == vk::BlendFactor::eOneMinusSrcAlpha; const auto has_src_alpha_in_dst_blend = dst_color == vk::BlendFactor::eSrcAlpha || @@ -354,7 +351,7 @@ GraphicsPipeline::GraphicsPipeline( const vk::PipelineColorBlendStateCreateInfo color_blending = { .logicOpEnable = - instance.IsLogicOpSupported() && key.logic_op != Liverpool::ColorControl::LogicOp::Copy, + instance.IsLogicOpSupported() && key.logic_op != AmdGpu::ColorControl::LogicOp::Copy, .logicOp = LiverpoolToVK::LogicOp(key.logic_op), .attachmentCount = key.num_color_attachments, .pAttachments = attachments.data(), @@ -451,9 +448,8 @@ void GraphicsPipeline::BuildDescSetLayout() { const auto sharp = buffer.GetSharp(*stage); bindings.push_back({ .binding = binding++, - .descriptorType = buffer.IsStorage(sharp, profile) - ? vk::DescriptorType::eStorageBuffer - : vk::DescriptorType::eUniformBuffer, + .descriptorType = buffer.IsStorage(sharp) ? 
vk::DescriptorType::eStorageBuffer + : vk::DescriptorType::eUniformBuffer, .descriptorCount = 1, .stageFlags = stage_bit, }); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4786c43ca..8254605cb 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -6,10 +6,10 @@ #include #include -#include "common/types.h" #include "shader_recompiler/frontend/fetch_shader.h" -#include "video_core/renderer_vulkan/liverpool_to_vk.h" -#include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/amdgpu/regs_color.h" +#include "video_core/amdgpu/regs_depth.h" +#include "video_core/amdgpu/regs_primitive.h" #include "video_core/renderer_vulkan/vk_pipeline_common.h" namespace VideoCore { @@ -26,8 +26,6 @@ class Instance; class Scheduler; class DescriptorHeap; -using Liverpool = AmdGpu::Liverpool; - template using VertexInputs = boost::container::static_vector; @@ -36,25 +34,25 @@ struct GraphicsPipelineKey { std::array vertex_buffer_formats; u32 patch_control_points; u32 num_color_attachments; - std::array color_buffers; - std::array blend_controls; - std::array write_masks; - Liverpool::ColorBufferMask cb_shader_mask; - Liverpool::ColorControl::LogicOp logic_op; + std::array color_buffers; + std::array blend_controls; + std::array write_masks; + AmdGpu::ColorBufferMask cb_shader_mask; + AmdGpu::ColorControl::LogicOp logic_op; u8 num_samples; u8 depth_samples; - std::array color_samples; + std::array color_samples; u32 mrt_mask; struct { - Liverpool::DepthBuffer::ZFormat z_format : 2; - Liverpool::DepthBuffer::StencilFormat stencil_format : 1; + AmdGpu::DepthBuffer::ZFormat z_format : 2; + AmdGpu::DepthBuffer::StencilFormat stencil_format : 1; u32 depth_clamp_enable : 1; }; struct { AmdGpu::PrimitiveType prim_type : 5; - Liverpool::PolygonMode polygon_mode : 2; - Liverpool::ClipSpace clip_space : 1; - 
Liverpool::ProvokingVtxLast provoking_vtx_last : 1; + AmdGpu::PolygonMode polygon_mode : 2; + AmdGpu::ClipSpace clip_space : 1; + AmdGpu::ProvokingVtxLast provoking_vtx_last : 1; u32 depth_clip_enable : 1; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f81f3283c..24daf9c1c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,14 +12,13 @@ #include "shader_recompiler/info.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/liverpool.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_presenter.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" -extern std::unique_ptr presenter; - namespace Vulkan { using Shader::LogicalStage; @@ -36,8 +35,7 @@ constexpr static std::array DescriptorHeapSizes = { vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024}, }; -static u32 MapOutputs(std::span outputs, - const AmdGpu::Liverpool::VsOutputControl& ctl) { +static u32 MapOutputs(std::span outputs, const AmdGpu::VsOutputControl& ctl) { u32 num_outputs = 0; if (ctl.vs_out_misc_enable) { @@ -110,10 +108,10 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS } case Stage::Hull: { BuildCommon(regs.hs_program); - info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value(); - info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value(); + info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points; + info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points; info.hs_info.tess_type = regs.tess_config.type; - 
info.hs_info.offchip_lds_enable = regs.hs_program.settings.rsrc2_hs.oc_lds_en.Value(); + info.hs_info.offchip_lds_enable = regs.hs_program.settings.oc_lds_en; // We need to initialize most hs_info fields after finding the V# with tess constants break; @@ -130,7 +128,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS info.vs_info.num_outputs = MapOutputs(info.vs_info.outputs, regs.vs_output_control); info.vs_info.emulate_depth_negative_one_to_one = !instance.IsDepthClipControlSupported() && - regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW; + regs.clipper_control.clip_space == AmdGpu::ClipSpace::MinusWToW; info.vs_info.tess_emulated_primitive = regs.primitive_type == AmdGpu::PrimitiveType::RectList || regs.primitive_type == AmdGpu::PrimitiveType::QuadList; @@ -157,7 +155,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize; gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0]; gs_info.mode = regs.vgt_gs_mode.mode; - const auto params_vc = Liverpool::GetParams(regs.vs_program); + const auto params_vc = AmdGpu::GetParams(regs.vs_program); gs_info.vs_copy = params_vc.code; gs_info.vs_copy_hash = params_vc.hash; DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin"); @@ -191,7 +189,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS const auto& ps_inputs = regs.ps_inputs; for (u32 i = 0; i < regs.num_interp; i++) { info.fs_info.inputs[i] = { - .param_index = u8(ps_inputs[i].input_offset.Value()), + .param_index = u8(ps_inputs[i].input_offset), .is_default = bool(ps_inputs[i].use_default), .is_flat = bool(ps_inputs[i].flat_shade), .default_value = u8(ps_inputs[i].default_value), @@ -327,11 +325,11 @@ bool PipelineCache::RefreshGraphicsKey() { const bool db_enabled = regs.depth_buffer.DepthValid() || regs.depth_buffer.StencilValid(); - key.z_format = 
regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format.Value() - : Liverpool::DepthBuffer::ZFormat::Invalid; + key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format + : AmdGpu::DepthBuffer::ZFormat::Invalid; key.stencil_format = regs.depth_buffer.StencilValid() - ? regs.depth_buffer.stencil_info.format.Value() - : Liverpool::DepthBuffer::StencilFormat::Invalid; + ? regs.depth_buffer.stencil_info.format + : AmdGpu::DepthBuffer::StencilFormat::Invalid; key.depth_clamp_enable = !regs.depth_render_override.disable_viewport_clamp; key.depth_clip_enable = regs.clipper_control.ZclipEnable(); key.clip_space = regs.clipper_control.clip_space; @@ -339,17 +337,17 @@ bool PipelineCache::RefreshGraphicsKey() { key.prim_type = regs.primitive_type; key.polygon_mode = regs.polygon_control.PolyMode(); key.patch_control_points = - regs.stage_enable.hs_en ? regs.ls_hs_config.hs_input_control_points.Value() : 0; + regs.stage_enable.hs_en ? regs.ls_hs_config.hs_input_control_points : 0; key.logic_op = regs.color_control.rop3; key.depth_samples = db_enabled ? regs.depth_buffer.NumSamples() : 1; key.num_samples = key.depth_samples; key.cb_shader_mask = regs.color_shader_mask; const bool skip_cb_binding = - regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; + regs.color_control.mode == AmdGpu::ColorControl::OperationMode::Disable; // First pass to fill render target information needed by shader recompiler - for (s32 cb = 0; cb < Liverpool::NumColorBuffers && !skip_cb_binding; ++cb) { + for (s32 cb = 0; cb < AmdGpu::NUM_COLOR_BUFFERS && !skip_cb_binding; ++cb) { const auto& col_buf = regs.color_buffers[cb]; if (!col_buf || !regs.color_target_mask.GetMask(cb)) { // No attachment bound or writing to it is disabled. 
@@ -436,15 +434,7 @@ bool PipelineCache::RefreshGraphicsStages() { return false; } - const auto& bininfo = Liverpool::GetBinaryInfo(*pgm); - if (!bininfo.Valid()) { - LOG_WARNING(Render_Vulkan, "Invalid binary info structure!"); - key.stage_hashes[stage_out_idx] = 0; - infos[stage_out_idx] = nullptr; - return false; - } - - auto params = Liverpool::GetParams(*pgm); + const auto params = AmdGpu::GetParams(*pgm); std::optional fetch_shader_; std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_, key.stage_hashes[stage_out_idx]) = @@ -463,7 +453,7 @@ bool PipelineCache::RefreshGraphicsStages() { key.num_color_attachments = std::bit_width(key.mrt_mask); switch (regs.stage_enable.raw) { - case Liverpool::ShaderStageEnable::VgtStages::EsGs: + case AmdGpu::ShaderStageEnable::VgtStages::EsGs: if (!instance.IsGeometryStageSupported()) { LOG_WARNING(Render_Vulkan, "Geometry shader stage unsupported, skipping"); return false; @@ -479,7 +469,7 @@ bool PipelineCache::RefreshGraphicsStages() { return false; } break; - case Liverpool::ShaderStageEnable::VgtStages::LsHs: + case AmdGpu::ShaderStageEnable::VgtStages::LsHs: if (!instance.IsTessellationSupported() || (regs.tess_config.type == AmdGpu::TessellationType::Isoline && !instance.IsTessellationIsolinesSupported())) { @@ -519,7 +509,7 @@ bool PipelineCache::RefreshGraphicsStages() { bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; const auto& cs_pgm = liverpool->GetCsRegs(); - const auto cs_params = Liverpool::GetParams(cs_pgm); + const auto cs_params = AmdGpu::GetParams(cs_pgm); std::tie(infos[0], modules[0], fetch_shader, compute_key.value) = GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding); return true; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e077f857c..706b99536 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ 
b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,6 +19,10 @@ struct std::hash { } }; +namespace AmdGpu { +class Liverpool; +} + namespace Shader { struct Info; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp index 96e19d6a1..ef1623a54 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -3,13 +3,11 @@ #include -#include "shader_recompiler/info.h" -#include "video_core/buffer_cache/buffer_cache.h" +#include "shader_recompiler/resource.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_common.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/texture_cache/texture_cache.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h b/src/video_core/renderer_vulkan/vk_pipeline_common.h index 9633fc4ea..eb4e64c80 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -3,15 +3,16 @@ #pragma once -#include "shader_recompiler/backend/bindings.h" -#include "shader_recompiler/info.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/vk_common.h" -#include "video_core/texture_cache/texture_cache.h" -namespace VideoCore { -class BufferCache; -} // namespace VideoCore +#include + +namespace Shader { +struct Info; +struct PushData; +} // namespace Shader namespace Vulkan { @@ -74,7 +75,7 @@ protected: vk::UniqueDescriptorSetLayout desc_layout; std::array stages{}; bool uses_push_descriptors{}; - const bool is_compute; + bool is_compute; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 862ce42ec..7027d62f8 100644 --- 
a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -15,6 +15,7 @@ #include #include + #include "common/assert.h" #include "common/config.h" #include "common/logging/log.h" @@ -459,4 +460,4 @@ vk::UniqueDebugUtilsMessengerEXT CreateDebugCallback(vk::Instance instance) { return std::move(messenger); } -} // namespace Vulkan \ No newline at end of file +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_presenter.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp index 3605a3542..ac6b2b994 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -146,6 +146,10 @@ Presenter::~Presenter() { ImGui::Core::Shutdown(device); } +bool Presenter::IsVideoOutSurface(const AmdGpu::ColorBuffer& color_buffer) const { + return std::ranges::find(vo_buffers_addr, color_buffer.Address()) != vo_buffers_addr.cend(); +} + void Presenter::RecreateFrame(Frame* frame, u32 width, u32 height) { const vk::Device device = instance.GetDevice(); if (frame->imgui_texture) { @@ -288,7 +292,7 @@ static vk::Format GetFrameViewFormat(const Libraries::VideoOut::PixelFormat form Frame* Presenter::PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { - auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; + auto desc = VideoCore::TextureCache::ImageDesc{attribute, cpu_address}; const auto image_id = texture_cache.FindImage(desc); texture_cache.UpdateImage(image_id); diff --git a/src/video_core/renderer_vulkan/vk_presenter.h b/src/video_core/renderer_vulkan/vk_presenter.h index b0913333d..c1748e9dd 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.h +++ b/src/video_core/renderer_vulkan/vk_presenter.h @@ -6,9 +6,7 @@ #include #include "core/libraries/videoout/buffer.h" -#include "imgui/imgui_config.h" #include "imgui/imgui_texture.h" -#include "video_core/amdgpu/liverpool.h" #include 
"video_core/renderer_vulkan/host_passes/fsr_pass.h" #include "video_core/renderer_vulkan/host_passes/pp_pass.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -82,20 +80,18 @@ public: pp_settings.hdr = enable ? 1 : 0; } - bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) const { - return std::ranges::find(vo_buffers_addr, color_buffer.Address()) != vo_buffers_addr.cend(); - } - VideoCore::Image& RegisterVideoOutSurface( const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { vo_buffers_addr.emplace_back(cpu_address); - auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; + auto desc = VideoCore::TextureCache::ImageDesc{attribute, cpu_address}; const auto image_id = texture_cache.FindImage(desc); auto& image = texture_cache.GetImage(image_id); image.usage.vo_surface = 1u; return image; } + bool IsVideoOutSurface(const AmdGpu::ColorBuffer& color_buffer) const; + Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7dfef59a8..a47c523e1 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -6,6 +6,7 @@ #include "core/memory.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/liverpool.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -19,7 +20,7 @@ namespace Vulkan { -static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) { +static Shader::PushData MakeUserData(const AmdGpu::Regs& regs) { // TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex // is encountered and implemented in the recompiler. 
Shader::PushData push_data{}; @@ -60,20 +61,18 @@ void Rasterizer::CpSync() { bool Rasterizer::FilterDraw() { const auto& regs = liverpool->regs; - // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an - // actual draw hence can skip pipeline creation. - if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) { + if (regs.color_control.mode == AmdGpu::ColorControl::OperationMode::EliminateFastClear) { // Clears the render target if FCE is launched before any draws EliminateFastClear(); return false; } - if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) { + if (regs.color_control.mode == AmdGpu::ColorControl::OperationMode::FmaskDecompress) { // TODO: check for a valid MRT1 to promote the draw to the resolve pass. LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped"); ScopedMarkerInsert("FmaskDecompress"); return false; } - if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::Resolve) { + if (regs.color_control.mode == AmdGpu::ColorControl::OperationMode::Resolve) { LOG_TRACE(Render_Vulkan, "Resolve pass"); Resolve(); return false; @@ -85,7 +84,7 @@ bool Rasterizer::FilterDraw() { } const bool cb_disabled = - regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; + regs.color_control.mode == AmdGpu::ColorControl::OperationMode::Disable; const auto depth_copy = regs.depth_render_override.force_z_dirty && regs.depth_render_override.force_z_valid && regs.depth_buffer.DepthValid() && regs.depth_buffer.DepthWriteValid() && @@ -116,7 +115,7 @@ void Rasterizer::PrepareRenderState(const GraphicsPipeline* pipeline) { } const bool skip_cb_binding = - regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; + regs.color_control.mode == AmdGpu::ColorControl::OperationMode::Disable; for (s32 cb = 0; cb < std::bit_width(key.mrt_mask); ++cb) { auto& [image_id, desc] = cb_descs[cb]; const 
auto& col_buf = regs.color_buffers[cb]; @@ -147,8 +146,8 @@ void Rasterizer::PrepareRenderState(const GraphicsPipeline* pipeline) { } } -[[nodiscard]] std::pair GetDrawOffsets( - const AmdGpu::Liverpool::Regs& regs, const Shader::Info& info, +static std::pair GetDrawOffsets( + const AmdGpu::Regs& regs, const Shader::Info& info, const std::optional& fetch_shader) { u32 vertex_offset = regs.index_offset; u32 instance_offset = 0; @@ -168,7 +167,7 @@ void Rasterizer::EliminateFastClear() { if (!col_buf || !col_buf.info.fast_clear) { return; } - VideoCore::TextureCache::RenderTargetDesc desc(col_buf, liverpool->last_cb_extent[0]); + VideoCore::TextureCache::ImageDesc desc(col_buf, liverpool->last_cb_extent[0]); const auto image_id = texture_cache.FindImage(desc); const auto& image_view = texture_cache.FindRenderTarget(image_id, desc); if (!texture_cache.IsMetaCleared(col_buf.CmaskAddress(), col_buf.view.slice_start)) { @@ -540,7 +539,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding for (u32 i = 0; i < buffer_bindings.size(); i++) { const auto& [buffer_id, vsharp, size] = buffer_bindings[i]; const auto& desc = stage.buffers[i]; - const bool is_storage = desc.IsStorage(vsharp, pipeline_cache.GetProfile()); + const bool is_storage = desc.IsStorage(vsharp); const u32 alignment = is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); // Buffer is not from the cache, either a special buffer or unbound. 
@@ -846,37 +845,27 @@ RenderState Rasterizer::BeginRendering(const GraphicsPipeline* pipeline) { } void Rasterizer::Resolve() { - // Read from MRT0, average all samples, and write to MRT1, which is one-sample const auto& mrt0_hint = liverpool->last_cb_extent[0]; const auto& mrt1_hint = liverpool->last_cb_extent[1]; - VideoCore::TextureCache::RenderTargetDesc mrt0_desc{liverpool->regs.color_buffers[0], - mrt0_hint}; - VideoCore::TextureCache::RenderTargetDesc mrt1_desc{liverpool->regs.color_buffers[1], - mrt1_hint}; + VideoCore::TextureCache::ImageDesc mrt0_desc{liverpool->regs.color_buffers[0], mrt0_hint}; + VideoCore::TextureCache::ImageDesc mrt1_desc{liverpool->regs.color_buffers[1], mrt1_hint}; auto& mrt0_image = texture_cache.GetImage(texture_cache.FindImage(mrt0_desc, true)); auto& mrt1_image = texture_cache.GetImage(texture_cache.FindImage(mrt1_desc, true)); - VideoCore::SubresourceRange mrt0_range; - mrt0_range.base.layer = liverpool->regs.color_buffers[0].view.slice_start; - mrt0_range.extent.layers = liverpool->regs.color_buffers[0].NumSlices() - mrt0_range.base.layer; - VideoCore::SubresourceRange mrt1_range; - mrt1_range.base.layer = liverpool->regs.color_buffers[1].view.slice_start; - mrt1_range.extent.layers = liverpool->regs.color_buffers[1].NumSlices() - mrt1_range.base.layer; - ScopeMarkerBegin(fmt::format("Resolve:MRT0={:#x}:MRT1={:#x}", liverpool->regs.color_buffers[0].Address(), liverpool->regs.color_buffers[1].Address())); - mrt1_image.Resolve(mrt0_image, mrt0_range, mrt1_range); + mrt1_image.Resolve(mrt0_image, mrt0_desc.view_info.range, mrt1_desc.view_info.range); ScopeMarkerEnd(); } void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) { auto& regs = liverpool->regs; - auto read_desc = VideoCore::TextureCache::DepthTargetDesc( + auto read_desc = VideoCore::TextureCache::ImageDesc( regs.depth_buffer, regs.depth_view, regs.depth_control, regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, false); - auto write_desc = 
VideoCore::TextureCache::DepthTargetDesc( + auto write_desc = VideoCore::TextureCache::ImageDesc( regs.depth_buffer, regs.depth_view, regs.depth_control, regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, true); @@ -904,6 +893,7 @@ void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) { if (is_stencil) { aspect_mask |= vk::ImageAspectFlagBits::eStencil; } + vk::ImageCopy region = { .srcSubresource = { @@ -1013,16 +1003,16 @@ void Rasterizer::UpdateViewportScissorState() const { const auto combined_scissor_value_br = [](s16 scr, s16 win, s16 gen, s16 win_offset) { return std::min({scr, s16(win + win_offset), s16(gen + win_offset)}); }; - const bool enable_offset = !regs.window_scissor.window_offset_disable.Value(); + const bool enable_offset = !regs.window_scissor.window_offset_disable; - Liverpool::Scissor scsr{}; + AmdGpu::Scissor scsr{}; scsr.top_left_x = combined_scissor_value_tl( - regs.screen_scissor.top_left_x, s16(regs.window_scissor.top_left_x.Value()), - s16(regs.generic_scissor.top_left_x.Value()), + regs.screen_scissor.top_left_x, s16(regs.window_scissor.top_left_x), + s16(regs.generic_scissor.top_left_x), enable_offset ? regs.window_offset.window_x_offset : 0); scsr.top_left_y = combined_scissor_value_tl( - regs.screen_scissor.top_left_y, s16(regs.window_scissor.top_left_y.Value()), - s16(regs.generic_scissor.top_left_y.Value()), + regs.screen_scissor.top_left_y, s16(regs.window_scissor.top_left_y), + s16(regs.generic_scissor.top_left_y), enable_offset ? regs.window_offset.window_y_offset : 0); scsr.bottom_right_x = combined_scissor_value_br( regs.screen_scissor.bottom_right_x, regs.window_scissor.bottom_right_x, @@ -1033,8 +1023,8 @@ void Rasterizer::UpdateViewportScissorState() const { regs.generic_scissor.bottom_right_y, enable_offset ? 
regs.window_offset.window_y_offset : 0); - boost::container::static_vector viewports; - boost::container::static_vector scissors; + boost::container::static_vector viewports; + boost::container::static_vector scissors; if (regs.polygon_control.enable_window_offset && (regs.window_offset.window_x_offset != 0 || regs.window_offset.window_y_offset != 0)) { @@ -1043,7 +1033,7 @@ void Rasterizer::UpdateViewportScissorState() const { } const auto& vp_ctl = regs.viewport_control; - for (u32 i = 0; i < Liverpool::NumViewports; i++) { + for (u32 i = 0; i < AmdGpu::NUM_VIEWPORTS; i++) { const auto& vp = regs.viewports[i]; const auto& vp_d = regs.viewport_depths[i]; if (vp.xscale == 0) { @@ -1059,7 +1049,7 @@ void Rasterizer::UpdateViewportScissorState() const { // https://gitlab.freedesktop.org/mesa/mesa/-/blob/209a0ed/src/amd/vulkan/radv_cmd_buffer.c#L3103-3109 // When the clip space is ranged [-1...1], the zoffset is centered. // By reversing the above viewport calculations, we get the following: - if (regs.clipper_control.clip_space == AmdGpu::Liverpool::ClipSpace::MinusWToW) { + if (regs.clipper_control.clip_space == AmdGpu::ClipSpace::MinusWToW) { viewport.minDepth = zoffset - zscale; viewport.maxDepth = zoffset + zscale; } else { @@ -1098,13 +1088,13 @@ void Rasterizer::UpdateViewportScissorState() const { auto vp_scsr = scsr; if (regs.mode_control.vport_scissor_enable) { vp_scsr.top_left_x = - std::max(vp_scsr.top_left_x, s16(regs.viewport_scissors[i].top_left_x.Value())); + std::max(vp_scsr.top_left_x, s16(regs.viewport_scissors[i].top_left_x)); vp_scsr.top_left_y = - std::max(vp_scsr.top_left_y, s16(regs.viewport_scissors[i].top_left_y.Value())); - vp_scsr.bottom_right_x = - std::min(vp_scsr.bottom_right_x, regs.viewport_scissors[i].bottom_right_x); - vp_scsr.bottom_right_y = - std::min(vp_scsr.bottom_right_y, regs.viewport_scissors[i].bottom_right_y); + std::max(vp_scsr.top_left_y, s16(regs.viewport_scissors[i].top_left_y)); + vp_scsr.bottom_right_x = 
std::min(AmdGpu::Scissor::Clamp(vp_scsr.bottom_right_x), + regs.viewport_scissors[i].bottom_right_x); + vp_scsr.bottom_right_y = std::min(AmdGpu::Scissor::Clamp(vp_scsr.bottom_right_y), + regs.viewport_scissors[i].bottom_right_y); } scissors.push_back({ .offset = {vp_scsr.top_left_x, vp_scsr.top_left_y}, @@ -1187,8 +1177,8 @@ void Rasterizer::UpdateDepthStencilState() const { const auto back = regs.depth_control.backface_enable ? regs.stencil_ref_back : regs.stencil_ref_front; dynamic_state.SetStencilReferences(front.stencil_test_val, back.stencil_test_val); - dynamic_state.SetStencilWriteMasks(!stencil_clear ? front.stencil_write_mask.Value() : 0U, - !stencil_clear ? back.stencil_write_mask.Value() : 0U); + dynamic_state.SetStencilWriteMasks(!stencil_clear ? front.stencil_write_mask : 0U, + !stencil_clear ? back.stencil_write_mask : 0U); dynamic_state.SetStencilCompareMasks(front.stencil_mask, back.stencil_mask); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 9ba8bfdd3..524a8f06d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -127,22 +127,21 @@ private: Common::SharedFirstMutex mapped_ranges_mutex; PipelineCache pipeline_cache; - using RenderTargetInfo = - std::pair; - std::array cb_descs; - std::pair db_desc; - boost::container::static_vector image_infos; - boost::container::static_vector buffer_infos; - boost::container::static_vector bound_images; + using RenderTargetInfo = std::pair; + std::array cb_descs; + std::pair db_desc; + boost::container::static_vector image_infos; + boost::container::static_vector buffer_infos; + boost::container::static_vector bound_images; Pipeline::DescriptorWrites set_writes; Pipeline::BufferBarriers buffer_barriers; Shader::PushData push_data; using BufferBindingInfo = std::tuple; - boost::container::static_vector buffer_bindings; - using ImageBindingInfo = std::pair; - 
boost::container::static_vector image_bindings; + boost::container::static_vector buffer_bindings; + using ImageBindingInfo = std::pair; + boost::container::static_vector image_bindings; bool fault_process_pending{}; bool attachment_feedback_loop{}; }; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f1e5937fe..78286957f 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" @@ -152,20 +152,20 @@ void Scheduler::SubmitExecution(SubmitInfo& info) { }; const vk::TimelineSemaphoreSubmitInfo timeline_si = { - .waitSemaphoreValueCount = static_cast(info.wait_ticks.size()), + .waitSemaphoreValueCount = info.num_wait_semas, .pWaitSemaphoreValues = info.wait_ticks.data(), - .signalSemaphoreValueCount = static_cast(info.signal_ticks.size()), + .signalSemaphoreValueCount = info.num_signal_semas, .pSignalSemaphoreValues = info.signal_ticks.data(), }; const vk::SubmitInfo submit_info = { .pNext = &timeline_si, - .waitSemaphoreCount = static_cast(info.wait_semas.size()), + .waitSemaphoreCount = info.num_wait_semas, .pWaitSemaphores = info.wait_semas.data(), .pWaitDstStageMask = wait_stage_masks.data(), .commandBufferCount = 1U, .pCommandBuffers = ¤t_cmdbuf, - .signalSemaphoreCount = static_cast(info.signal_semas.size()), + .signalSemaphoreCount = info.num_signal_semas, .pSignalSemaphores = info.signal_semas.data(), }; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index ef0f84822..506b84159 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -1,14 +1,15 @@ -// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator 
Project +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once #include -#include +#include +#include -#include "common/types.h" #include "common/unique_function.h" -#include "video_core/amdgpu/liverpool.h" +#include "video_core/amdgpu/regs_color.h" +#include "video_core/amdgpu/regs_primitive.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" @@ -45,20 +46,22 @@ struct RenderState { }; struct SubmitInfo { - boost::container::static_vector wait_semas; - boost::container::static_vector wait_ticks; - boost::container::static_vector signal_semas; - boost::container::static_vector signal_ticks; + std::array wait_semas; + std::array wait_ticks; + std::array signal_semas; + std::array signal_ticks; vk::Fence fence; + u32 num_wait_semas; + u32 num_signal_semas; void AddWait(vk::Semaphore semaphore, u64 tick = 1) { - wait_semas.emplace_back(semaphore); - wait_ticks.emplace_back(tick); + wait_semas[num_wait_semas] = semaphore; + wait_ticks[num_wait_semas++] = tick; } void AddSignal(vk::Semaphore semaphore, u64 tick = 1) { - signal_semas.emplace_back(semaphore); - signal_ticks.emplace_back(tick); + signal_semas[num_signal_semas] = semaphore; + signal_ticks[num_signal_semas++] = tick; } void AddSignal(vk::Fence fence) { @@ -66,9 +69,9 @@ struct SubmitInfo { } }; -using Viewports = boost::container::static_vector; -using Scissors = boost::container::static_vector; -using ColorWriteMasks = std::array; +using Viewports = boost::container::static_vector; +using Scissors = boost::container::static_vector; +using ColorWriteMasks = std::array; struct StencilOps { vk::StencilOp fail_op{}; vk::StencilOp pass_op{}; @@ -413,6 +416,7 @@ private: const Instance& instance; MasterSemaphore master_semaphore; CommandPool command_pool; + DynamicState dynamic_state; vk::CommandBuffer current_cmdbuf; std::condition_variable_any event_cv; struct PendingOp { @@ 
-421,7 +425,6 @@ private: }; std::queue pending_ops; RenderState render_state; - DynamicState dynamic_state; bool is_rendering = false; tracy::VkCtxScope* profiler_scope{}; }; diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.cpp b/src/video_core/renderer_vulkan/vk_shader_hle.cpp index d73fdbeb1..61941892d 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_hle.cpp @@ -12,8 +12,7 @@ namespace Vulkan { static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f; -static bool ExecuteCopyShaderHLE(const Shader::Info& info, - const AmdGpu::Liverpool::ComputeProgram& cs_program, +static bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::ComputeProgram& cs_program, Rasterizer& rasterizer) { auto& scheduler = rasterizer.GetScheduler(); auto& buffer_cache = rasterizer.GetBufferCache(); @@ -121,8 +120,8 @@ static bool ExecuteCopyShaderHLE(const Shader::Info& info, return true; } -bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, - const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer) { +bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Regs& regs, + const AmdGpu::ComputeProgram& cs_program, Rasterizer& rasterizer) { switch (info.pgm_hash) { case COPY_SHADER_HASH: return ExecuteCopyShaderHLE(info, cs_program, rasterizer); diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.h b/src/video_core/renderer_vulkan/vk_shader_hle.h index 008de8003..393ef29cc 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.h +++ b/src/video_core/renderer_vulkan/vk_shader_hle.h @@ -3,7 +3,10 @@ #pragma once -#include "video_core/amdgpu/liverpool.h" +namespace AmdGpu { +struct ComputeProgram; +union Regs; +} // namespace AmdGpu namespace Shader { struct Info; @@ -14,7 +17,7 @@ namespace Vulkan { class Rasterizer; /// Attempts to execute a shader using HLE if possible. 
-bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, - const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer); +bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Regs& regs, + const AmdGpu::ComputeProgram& cs_program, Rasterizer& rasterizer); } // namespace Vulkan diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 451c7757a..00b9296b7 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -11,6 +11,8 @@ #include #include +#include +#include namespace Vulkan { class Instance; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 9d5801440..b0b272e2f 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -4,7 +4,7 @@ #include "common/assert.h" #include "core/libraries/kernel/process.h" #include "core/libraries/videoout/buffer.h" -#include "shader_recompiler/info.h" +#include "shader_recompiler/resource.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/tile.h" @@ -54,8 +54,7 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group, UpdateSize(); } -ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, - const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept { +ImageInfo::ImageInfo(const AmdGpu::ColorBuffer& buffer, AmdGpu::CbDbExtent hint) noexcept { props.is_tiled = buffer.IsTiled(); tile_mode = buffer.GetTileMode(); array_mode = AmdGpu::GetArrayMode(tile_mode); @@ -74,27 +73,25 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, guest_address = buffer.Address(); if (props.is_tiled) { guest_size = buffer.GetColorSliceSize() * resources.layers; - mips_layout.emplace_back(guest_size, pitch, buffer.Height(), 0); + mips_layout[0] = MipInfo(guest_size, pitch, 
buffer.Height(), 0); } else { std::tie(std::ignore, std::ignore, guest_size) = ImageSizeLinearAligned(pitch, size.height, num_bits, num_samples); guest_size *= resources.layers; - mips_layout.emplace_back(guest_size, pitch, size.height, 0); + mips_layout[0] = MipInfo(guest_size, pitch, size.height, 0); } alt_tile = Libraries::Kernel::sceKernelIsNeoMode() && buffer.info.alt_tile_mode; } -ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, - VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint, - bool write_buffer) noexcept { +ImageInfo::ImageInfo(const AmdGpu::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, + AmdGpu::CbDbExtent hint, bool write_buffer) noexcept { tile_mode = buffer.GetTileMode(); array_mode = AmdGpu::GetArrayMode(tile_mode); pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format); type = AmdGpu::ImageType::Color2D; props.is_tiled = buffer.IsTiled(); props.is_depth = true; - props.has_stencil = - buffer.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid; + props.has_stencil = buffer.stencil_info.format != AmdGpu::DepthBuffer::StencilFormat::Invalid; num_samples = buffer.NumSamples(); num_bits = buffer.NumBits(); size.width = hint.Valid() ? hint.width : buffer.Pitch(); @@ -102,7 +99,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice size.depth = 1; pitch = buffer.Pitch(); resources.layers = num_slices; - meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; + meta_info.htile_addr = buffer.z_info.tile_surface_enable ? htile_address : 0; stencil_addr = write_buffer ? buffer.StencilWriteAddress() : buffer.StencilAddress(); stencil_size = pitch * size.height * sizeof(u8); @@ -110,12 +107,12 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice guest_address = write_buffer ? 
buffer.DepthWriteAddress() : buffer.DepthAddress(); if (props.is_tiled) { guest_size = buffer.GetDepthSliceSize() * resources.layers; - mips_layout.emplace_back(guest_size, pitch, buffer.Height(), 0); + mips_layout[0] = MipInfo(guest_size, pitch, buffer.Height(), 0); } else { std::tie(std::ignore, std::ignore, guest_size) = ImageSizeLinearAligned(pitch, size.height, num_bits, num_samples); guest_size *= resources.layers; - mips_layout.emplace_back(guest_size, pitch, size.height, 0); + mips_layout[0] = MipInfo(guest_size, pitch, size.height, 0); } } @@ -154,8 +151,6 @@ bool ImageInfo::IsCompatible(const ImageInfo& info) const { } void ImageInfo::UpdateSize() { - mips_layout.clear(); - MipInfo mip_info{}; guest_size = 0; for (s32 mip = 0; mip < resources.levels; ++mip) { u32 mip_w = pitch >> mip; @@ -175,6 +170,7 @@ void ImageInfo::UpdateSize() { mip_d = std::bit_ceil(mip_d); } + auto& mip_info = mips_layout[mip]; switch (array_mode) { case AmdGpu::ArrayMode::ArrayLinearAligned: { std::tie(mip_info.pitch, mip_info.height, mip_info.size) = @@ -210,7 +206,6 @@ void ImageInfo::UpdateSize() { } mip_info.size *= mip_d * resources.layers; mip_info.offset = guest_size; - mips_layout.emplace_back(mip_info); guest_size += mip_info.size; } } @@ -229,13 +224,9 @@ s32 ImageInfo::MipOf(const ImageInfo& info) const { return -1; } - if (info.mips_layout.empty()) { - UNREACHABLE(); - } - // Find mip auto mip = -1; - for (auto m = 0; m < info.mips_layout.size(); ++m) { + for (auto m = 0; m < info.resources.levels; ++m) { const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = info.mips_layout[m]; const VAddr mip_base = info.guest_address + mip_ofs; const VAddr mip_end = mip_base + mip_size; diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 583b0d7fa..0da9c8bfb 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -3,16 +3,18 @@ #pragma once -#include - #include "common/types.h" 
-#include "video_core/amdgpu/liverpool.h" +#include "video_core/amdgpu/cb_db_extent.h" +#include "video_core/amdgpu/tiling.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/types.h" namespace AmdGpu { +struct ColorBuffer; +struct DepthBuffer; +struct Image; enum class ImageType : u64; -} +} // namespace AmdGpu namespace Libraries::VideoOut { struct BufferAttributeGroup; @@ -36,10 +38,9 @@ struct ImageProperties { struct ImageInfo { ImageInfo() = default; ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address) noexcept; - ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, - const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; - ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false) noexcept; + ImageInfo(const AmdGpu::ColorBuffer& buffer, AmdGpu::CbDbExtent hint) noexcept; + ImageInfo(const AmdGpu::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, + AmdGpu::CbDbExtent hint, bool write_buffer = false) noexcept; ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept; bool IsTiled() const { @@ -60,7 +61,7 @@ struct ImageInfo { VAddr cmask_addr; VAddr fmask_addr; VAddr htile_addr; - u32 htile_clear_mask = u32(-1); + s32 htile_clear_mask = -1; } meta_info{}; ImageProperties props{}; @@ -79,7 +80,7 @@ struct ImageInfo { u32 height; u32 offset; }; - boost::container::static_vector mips_layout; + std::array mips_layout; VAddr guest_address{}; u32 guest_size{}; u8 bank_swizzle{}; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 8aa19a711..4dd9ddf60 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -2,8 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/logging/log.h" -#include "shader_recompiler/info.h" -#include 
"video_core/amdgpu/resource.h" +#include "shader_recompiler/resource.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/texture_cache/image.h" @@ -71,17 +70,16 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso } } -ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept { - range.base.layer = col_buffer.view.slice_start; +ImageViewInfo::ImageViewInfo(const AmdGpu::ColorBuffer& col_buffer) noexcept { + range.base.layer = col_buffer.BaseSlice(); range.extent.layers = col_buffer.NumSlices() - range.base.layer; type = range.extent.layers > 1 ? AmdGpu::ImageType::Color2DArray : AmdGpu::ImageType::Color2D; format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.GetDataFmt(), col_buffer.GetNumberFmt()); } -ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, - AmdGpu::Liverpool::DepthView view, - AmdGpu::Liverpool::DepthControl ctl) { +ImageViewInfo::ImageViewInfo(const AmdGpu::DepthBuffer& depth_buffer, AmdGpu::DepthView view, + AmdGpu::DepthControl ctl) { format = Vulkan::LiverpoolToVK::DepthFormat(depth_buffer.z_info.format, depth_buffer.stencil_info.format); is_storage = ctl.depth_write_enable; diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 7bdf0ee95..34326b759 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -3,12 +3,19 @@ #pragma once -#include "shader_recompiler/info.h" -#include "video_core/amdgpu/liverpool.h" +#include "video_core/amdgpu/regs_depth.h" #include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/types.h" +namespace AmdGpu { +struct ColorBuffer; +} + +namespace Shader { +struct ImageResource; +} + namespace Vulkan { class Instance; class Scheduler; @@ -19,9 +26,9 @@ namespace VideoCore { struct 
ImageViewInfo { ImageViewInfo() = default; ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept; - ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept; - ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, - AmdGpu::Liverpool::DepthView view, AmdGpu::Liverpool::DepthControl ctl); + ImageViewInfo(const AmdGpu::ColorBuffer& col_buffer) noexcept; + ImageViewInfo(const AmdGpu::DepthBuffer& depth_buffer, AmdGpu::DepthView view, + AmdGpu::DepthControl ctl); AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; vk::Format format = vk::Format::eR8G8B8A8Unorm; diff --git a/src/video_core/texture_cache/sampler.cpp b/src/video_core/texture_cache/sampler.cpp index 6f4f58aba..3d74793f8 100644 --- a/src/video_core/texture_cache/sampler.cpp +++ b/src/video_core/texture_cache/sampler.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/texture_cache/sampler.h" @@ -10,7 +9,7 @@ namespace VideoCore { Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler, - const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base) { + const AmdGpu::BorderColorBuffer border_color_base) { using namespace Vulkan; const bool anisotropy_enable = instance.IsAnisotropicFilteringSupported() && (AmdGpu::IsAnisoFilter(sampler.xy_mag_filter) || diff --git a/src/video_core/texture_cache/sampler.h b/src/video_core/texture_cache/sampler.h index 28ba0f67b..459cc9db5 100644 --- a/src/video_core/texture_cache/sampler.h +++ b/src/video_core/texture_cache/sampler.h @@ -3,6 +3,7 @@ #pragma once +#include "video_core/amdgpu/regs_texture.h" #include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_common.h" @@ -15,7 +16,7 @@ namespace VideoCore { class Sampler { public: explicit Sampler(const Vulkan::Instance& 
instance, const AmdGpu::Sampler& sampler, - const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base); + const AmdGpu::BorderColorBuffer border_color_base); ~Sampler(); Sampler(const Sampler&) = delete; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index a878352a8..c7604995a 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -6,7 +6,6 @@ #include "common/assert.h" #include "common/config.h" #include "common/debug.h" -#include "common/polyfill_thread.h" #include "common/scope_exit.h" #include "core/memory.h" #include "video_core/buffer_cache/buffer_cache.h" @@ -140,8 +139,8 @@ void TextureCache::DownloadedImagesThread(const std::stop_token& token) { DownloadedImage image; { std::unique_lock lock{downloaded_images_mutex}; - Common::CondvarWait(downloaded_images_cv, lock, token, - [this] { return !downloaded_images_queue.empty(); }); + downloaded_images_cv.wait(lock, token, + [this] { return !downloaded_images_queue.empty(); }); if (token.stop_requested()) { break; } @@ -212,7 +211,7 @@ void TextureCache::InvalidateMemoryFromGPU(VAddr address, size_t max_size) { void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { std::scoped_lock lk{mutex}; - boost::container::small_vector deleted_images; + ImageIds deleted_images; ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { // TODO: Download image data back to host. 
@@ -440,7 +439,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { return new_image_id; } -ImageId TextureCache::FindImage(BaseDesc& desc, bool exact_fmt) { +ImageId TextureCache::FindImage(ImageDesc& desc, bool exact_fmt) { const auto& info = desc.info; if (info.guest_address == 0) [[unlikely]] { @@ -448,7 +447,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, bool exact_fmt) { } std::scoped_lock lock{mutex}; - boost::container::small_vector image_ids; + ImageIds image_ids; ForEachImageInRegion(info.guest_address, info.guest_size, [&](ImageId image_id, Image& image) { image_ids.push_back(image_id); }); @@ -529,13 +528,12 @@ ImageId TextureCache::FindImage(BaseDesc& desc, bool exact_fmt) { } ImageId TextureCache::FindImageFromRange(VAddr address, size_t size, bool ensure_valid) { - boost::container::small_vector image_ids; + ImageIds image_ids; ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { if (image.info.guest_address != address) { return; } - if (ensure_valid && (False(image.flags & ImageFlagBits::GpuModified) || - True(image.flags & ImageFlagBits::Dirty))) { + if (ensure_valid && !image.SafeToDownload()) { return; } image_ids.push_back(image_id); @@ -559,7 +557,7 @@ ImageId TextureCache::FindImageFromRange(VAddr address, size_t size, bool ensure return {}; } -ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) { +ImageView& TextureCache::FindTexture(ImageId image_id, const ImageDesc& desc) { Image& image = slot_images[image_id]; if (desc.type == BindingType::Storage) { image.flags |= ImageFlagBits::GpuModified; @@ -572,7 +570,7 @@ ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) { return image.FindView(desc.view_info); } -ImageView& TextureCache::FindRenderTarget(ImageId image_id, const BaseDesc& desc) { +ImageView& TextureCache::FindRenderTarget(ImageId image_id, const ImageDesc& desc) { Image& image = slot_images[image_id]; image.flags |= 
ImageFlagBits::GpuModified; if (Config::readbackLinearImages() && !image.info.props.is_tiled) { @@ -597,7 +595,7 @@ ImageView& TextureCache::FindRenderTarget(ImageId image_id, const BaseDesc& desc return image.FindView(desc.view_info, false); } -ImageView& TextureCache::FindDepthTarget(ImageId image_id, const BaseDesc& desc) { +ImageView& TextureCache::FindDepthTarget(ImageId image_id, const ImageDesc& desc) { Image& image = slot_images[image_id]; image.flags |= ImageFlagBits::GpuModified; image.usage.depth_target = 1u; @@ -662,10 +660,8 @@ void TextureCache::RefreshImage(Image& image) { image.hash = hash; } - const auto& num_layers = image.info.resources.layers; - const auto& num_mips = image.info.resources.levels; - ASSERT(num_mips == image.info.mips_layout.size()); - + const u32 num_layers = image.info.resources.layers; + const u32 num_mips = image.info.resources.levels; const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified); const bool is_gpu_dirty = True(image.flags & ImageFlagBits::GpuDirty); @@ -731,9 +727,8 @@ void TextureCache::RefreshImage(Image& image) { image.Upload(image_copies, buffer, offset); } -vk::Sampler TextureCache::GetSampler( - const AmdGpu::Sampler& sampler, - const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base) { +vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler, + AmdGpu::BorderColorBuffer border_color_base) { const u64 hash = XXH3_64bits(&sampler, sizeof(sampler)); const auto [it, new_sampler] = samplers.try_emplace(hash, instance, sampler, border_color_base); return it->second.Handle(); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4bedea39d..9d25069db 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -3,13 +3,17 @@ #pragma once +#include +#include +#include #include #include +#include #include #include "common/lru_cache.h" #include "common/slot_vector.h" 
-#include "video_core/amdgpu/resource.h" +#include "shader_recompiler/resource.h" #include "video_core/multi_level_page_table.h" #include "video_core/texture_cache/blit_helper.h" #include "video_core/texture_cache/image.h" @@ -32,8 +36,10 @@ class TextureCache { static constexpr s64 DEFAULT_CRITICAL_GC_MEMORY = 3_GB; static constexpr s64 TARGET_GC_THRESHOLD = 8_GB; + using ImageIds = boost::container::small_vector; + struct Traits { - using Entry = boost::container::small_vector; + using Entry = ImageIds; static constexpr size_t AddressSpaceBits = 40; static constexpr size_t FirstLevelBits = 10; static constexpr size_t PageBits = 20; @@ -49,44 +55,24 @@ public: VideoOut, }; - struct BaseDesc { + struct ImageDesc { ImageInfo info; ImageViewInfo view_info; BindingType type{BindingType::Texture}; - BaseDesc() = default; - BaseDesc(BindingType type_, ImageInfo info_, ImageViewInfo view_info_) noexcept - : info{std::move(info_)}, view_info{std::move(view_info_)}, type{type_} {} - }; - - struct TextureDesc : public BaseDesc { - TextureDesc() = default; - TextureDesc(const AmdGpu::Image& image, const Shader::ImageResource& desc) - : BaseDesc{desc.is_written ? 
BindingType::Storage : BindingType::Texture, - ImageInfo{image, desc}, ImageViewInfo{image, desc}} {} - }; - - struct RenderTargetDesc : public BaseDesc { - RenderTargetDesc() = default; - RenderTargetDesc(const AmdGpu::Liverpool::ColorBuffer& buffer, - const AmdGpu::Liverpool::CbDbExtent& hint = {}) - : BaseDesc{BindingType::RenderTarget, ImageInfo{buffer, hint}, ImageViewInfo{buffer}} {} - }; - - struct DepthTargetDesc : public BaseDesc { - DepthTargetDesc() = default; - DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer, - const AmdGpu::Liverpool::DepthView& view, - const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false) - : BaseDesc{BindingType::DepthTarget, - ImageInfo{buffer, view.NumSlices(), htile_address, hint, write_buffer}, - ImageViewInfo{buffer, view, ctl}} {} - }; - - struct VideoOutDesc : public BaseDesc { - VideoOutDesc(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address) - : BaseDesc{BindingType::VideoOut, ImageInfo{group, cpu_address}, ImageViewInfo{}} {} + ImageDesc() = default; + ImageDesc(const AmdGpu::Image& image, const Shader::ImageResource& desc) + : info{image, desc}, view_info{image, desc}, + type{desc.is_written ? 
BindingType::Storage : BindingType::Texture} {} + ImageDesc(const AmdGpu::ColorBuffer& buffer, AmdGpu::CbDbExtent hint) + : info{buffer, hint}, view_info{buffer}, type{BindingType::RenderTarget} {} + ImageDesc(const AmdGpu::DepthBuffer& buffer, AmdGpu::DepthView view, + AmdGpu::DepthControl ctl, VAddr htile_address, AmdGpu::CbDbExtent hint, + bool write_buffer = false) + : info{buffer, view.NumSlices(), htile_address, hint, write_buffer}, + view_info{buffer, view, ctl}, type{BindingType::DepthTarget} {} + ImageDesc(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address) + : info{group, cpu_address}, type{BindingType::VideoOut} {} }; public: @@ -111,19 +97,19 @@ public: void ProcessDownloadImages(); /// Retrieves the image handle of the image with the provided attributes. - [[nodiscard]] ImageId FindImage(BaseDesc& desc, bool exact_fmt = false); + [[nodiscard]] ImageId FindImage(ImageDesc& desc, bool exact_fmt = false); /// Retrieves image whose address matches provided [[nodiscard]] ImageId FindImageFromRange(VAddr address, size_t size, bool ensure_valid = true); /// Retrieves an image view with the properties of the specified image id. - [[nodiscard]] ImageView& FindTexture(ImageId image_id, const BaseDesc& desc); + [[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageDesc& desc); /// Retrieves the render target with specified properties - [[nodiscard]] ImageView& FindRenderTarget(ImageId image_id, const BaseDesc& desc); + [[nodiscard]] ImageView& FindRenderTarget(ImageId image_id, const ImageDesc& desc); /// Retrieves the depth target with specified properties - [[nodiscard]] ImageView& FindDepthTarget(ImageId image_id, const BaseDesc& desc); + [[nodiscard]] ImageView& FindDepthTarget(ImageId image_id, const ImageDesc& desc); /// Updates image contents if it was modified by CPU. 
void UpdateImage(ImageId image_id) { @@ -151,9 +137,8 @@ public: void RefreshImage(Image& image); /// Retrieves the sampler that matches the provided S# descriptor. - [[nodiscard]] vk::Sampler GetSampler( - const AmdGpu::Sampler& sampler, - const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base); + [[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler, + AmdGpu::BorderColorBuffer border_color_base); /// Retrieves the image with the specified id. [[nodiscard]] Image& GetImage(ImageId id) { @@ -212,7 +197,7 @@ public: void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { using FuncReturn = typename std::invoke_result::type; static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; + ImageIds images; ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { const auto it = page_table.find(page); if (it == nullptr) { @@ -329,7 +314,6 @@ private: Common::LeastRecentlyUsedCache lru_cache; PageTable page_table; std::mutex mutex; - struct DownloadedImage { u64 tick; VAddr device_addr; @@ -340,7 +324,6 @@ private: std::mutex downloaded_images_mutex; std::condition_variable_any downloaded_images_cv; std::jthread downloaded_images_thread; - struct MetaDataInfo { enum class Type { CMask, @@ -348,7 +331,7 @@ private: HTile, }; Type type; - u32 clear_mask{u32(-1)}; + s32 clear_mask = -1; }; tsl::robin_map surface_metas; };