From db1eadfeb8ebbb9de6bc74cc653a7977ec7634aa Mon Sep 17 00:00:00 2001 From: Frodo Baggins Date: Thu, 12 Dec 2024 23:20:19 -0800 Subject: [PATCH] stop relying on GNMX HsConstants struct. Change runtime_info.hs_info and some regs --- src/core/libraries/gnmdriver/gnmdriver.cpp | 12 ++----- .../backend/spirv/emit_spirv.cpp | 2 +- .../backend/spirv/spirv_emit_context.cpp | 4 +-- .../ir/passes/hull_shader_transform.cpp | 28 ++++++++++----- src/shader_recompiler/runtime_info.h | 34 ++++++++++-------- src/video_core/amdgpu/liverpool.h | 35 ++++++------------- .../renderer_vulkan/vk_pipeline_cache.cpp | 10 ++---- 7 files changed, 58 insertions(+), 67 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index efd7cf531..e85b8b890 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1659,15 +1659,9 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3 cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2], hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS - // This is wrong but just stash them here for now - // Should read the tess constants buffer instead, which is bound as V#, into runtime_info. - // HsConstants member of HsProgram is used to derive TessellationDataConstantBuffer, its members - // dont correspond to real registers - cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x11cu, hs_regs[4], hs_regs[5], hs_regs[6], hs_regs[7], - hs_regs[8], hs_regs[9], hs_regs[10], hs_regs[11], hs_regs[12], - hs_regs[13]); // TODO comment - cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5], - hs_regs[6]); // VGT_HOS_MAX_TESS_LEVEL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, + hs_regs[5], // VGT_HOS_MAX_TESS_LEVEL + hs_regs[6]); // VGT_HOS_MIN_TESS_LEVEL cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 71747e96b..2faa0d02f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -308,7 +308,7 @@ void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) { execution_model = spv::ExecutionModel::TessellationControl; ctx.AddCapability(spv::Capability::Tessellation); ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - ctx.runtime_info.hs_info.output_control_points); + ctx.runtime_info.hs_info.NumOutputControlPoints()); break; case LogicalStage::TessellationEval: { execution_model = spv::ExecutionModel::TessellationEvaluation; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 2e0183cb0..881090316 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -491,8 +491,8 @@ void EmitContext::DefineOutputs() { if (num_attrs > 0) { const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; // The input vertex count isn't statically known, so make length 32 (what glslang does) - const Id patch_array_type{ - TypeArray(per_vertex_type, ConstU32(runtime_info.hs_info.output_control_points))}; + const Id patch_array_type{TypeArray( + per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))}; output_attr_array = DefineOutput(patch_array_type, 0); Name(output_attr_array, "out_attrs"); } diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index e8cc14341..f31599055 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -336,7 +336,7 @@ static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmit } // namespace void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { - Info& info = program.info; + const Info& info = program.info; for (IR::Block* block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { @@ -366,19 +366,31 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { const IR::Value data = inst.Arg(2); auto get_factor_attr = [&](u32 gcn_factor_idx) -> IR::Patch { - ASSERT(gcn_factor_idx * 4 < runtime_info.hs_info.tess_factor_stride); - - switch (runtime_info.hs_info.tess_factor_stride) { - case 24: + // The hull outputs tess factors in different formats depending on the shader. + // For triangle domains, it seems to pack the entries into 4 consecutive floats, + // with the 3 edge factors followed by the 1 interior factor. + // For quads, it does the expected 4 edge factors then 2 interior. + // There is a tess factor stride member of the GNMX hull constants struct in + // a hull program shader binary archive, but this doesn't seem to be + // communicated to the driver. The fixed function tessellator would need to know + // this somehow. It's probably implied by the type of the abstract domain. If + // this is causing problems, good idea to check the hs_regs argument to + // sceGnmSetHsShader. The memory containing the tess factor stride probably + // follows the memory for hs_regs if the app is providing a pointer into the + // program they loaded from disk + switch (runtime_info.hs_info.tess_type) { + case AmdGpu::TessellationType::Quad: + ASSERT(gcn_factor_idx < 6); return IR::PatchFactor(gcn_factor_idx); - case 16: + case AmdGpu::TessellationType::Triangle: + ASSERT(gcn_factor_idx < 4); if (gcn_factor_idx == 3) { return IR::Patch::TessellationLodInteriorU; } return IR::PatchFactor(gcn_factor_idx); - default: - UNREACHABLE_MSG("Unhandled tess factor stride"); + // TODO point domain types haven't been seen so far + UNREACHABLE_MSG("Unhandled tess type"); } }; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index e0f10d4c0..ea1e1faa9 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -104,36 +104,40 @@ struct VertexRuntimeInfo { struct HullRuntimeInfo { // from registers - u32 output_control_points; - - // from HullStateConstants in HsProgram (TODO dont rely on this) - u32 tess_factor_stride; + u32 num_input_control_points; + u32 num_threads; + AmdGpu::TessellationType tess_type; // from tess constants buffer u32 ls_stride; u32 hs_output_cp_stride; - u32 hs_num_patch; u32 hs_output_base; - u32 patch_const_size; - u32 patch_const_base; - u32 patch_output_size; - u32 first_edge_tess_factor_index; auto operator<=>(const HullRuntimeInfo&) const noexcept = default; + // It might be possible for a non-passthrough TCS to have these conditions, in some + // dumb situation. + // In that case, it should be fine to assume passthrough and declare some extra + // output control points and attributes that shouldnt be read by the TES anyways + // + // TODO now having patch control points dynamic state is kind of pointless if we depend on + // num_input_control_points bool IsPassthrough() const { - return hs_output_base == 0; + return hs_output_base == 0 && num_threads == 1; }; + // regs.ls_hs_config.hs_output_control_points contains the number of threads, which + // isn't exactly the number of output control points. + // For passthrough shaders, the register field is set to 1, so use the number of + // input control points + u32 NumOutputControlPoints() const { + return IsPassthrough() ? num_input_control_points : num_threads; + } + void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { ls_stride = tess_constants.m_lsStride; hs_output_cp_stride = tess_constants.m_hsCpStride; - hs_num_patch = tess_constants.m_hsNumPatch; hs_output_base = tess_constants.m_hsOutputBase; - patch_const_size = tess_constants.m_patchConstSize; - patch_const_base = tess_constants.m_patchConstBase; - patch_output_size = tess_constants.m_patchOutputSize; - first_edge_tess_factor_index = tess_constants.m_firstEdgeTessFactorIndex; } }; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 908a4c73b..8acce8539 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -143,20 +143,11 @@ struct Liverpool { } }; - struct HsStageRegisters { - u32 vgt_tf_param; - u32 vgt_hos_max_tess_level; - u32 vgt_hos_min_tess_level; - }; - - struct HsConstants { - u32 num_input_cp; - u32 num_output_cp; - u32 num_patch_const; - u32 cp_stride; - u32 num_threads; - u32 tess_factor_stride; - u32 first_edge_tess_factor_index; + struct HsTessFactorClamp { + // I've only seen min=0.0, max=1.0 so far. + // TODO why is max set to 1.0? Makes no sense + float hs_max_tess; + float hs_min_tess; }; struct ComputeProgram { @@ -1102,9 +1093,6 @@ struct Liverpool { }; union TessFactorMemoryBase { - // TODO: was going to use this to check against UD used in tcs shader - // but only seen set to 0 - // Remove this and other added regs if they end up unused u32 base; u64 MemoryBase() const { @@ -1162,11 +1150,7 @@ struct Liverpool { ShaderProgram es_program; INSERT_PADDING_WORDS(0x2C); ShaderProgram hs_program; - // TODO delete. These don't actually correspond to real registers, but I'll stash them - // here to debug - HsStageRegisters hs_registers; - HsConstants hs_constants; - INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20 - 3 - 7); + INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20); ShaderProgram ls_program; INSERT_PADDING_WORDS(0xA4); ComputeProgram cs_program; @@ -1233,7 +1217,9 @@ struct Liverpool { PolygonControl polygon_control; ViewportControl viewport_control; VsOutputControl vs_output_control; - INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1); + INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1); + HsTessFactorClamp hs_clamp; + INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2); GsMode vgt_gs_mode; INSERT_PADDING_WORDS(1); ModeControl mode_control; @@ -1453,8 +1439,6 @@ static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); static_assert(GFX6_3D_REG_INDEX(gs_program) == 0x2C88); static_assert(GFX6_3D_REG_INDEX(es_program) == 0x2CC8); static_assert(GFX6_3D_REG_INDEX(hs_program) == 0x2D08); -static_assert(GFX6_3D_REG_INDEX(hs_registers) == 0x2D1C); -static_assert(GFX6_3D_REG_INDEX(hs_constants) == 0x2D1F); static_assert(GFX6_3D_REG_INDEX(ls_program) == 0x2D48); static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00); static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03); @@ -1493,6 +1477,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202); static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); +static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287); static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290); static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292); static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c3675e93a..90ca3f7b4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -107,13 +107,9 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_ } case Stage::Hull: { BuildCommon(regs.hs_program); - // TODO: ls_hs_config.output_control_points seems to be == 1 when doing passthrough - // instead of the real number which matches the input patch topology - // info.hs_info.output_control_points = regs.ls_hs_config.hs_output_control_points.Value(); - - // TODO dont rely on HullStateConstants - info.hs_info.output_control_points = regs.hs_constants.num_output_cp; - info.hs_info.tess_factor_stride = regs.hs_constants.tess_factor_stride; + info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value(); + info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value(); + info.hs_info.tess_type = regs.tess_config.type; // We need to initialize most hs_info fields after finding the V# with tess constants break;