stop relying on GNMX HsConstants struct. Change runtime_info.hs_info and some regs

This commit is contained in:
Frodo Baggins 2024-12-12 23:20:19 -08:00
parent d1988dbf9a
commit db1eadfeb8
7 changed files with 58 additions and 67 deletions

View File

@ -1659,15 +1659,9 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2],
hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS
// This is wrong but just stash them here for now
// Should read the tess constants buffer instead, which is bound as V#, into runtime_info.
// HsConstants member of HsProgram is used to derive TessellationDataConstantBuffer, its members
// dont correspond to real registers
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x11cu, hs_regs[4], hs_regs[5], hs_regs[6], hs_regs[7],
hs_regs[8], hs_regs[9], hs_regs[10], hs_regs[11], hs_regs[12],
hs_regs[13]); // TODO comment
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5],
hs_regs[6]); // VGT_HOS_MAX_TESS_LEVEL
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u,
hs_regs[5], // VGT_HOS_MAX_TESS_LEVEL
hs_regs[6]); // VGT_HOS_MIN_TESS_LEVEL
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG

View File

@ -308,7 +308,7 @@ void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
execution_model = spv::ExecutionModel::TessellationControl;
ctx.AddCapability(spv::Capability::Tessellation);
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
ctx.runtime_info.hs_info.output_control_points);
ctx.runtime_info.hs_info.NumOutputControlPoints());
break;
case LogicalStage::TessellationEval: {
execution_model = spv::ExecutionModel::TessellationEvaluation;

View File

@ -491,8 +491,8 @@ void EmitContext::DefineOutputs() {
if (num_attrs > 0) {
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
// The input vertex count isn't statically known, so make length 32 (what glslang does)
const Id patch_array_type{
TypeArray(per_vertex_type, ConstU32(runtime_info.hs_info.output_control_points))};
const Id patch_array_type{TypeArray(
per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))};
output_attr_array = DefineOutput(patch_array_type, 0);
Name(output_attr_array, "out_attrs");
}

View File

@ -336,7 +336,7 @@ static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmit
} // namespace
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
Info& info = program.info;
const Info& info = program.info;
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
@ -366,19 +366,31 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
const IR::Value data = inst.Arg(2);
auto get_factor_attr = [&](u32 gcn_factor_idx) -> IR::Patch {
ASSERT(gcn_factor_idx * 4 < runtime_info.hs_info.tess_factor_stride);
switch (runtime_info.hs_info.tess_factor_stride) {
case 24:
// The hull outputs tess factors in different formats depending on the shader.
// For triangle domains, it seems to pack the entries into 4 consecutive floats,
// with the 3 edge factors followed by the 1 interior factor.
// For quads, it does the expected 4 edge factors then 2 interior.
// There is a tess factor stride member of the GNMX hull constants struct in
// a hull program shader binary archive, but this doesn't seem to be
// communicated to the driver. The fixed function tessellator would need to know
// this somehow. It's probably implied by the type of the abstract domain. If
// this is causing problems, good idea to check the hs_regs argument to
// sceGnmSetHsShader. The memory containing the tess factor stride probably
// follows the memory for hs_regs if the app is providing a pointer into the
// program they loaded from disk
switch (runtime_info.hs_info.tess_type) {
case AmdGpu::TessellationType::Quad:
ASSERT(gcn_factor_idx < 6);
return IR::PatchFactor(gcn_factor_idx);
case 16:
case AmdGpu::TessellationType::Triangle:
ASSERT(gcn_factor_idx < 4);
if (gcn_factor_idx == 3) {
return IR::Patch::TessellationLodInteriorU;
}
return IR::PatchFactor(gcn_factor_idx);
default:
UNREACHABLE_MSG("Unhandled tess factor stride");
// TODO point domain types haven't been seen so far
UNREACHABLE_MSG("Unhandled tess type");
}
};

View File

@ -104,36 +104,40 @@ struct VertexRuntimeInfo {
struct HullRuntimeInfo {
// from registers
u32 output_control_points;
// from HullStateConstants in HsProgram (TODO dont rely on this)
u32 tess_factor_stride;
u32 num_input_control_points;
u32 num_threads;
AmdGpu::TessellationType tess_type;
// from tess constants buffer
u32 ls_stride;
u32 hs_output_cp_stride;
u32 hs_num_patch;
u32 hs_output_base;
u32 patch_const_size;
u32 patch_const_base;
u32 patch_output_size;
u32 first_edge_tess_factor_index;
auto operator<=>(const HullRuntimeInfo&) const noexcept = default;
// It might be possible for a non-passthrough TCS to have these conditions, in some
// dumb situation.
// In that case, it should be fine to assume passthrough and declare some extra
// output control points and attributes that shouldnt be read by the TES anyways
//
// TODO now having patch control points dynamic state is kind of pointless if we depend on
// num_input_control_points
bool IsPassthrough() const {
return hs_output_base == 0;
return hs_output_base == 0 && num_threads == 1;
};
// regs.ls_hs_config.hs_output_control_points contains the number of threads, which
// isn't exactly the number of output control points.
// For passthrough shaders, the register field is set to 1, so use the number of
// input control points
u32 NumOutputControlPoints() const {
return IsPassthrough() ? num_input_control_points : num_threads;
}
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
ls_stride = tess_constants.m_lsStride;
hs_output_cp_stride = tess_constants.m_hsCpStride;
hs_num_patch = tess_constants.m_hsNumPatch;
hs_output_base = tess_constants.m_hsOutputBase;
patch_const_size = tess_constants.m_patchConstSize;
patch_const_base = tess_constants.m_patchConstBase;
patch_output_size = tess_constants.m_patchOutputSize;
first_edge_tess_factor_index = tess_constants.m_firstEdgeTessFactorIndex;
}
};

View File

@ -143,20 +143,11 @@ struct Liverpool {
}
};
struct HsStageRegisters {
u32 vgt_tf_param;
u32 vgt_hos_max_tess_level;
u32 vgt_hos_min_tess_level;
};
struct HsConstants {
u32 num_input_cp;
u32 num_output_cp;
u32 num_patch_const;
u32 cp_stride;
u32 num_threads;
u32 tess_factor_stride;
u32 first_edge_tess_factor_index;
struct HsTessFactorClamp {
// I've only seen min=0.0, max=1.0 so far.
// TODO why is max set to 1.0? Makes no sense
float hs_max_tess;
float hs_min_tess;
};
struct ComputeProgram {
@ -1102,9 +1093,6 @@ struct Liverpool {
};
union TessFactorMemoryBase {
// TODO: was going to use this to check against UD used in tcs shader
// but only seen set to 0
// Remove this and other added regs if they end up unused
u32 base;
u64 MemoryBase() const {
@ -1162,11 +1150,7 @@ struct Liverpool {
ShaderProgram es_program;
INSERT_PADDING_WORDS(0x2C);
ShaderProgram hs_program;
// TODO delete. These don't actually correspond to real registers, but I'll stash them
// here to debug
HsStageRegisters hs_registers;
HsConstants hs_constants;
INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20 - 3 - 7);
INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20);
ShaderProgram ls_program;
INSERT_PADDING_WORDS(0xA4);
ComputeProgram cs_program;
@ -1233,7 +1217,9 @@ struct Liverpool {
PolygonControl polygon_control;
ViewportControl viewport_control;
VsOutputControl vs_output_control;
INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1);
INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1);
HsTessFactorClamp hs_clamp;
INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2);
GsMode vgt_gs_mode;
INSERT_PADDING_WORDS(1);
ModeControl mode_control;
@ -1453,8 +1439,6 @@ static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(gs_program) == 0x2C88);
static_assert(GFX6_3D_REG_INDEX(es_program) == 0x2CC8);
static_assert(GFX6_3D_REG_INDEX(hs_program) == 0x2D08);
static_assert(GFX6_3D_REG_INDEX(hs_registers) == 0x2D1C);
static_assert(GFX6_3D_REG_INDEX(hs_constants) == 0x2D1F);
static_assert(GFX6_3D_REG_INDEX(ls_program) == 0x2D48);
static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00);
static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03);
@ -1493,6 +1477,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B);

View File

@ -107,13 +107,9 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_
}
case Stage::Hull: {
BuildCommon(regs.hs_program);
// TODO: ls_hs_config.output_control_points seems to be == 1 when doing passthrough
// instead of the real number which matches the input patch topology
// info.hs_info.output_control_points = regs.ls_hs_config.hs_output_control_points.Value();
// TODO dont rely on HullStateConstants
info.hs_info.output_control_points = regs.hs_constants.num_output_cp;
info.hs_info.tess_factor_stride = regs.hs_constants.tess_factor_stride;
info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value();
info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value();
info.hs_info.tess_type = regs.tess_config.type;
// We need to initialize most hs_info fields after finding the V# with tess constants
break;