From d1988dbf9a9145131a4874e77446ee9ddddc6adb Mon Sep 17 00:00:00 2001 From: Frodo Baggins Date: Thu, 12 Dec 2024 14:59:45 -0800 Subject: [PATCH] get rid of GetAttributes for special tess constants reads. Immediately replace some upon seeing readconstbuffer. Gets rid of some extra passes over IR --- .../spirv/emit_spirv_context_get_set.cpp | 21 ++- src/shader_recompiler/frontend/tessellation.h | 13 ++ .../frontend/translate/translate.cpp | 18 +- src/shader_recompiler/ir/attribute.cpp | 20 -- src/shader_recompiler/ir/attribute.h | 15 +- .../ir/passes/hull_shader_transform.cpp | 176 +++++------------- src/shader_recompiler/ir/passes/ir_passes.h | 1 - src/shader_recompiler/recompiler.cpp | 4 - 8 files changed, 95 insertions(+), 173 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e811fc254..ffaf2a637 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -302,7 +302,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value, ctx.u32_zero_value); case IR::Attribute::PrimitiveId: - case IR::Attribute::TessPatchIdInVgt: // TODO see why this isnt DCEd return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); case IR::Attribute::InvocationId: ASSERT(ctx.info.l_stage == LogicalStage::Geometry || @@ -311,10 +310,22 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { case IR::Attribute::PatchVertices: ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices); - case IR::Attribute::PackedHullInvocationInfo: - // TODO figure out what to do with this - // should be dead code, but otherwise return 0 or concat PrimitiveId and InvocationId - return ctx.u32_zero_value; + case IR::Attribute::PackedHullInvocationInfo: { + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + // [0:8]: patch id within VGT + // [8:12]: output control point id + // But 0:8 should be treated as 0 for attribute addressing purposes + if (ctx.runtime_info.hs_info.IsPassthrough()) { + // Gcn shader would run with 1 thread, but we need to run a thread for + // each output control point. + // If Gcn shader uses this value, we should make sure all threads in the + // Vulkan shader use 0 + return ctx.ConstU32(0u); + } else { + const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id); + return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u)); + } + } default: UNREACHABLE_MSG("Read U32 attribute {}", attr); } diff --git a/src/shader_recompiler/frontend/tessellation.h b/src/shader_recompiler/frontend/tessellation.h index 97e298486..2125541dc 100644 --- a/src/shader_recompiler/frontend/tessellation.h +++ b/src/shader_recompiler/frontend/tessellation.h @@ -21,4 +21,17 @@ struct TessellationDataConstantBuffer { u32 m_firstEdgeTessFactorIndex; }; +// TODO comment +enum class TessConstantAttribute : u32 { + LsStride, + HsCpStride, + HsNumPatch, + HsOutputBase, + PatchConstSize, + PatchConstBase, + PatchOutputSize, + OffChipTessellationFactorThreshold, + FirstEdgeTessFactorIndex, +}; + } // namespace Shader \ No newline at end of file diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 897533c3a..3d49606b8 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -124,6 +124,9 @@ void Translator::EmitPrologue() { } break; case LogicalStage::TessellationControl: { + // Should be laid out like: + // [0:8]: patch id within VGT + // [8:12]: output control point id ir.SetVectorReg(IR::VectorReg::V1, ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo)); // TODO need PrimitiveId also like TES? @@ -134,10 +137,17 @@ void Translator::EmitPrologue() { ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU)); ir.SetVectorReg(IR::VectorReg::V1, ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV)); - // I think V2 is actually the patch id within the patches running on the local CU, used in - // compiler generated address calcs, - // and V3 is the patch id within the draw - ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::TessPatchIdInVgt)); + // V2 is similar to PrimitiveID but not the same. It seems to only be used in + // compiler-generated address calculations. Its probably the patch id within the + // patches running locally on a given VGT (or CU, whichever is the granularity of LDS + // memory). So it is probably equal to PrimitiveID % #patches_per_VGT (or per CU). + // We should be able to safely set V2 to 0, along with other special values read from the + // tess constants buffer, since in the recompiled Vulkan shaders a thread can only + // read/write control points and patch const attributes within the local patch. This (V2) + // and other special values of TessellationDataConstantBuffer are (probably) just an + // implementation detail from the ps4 shader compiler and only used for addressing. + ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u)); + // V3 is the actual PrimitiveID as intended by the shader author. ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); break; case LogicalStage::Compute: diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index e297bda6e..6a267e21b 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -126,26 +126,6 @@ std::string NameOf(Attribute attribute) { return "TessellationEvaluationPointV"; case Attribute::PackedHullInvocationInfo: return "PackedHullInvocationInfo"; - case Attribute::TcsLsStride: - return "TcsLsStride"; - case Attribute::TcsCpStride: - return "TcsCpStride"; - case Attribute::TcsNumPatches: - return "TcsNumPatches"; - case Attribute::TcsOutputBase: - return "TcsOutputBase"; - case Attribute::TcsPatchConstSize: - return "TcsPatchConstSize"; - case Attribute::TcsPatchConstBase: - return "TcsPatchConstBase"; - case Attribute::TcsPatchOutputSize: - return "TcsPatchOutputSize"; - case Attribute::TcsOffChipTessellationFactorThreshold: - return "TcsOffChipTessellationFactorThreshold"; - case Attribute::TcsFirstEdgeTessFactorIndex: - return "TcsFirstEdgeTessFactorIndex"; - case Attribute::TessPatchIdInVgt: - return "TessPatchIdInVgt"; default: break; } diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index b8d77d45c..bcb2b44a9 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -78,20 +78,7 @@ enum class Attribute : u64 { PatchVertices = 81, TessellationEvaluationPointU = 82, TessellationEvaluationPointV = 83, - PackedHullInvocationInfo = - 84, // PrimitiveId (patch id) and InvocationId (output control point id) - // Probably don't need all these. - // Most should be dead after hull shader transform - TcsLsStride = 85, - TcsCpStride = 86, - TcsNumPatches = 87, - TcsOutputBase = 88, - TcsPatchConstSize = 89, - TcsPatchConstBase = 90, - TcsPatchOutputSize = 91, - TcsOffChipTessellationFactorThreshold = 92, - TcsFirstEdgeTessFactorIndex = 93, - TessPatchIdInVgt = 94, + PackedHullInvocationInfo = 84, // contains patch id within the VGT and invocation ID Max, }; diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index b45ee6db6..e8cc14341 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -154,14 +154,13 @@ std::optional FindTessConstantSharp(IR::Inst* read_const_buff // Walker that helps deduce what type of attribute a DS instruction is reading // or writing, which could be an input control point, output control point, // or per-patch constant (PatchConst). -// For certain ReadConstBuffer instructions using the tess constants V#, -// which we preprocess and transform into a named GetAttribute, we visit the users +// For certain ReadConstBuffer instructions using the tess constants V#,, we visit the users // recursively and increment a counter on the Load/WriteShared users. -// Namely TcsNumPatches (from m_hsNumPatch), TcsOutputBase (m_hsOutputBase), -// and TcsPatchConstBase (m_patchConstBase). -// In addr calculations, the term TcsNumPatches * ls_stride * #input_cp_in_patch +// Namely NumPatch (from m_hsNumPatch), HsOutputBase (m_hsOutputBase), +// and PatchConstBase (m_patchConstBase). +// In addr calculations, the term NumPatch * ls_stride * #input_cp_in_patch // is used as an addend to skip the region for input control points, and similarly -// TcsNumPatches * hs_cp_stride * #output_cp_in_patch is used to skip the region +// NumPatch * hs_cp_stride * #output_cp_in_patch is used to skip the region // for output control points. // The Input CP, Output CP, and PatchConst regions are laid out in that order for the // entire thread group, so seeing the TcsNumPatches attribute used in an addr calc should @@ -174,21 +173,21 @@ std::optional FindTessConstantSharp(IR::Inst* read_const_buff // and find the interval it's inside of? (phis are still a problem here) class TessConstantUseWalker { public: - void MarkTessAttributeUsers(IR::Inst* get_attribute) { + void MarkTessAttributeUsers(IR::Inst* read_const_buffer, TessConstantAttribute attr) { uint inc; - switch (get_attribute->Arg(0).Attribute()) { - case IR::Attribute::TcsNumPatches: - case IR::Attribute::TcsOutputBase: + switch (attr) { + case TessConstantAttribute::HsNumPatch: + case TessConstantAttribute::HsOutputBase: inc = 1; break; - case IR::Attribute::TcsPatchConstBase: + case TessConstantAttribute::PatchConstBase: inc = 2; break; default: - return; + UNREACHABLE(); } - for (IR::Use use : get_attribute->Uses()) { + for (IR::Use use : read_const_buffer->Uses()) { MarkTessAttributeUsersHelper(use, inc); } @@ -276,14 +275,6 @@ static bool IsDivisibleByStride(IR::Value term, u32 stride) { IR::Value a, b; if (MatchU32(stride).Match(term)) { return true; - } else if (M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::TcsLsStride), MatchU32(0)) - .Match(term) || - M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::TcsCpStride), MatchU32(0)) - .Match(term)) { - // TODO if we fold in constants earlier (Dont produce attributes, instead just emit - // constants) then this case isnt needed. Also should assert that this correct attribute is - // being used depending on stage and whether this is an input or output attribute - return true; } else if (M_BITFIELDUEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term) || M_BITFIELDSEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term)) { return IsDivisibleByStride(a, stride); @@ -309,7 +300,7 @@ static bool TryOptimizeAddendInModulo(IR::Value addend, u32 stride, std::vector< } } -// In calculation addr = (a + b + ...) % stride +// In calculation (a + b + ...) % stride // Use this fact // (a + b) mod N = (a mod N + b mod N) mod N // If any addend is divisible by stride, then we can replace it with 0 in the attribute @@ -517,6 +508,7 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { // if (InvocationId == 0) { // program ... // } + // But as long as we treat invocation ID as 0 for all threads, shouldn't matter functionally } } @@ -600,8 +592,7 @@ void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) { InitTessConstants(sharp_location->ptr_base, static_cast(sharp_location->dword_off), info, runtime_info, tess_constants); - break; // TODO - // continue; + break; // break out of switch and loop } UNREACHABLE_MSG("Failed to match tess constant sharp"); } @@ -617,10 +608,11 @@ void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) { ASSERT(info.FoundTessConstantsSharp()); + TessConstantUseWalker walker; + for (IR::Block* block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { - switch (inst.GetOpcode()) { - case IR::Opcode::ReadConstBuffer: { + if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) { auto sharp_location = FindTessConstantSharp(&inst); if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base && sharp_location->dword_off == info.tess_consts_dword_offset) { @@ -630,33 +622,46 @@ void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) { ASSERT_MSG(index.IsImmediate(), "Tessellation constant read with dynamic index"); - u32 offset = index.U32(); - ASSERT(offset < static_cast(IR::Attribute::TcsFirstEdgeTessFactorIndex) - - static_cast(IR::Attribute::TcsLsStride) + 1); - IR::Attribute tess_constant_attr = static_cast( - static_cast(IR::Attribute::TcsLsStride) + offset); - IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + u32 off_dw = index.U32(); + ASSERT(off_dw <= + static_cast(TessConstantAttribute::FirstEdgeTessFactorIndex)); - IR::U32 replacement; - if (tess_constant_attr == - IR::Attribute::TcsOffChipTessellationFactorThreshold) { - replacement = ir.BitCast( - ir.GetAttribute(IR::Attribute::TcsOffChipTessellationFactorThreshold)); - } else { - replacement = ir.GetAttributeU32(tess_constant_attr); + auto tess_const_attr = static_cast(off_dw); + switch (tess_const_attr) { + case TessConstantAttribute::LsStride: + // If not, we may need to make this runtime state for TES + ASSERT(info.l_stage == LogicalStage::TessellationControl); + inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.m_lsStride)); + break; + case TessConstantAttribute::HsCpStride: + inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.m_hsCpStride)); + break; + case TessConstantAttribute::HsNumPatch: + case TessConstantAttribute::HsOutputBase: + case TessConstantAttribute::PatchConstBase: + walker.MarkTessAttributeUsers(&inst, tess_const_attr); + // We should be able to safely set these to 0 so that indexing happens only + // within the local patch in the recompiled Vulkan shader. This assumes + // these values only contribute to address calculations for in/out + // attributes in the original gcn shader. + // See the explanation for why we set V2 to 0 when emitting the prologue. + inst.ReplaceUsesWithAndRemove(IR::Value(0u)); + break; + // PatchConstSize: + // PatchOutputSize: + // OffChipTessellationFactorThreshold: + // FirstEdgeTessFactorIndex: + default: + break; } - - inst.ReplaceUsesWithAndRemove(replacement); } - break; - } - - default: - break; } } } + // These pattern matching are neccessary for now unless we support dynamic indexing of + // PatchConst attributes and tess factors. PatchConst should be easy, turn those into a single + // vec4 array like in/out attrs. Not sure about tess factors. if (info.l_stage == LogicalStage::TessellationControl) { // Replace the BFEs on V1 (packed with patch id and output cp id) for easier pattern // matching @@ -669,9 +674,6 @@ void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) { MatchU32(0), MatchU32(8)) .Match(IR::Value{&inst})) { IR::IREmitter emit(*block, it); - // IR::Value replacement = - // emit.GetAttributeU32(IR::Attribute::TessPatchIdInVgt); - // TODO should be fine but check this IR::Value replacement(0u); inst.ReplaceUsesWithAndRemove(replacement); } else if (M_BITFIELDUEXTRACT( @@ -694,82 +696,6 @@ void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) { } } } - - TessConstantUseWalker walker; - for (IR::Block* block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (inst.GetOpcode() == IR::Opcode::GetAttributeU32) { - walker.MarkTessAttributeUsers(&inst); - } - } - } - - for (IR::Block* block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (inst.GetOpcode() == IR::Opcode::GetAttributeU32) { - switch (inst.Arg(0).Attribute()) { - // Should verify that these are only used in address calculations for attr - // read/write - // Replace with 0 so we can dynamically index the control points within the - // region allocated for this patch (input or output). These terms should only - // contribute to the base address of that region, so replacing with 0 *should* - // be fine - case IR::Attribute::TcsNumPatches: - case IR::Attribute::TcsOutputBase: - case IR::Attribute::TcsPatchConstBase: - case IR::Attribute::TessPatchIdInVgt: - inst.ReplaceUsesWithAndRemove(IR::Value(0u)); - break; - default: - break; - } - } - } - } -} - -void TessellationPostprocess(IR::Program& program, RuntimeInfo& runtime_info) { - Shader::Info& info = program.info; - TessellationDataConstantBuffer tess_constants; - InitTessConstants(info.tess_consts_ptr_base, info.tess_consts_dword_offset, info, runtime_info, - tess_constants); - - for (IR::Block* block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (inst.GetOpcode() == IR::Opcode::GetAttributeU32) { - switch (inst.Arg(0).Attribute()) { - case IR::Attribute::TcsLsStride: - ASSERT(info.l_stage == LogicalStage::TessellationControl); - inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.m_lsStride)); - break; - case IR::Attribute::TcsCpStride: - inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.m_hsCpStride)); - break; - default: - break; - } - } - } - } - - // TODO delete - for (IR::Block* block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - switch (inst.GetOpcode()) { - case IR::Opcode::LoadSharedU32: - case IR::Opcode::LoadSharedU64: - case IR::Opcode::LoadSharedU128: - case IR::Opcode::WriteSharedU32: - case IR::Opcode::WriteSharedU64: - case IR::Opcode::WriteSharedU128: - UNREACHABLE_MSG("Remaining DS instruction. {} transform failed", - info.l_stage == LogicalStage::TessellationControl ? "Hull" - : "Domain"); - default: - break; - } - } - } } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 57fd79d55..61f43e7e4 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -21,6 +21,5 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); -void TessellationPostprocess(IR::Program& program, RuntimeInfo& runtime_info); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index dea5498ca..b95503357 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -92,15 +92,11 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info dumpMatchingIR("pre_hull"); Shader::Optimization::HullShaderTransform(program, runtime_info); dumpMatchingIR("post_hull"); - Shader::Optimization::TessellationPostprocess(program, runtime_info); - dumpMatchingIR("post_hull_postprocess"); } else if (info.l_stage == LogicalStage::TessellationEval) { Shader::Optimization::TessellationPreprocess(program, runtime_info); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); - dumpMatchingIR("pre_domain"); Shader::Optimization::DomainShaderTransform(program, runtime_info); dumpMatchingIR("post_domain"); - Shader::Optimization::TessellationPostprocess(program, runtime_info); } Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::RingAccessElimination(program, runtime_info, stage);