shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates (#3238)

* shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates
* flatten_extended_userdata: Remove special step rate buffer handling
* Review comments
* spirv_emit_context: Name all instance rate attribs properly
* spirv: Merge ReadConstBuffer again (template function only has 1 user now)
* attribute: Add missing attributes
* translate: Reimplement step rate instance id
* Resolve validation warnings
* shader_recompiler: Separate vertex inputs from LS stage, cleanup tess
2025-12-10 05:38:49 +00:00 · 2025-07-14 00:32:02 +03:00
parent b403e1be33
commit 399a725343
22 changed files with 208 additions and 274 deletions
--- a/src/shader_recompiler/frontend/fetch_shader.h
+++ b/src/shader_recompiler/frontend/fetch_shader.h
@@ -3,7 +3,6 @@

 #pragma once

-#include <ranges>
 #include <vector>
 #include "common/types.h"
 #include "shader_recompiler/info.h"
@@ -29,11 +28,6 @@ struct VertexAttribute {
        return static_cast<InstanceIdType>(instance_data);
    }

-    [[nodiscard]] bool UsesStepRates() const {
-        const auto step_rate = GetStepRate();
-        return step_rate == OverStepRate0 || step_rate == OverStepRate1;
-    }
-
    [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
        return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
    }
@@ -52,12 +46,6 @@ struct FetchShaderData {
    s8 vertex_offset_sgpr = -1;   ///< SGPR of vertex offset from VADDR
    s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR

-    [[nodiscard]] bool UsesStepRates() const {
-        return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) {
-                   return attribute.UsesStepRates();
-               }) != attributes.end();
-    }
-
    bool operator==(const FetchShaderData& other) const {
        return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
               instance_offset_sgpr == other.instance_offset_sgpr;
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -90,17 +90,40 @@ void Translator::EmitPrologue(IR::Block* first_block) {
    case LogicalStage::Vertex:
        // v0: vertex ID, always present
        ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
-        // v1: instance ID, step rate 0
-        if (runtime_info.num_input_vgprs > 0) {
-            ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0));
-        }
-        // v2: instance ID, step rate 1
-        if (runtime_info.num_input_vgprs > 1) {
-            ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1));
-        }
-        // v3: instance ID, plain
-        if (runtime_info.num_input_vgprs > 2) {
-            ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
+        if (info.stage == Stage::Local) {
+            // v1: rel patch ID
+            if (runtime_info.num_input_vgprs > 0) {
+                ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
+            }
+            // v2: instance ID
+            if (runtime_info.num_input_vgprs > 1) {
+                ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
+            }
+        } else {
+            // v1: instance ID, step rate 0
+            if (runtime_info.num_input_vgprs > 0) {
+                if (runtime_info.vs_info.step_rate_0 != 0) {
+                    ir.SetVectorReg(dst_vreg++,
+                                    ir.IDiv(ir.GetAttributeU32(IR::Attribute::InstanceId),
+                                            ir.Imm32(runtime_info.vs_info.step_rate_0)));
+                } else {
+                    ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
+                }
+            }
+            // v2: instance ID, step rate 1
+            if (runtime_info.num_input_vgprs > 1) {
+                if (runtime_info.vs_info.step_rate_1 != 0) {
+                    ir.SetVectorReg(dst_vreg++,
+                                    ir.IDiv(ir.GetAttributeU32(IR::Attribute::InstanceId),
+                                            ir.Imm32(runtime_info.vs_info.step_rate_1)));
+                } else {
+                    ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
+                }
+            }
+            // v3: instance ID, plain
+            if (runtime_info.num_input_vgprs > 2) {
+                ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
+            }
        }
        break;
    case LogicalStage::Fragment:
@@ -183,10 +206,8 @@ void Translator::EmitPrologue(IR::Block* first_block) {
        switch (runtime_info.gs_info.out_primitive[0]) {
        case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
            ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
-            [[fallthrough]];
        case AmdGpu::GsOutputPrimitiveType::LineStrip:
            ir.SetVectorReg(IR::VectorReg::V1, ir.Imm32(1u)); // vertex 1
-            [[fallthrough]];
        default:
            ir.SetVectorReg(IR::VectorReg::V0, ir.Imm32(0u)); // vertex 0
            break;
@@ -481,11 +502,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
 }

 void Translator::EmitFetch(const GcnInst& inst) {
-    // Read the pointer to the fetch shader assembly.
    const auto code_sgpr_base = inst.src[0].code;
+
+    // The fetch shader must be inlined to access as regular buffers, so that
+    // bounds checks can be emitted to emulate robust buffer access.
    if (!profile.supports_robust_buffer_access) {
-        // The fetch shader must be inlined to access as regular buffers, so that
-        // bounds checks can be emitted to emulate robust buffer access.
        const auto* code = GetFetchShaderCode(info, code_sgpr_base);
        GcnCodeSlice slice(code, code + std::numeric_limits<u32>::max());
        GcnDecodeContext decoder;
@@ -535,16 +556,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
        for (u32 i = 0; i < 4; i++) {
            ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)});
        }
-
-        // In case of programmable step rates we need to fallback to instance data pulling in
-        // shader, so VBs should be bound as regular data buffers
-        if (attrib.UsesStepRates()) {
-            info.buffers.push_back({
-                .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
-                .used_types = IR::Type::F32,
-                .instance_attrib = attrib.semantic,
-            });
-        }
    }
 }