Handle offsets and format overrides in fetch shaders (#3486)

Co-authored-by: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com>
2025-12-08 20:58:41 +00:00 · 2025-08-30 16:20:23 -05:00
parent ed3f9ee626
commit c26f56ab02
4 changed files with 36 additions and 24 deletions
--- a/src/shader_recompiler/frontend/fetch_shader.cpp
+++ b/src/shader_recompiler/frontend/fetch_shader.cpp
@@ -1,19 +1,12 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

-#include <algorithm>
 #include "common/assert.h"
 #include "shader_recompiler/frontend/decode.h"
 #include "shader_recompiler/frontend/fetch_shader.h"

 namespace Shader::Gcn {

-const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base) {
-    const u32* code;
-    std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
-    return code;
-}
-
 /**
 * s_load_dwordx4 s[8:11], s[2:3], 0x00
 * s_load_dwordx4 s[12:15], s[2:3], 0x04
@@ -39,6 +32,19 @@ const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base) {
 * We take the reverse way, extract the original input semantics from these instructions.
 **/

+static bool IsTypedBufferLoad(const Gcn::GcnInst& inst) {
+    return inst.opcode == Opcode::TBUFFER_LOAD_FORMAT_X ||
+           inst.opcode == Opcode::TBUFFER_LOAD_FORMAT_XY ||
+           inst.opcode == Opcode::TBUFFER_LOAD_FORMAT_XYZ ||
+           inst.opcode == Opcode::TBUFFER_LOAD_FORMAT_XYZW;
+}
+
+const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base) {
+    const u32* code;
+    std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
+    return code;
+}
+
 std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
    if (!info.has_fetch_shader) {
        return std::nullopt;
@@ -51,7 +57,7 @@ std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {

    struct VsharpLoad {
        u32 dword_offset{};
-        s32 base_sgpr{};
+        u32 base_sgpr{};
    };
    std::array<VsharpLoad, 104> loads{};

@@ -65,8 +71,7 @@ std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
        }

        if (inst.inst_class == InstClass::ScalarMemRd) {
-            loads[inst.dst[0].code] =
-                VsharpLoad{inst.control.smrd.offset, static_cast<s32>(inst.src[0].code) * 2};
+            loads[inst.dst[0].code] = VsharpLoad{inst.control.smrd.offset, inst.src[0].code * 2};
            continue;
        }

@@ -88,20 +93,18 @@ std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
        if (inst.inst_class == InstClass::VectorMemBufFmt) {
             // SRSRC is in units of 4 SGPRs while SBASE is in pairs of SGPRs
            const u32 base_sgpr = inst.src[2].code * 4;
-
-            // Find the load instruction that loaded the V# to the SPGR.
-            // This is so we can determine its index in the vertex table.
-            const auto it = loads[base_sgpr];
-
            auto& attrib = data.attributes.emplace_back();
            attrib.semantic = semantic_index++;
            attrib.dest_vgpr = inst.src[1].code;
            attrib.num_elements = inst.control.mubuf.count;
-            attrib.sgpr_base = it.base_sgpr;
-            attrib.dword_offset = it.dword_offset;
-
-            // Store instance id rate
+            attrib.sgpr_base = loads[base_sgpr].base_sgpr;
+            attrib.dword_offset = loads[base_sgpr].dword_offset;
+            attrib.inst_offset = inst.control.mtbuf.offset;
            attrib.instance_data = inst.src[0].code;
+            if (IsTypedBufferLoad(inst)) {
+                attrib.data_format = inst.control.mtbuf.dfmt;
+                attrib.num_format = inst.control.mtbuf.nfmt;
+            }
        }
    }

--- a/src/shader_recompiler/frontend/fetch_shader.h
+++ b/src/shader_recompiler/frontend/fetch_shader.h
@@ -23,13 +23,22 @@ struct VertexAttribute {
    u8 sgpr_base;     ///< SGPR that contains the pointer to the list of vertex V#
    u8 dword_offset;  ///< The dword offset of the V# that describes this attribute.
    u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
+    u8 inst_offset;   ///< Instruction offset applied on the formatted buffer loads
+    u8 data_format{}; ///< Data format override when typed buffer loads are used
+    u8 num_format{};  ///< Number format override when typed buffer loads are used

-    [[nodiscard]] InstanceIdType GetStepRate() const {
+    InstanceIdType GetStepRate() const {
        return static_cast<InstanceIdType>(instance_data);
    }

-    [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
-        return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
+    constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
+        auto buffer = info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
+        buffer.base_address += inst_offset;
+        if (data_format) {
+            buffer.data_format = data_format;
+            buffer.num_format = num_format;
+        }
+        return buffer;
    }

    bool operator==(const VertexAttribute& other) const {
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -553,7 +553,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
        IR::VectorReg dst_reg{attrib.dest_vgpr};

        // Read the V# of the attribute to figure out component number and type.
-        const auto buffer = info.ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
+        const auto buffer = attrib.GetSharp(info);
        const auto values =
            ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1),
                                  ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3));
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -385,7 +385,7 @@ void GraphicsPipeline::GetVertexInputs(
    const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
    for (const auto& attrib : fetch_shader->attributes) {
        const auto step_rate = attrib.GetStepRate();
-        const auto& buffer = attrib.GetSharp(vs_info);
+        const auto buffer = attrib.GetSharp(vs_info);
        attributes.push_back(Attribute{
            .location = attrib.semantic,
            .binding = attrib.semantic,