Handle offsets and format overrides in fetch shaders (#3486)

Co-authored-by: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com>
This commit is contained in:
Stephen Miller
2025-08-30 16:20:23 -05:00
committed by GitHub
parent ed3f9ee626
commit c26f56ab02
4 changed files with 36 additions and 24 deletions

View File

@@ -1,19 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include "common/assert.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/fetch_shader.h"
namespace Shader::Gcn {
/// Reads a code pointer out of the shader's user data, starting at element `sgpr_base`.
/// The pointer value is rebuilt by copying sizeof(pointer) raw bytes from that element onward.
const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base) {
    const u32* fetch_code = nullptr;
    const auto* const raw_bytes = &info.user_data[sgpr_base];
    // memcpy rather than a pointer cast to reinterpret the stored bytes as a pointer value.
    std::memcpy(&fetch_code, raw_bytes, sizeof(fetch_code));
    return fetch_code;
}
/**
* s_load_dwordx4 s[8:11], s[2:3], 0x00
* s_load_dwordx4 s[12:15], s[2:3], 0x04
@@ -39,6 +32,19 @@ const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base) {
* We take the reverse way, extract the original input semantics from these instructions.
**/
/// Returns true when `inst` is one of the TBUFFER_LOAD_FORMAT_{X,XY,XYZ,XYZW} opcodes.
static bool IsTypedBufferLoad(const Gcn::GcnInst& inst) {
    switch (inst.opcode) {
    case Opcode::TBUFFER_LOAD_FORMAT_X:
    case Opcode::TBUFFER_LOAD_FORMAT_XY:
    case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
    case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
        return true;
    default:
        return false;
    }
}
/// Reads a code pointer out of the shader's user data, starting at element `sgpr_base`.
/// The pointer value is rebuilt by copying sizeof(pointer) raw bytes from that element onward.
const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base) {
    const u32* fetch_code = nullptr;
    const auto* const raw_bytes = &info.user_data[sgpr_base];
    // memcpy rather than a pointer cast to reinterpret the stored bytes as a pointer value.
    std::memcpy(&fetch_code, raw_bytes, sizeof(fetch_code));
    return fetch_code;
}
std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
if (!info.has_fetch_shader) {
return std::nullopt;
@@ -51,7 +57,7 @@ std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
struct VsharpLoad {
u32 dword_offset{};
s32 base_sgpr{};
u32 base_sgpr{};
};
std::array<VsharpLoad, 104> loads{};
@@ -65,8 +71,7 @@ std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
}
if (inst.inst_class == InstClass::ScalarMemRd) {
loads[inst.dst[0].code] =
VsharpLoad{inst.control.smrd.offset, static_cast<s32>(inst.src[0].code) * 2};
loads[inst.dst[0].code] = VsharpLoad{inst.control.smrd.offset, inst.src[0].code * 2};
continue;
}
@@ -88,20 +93,18 @@ std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
if (inst.inst_class == InstClass::VectorMemBufFmt) {
// SRSRC is in units of 4 SGPRs while SBASE is in pairs of SGPRs
const u32 base_sgpr = inst.src[2].code * 4;
// Find the load instruction that loaded the V# to the SGPR.
// This is so we can determine its index in the vertex table.
const auto it = loads[base_sgpr];
auto& attrib = data.attributes.emplace_back();
attrib.semantic = semantic_index++;
attrib.dest_vgpr = inst.src[1].code;
attrib.num_elements = inst.control.mubuf.count;
attrib.sgpr_base = it.base_sgpr;
attrib.dword_offset = it.dword_offset;
// Store instance id rate
attrib.sgpr_base = loads[base_sgpr].base_sgpr;
attrib.dword_offset = loads[base_sgpr].dword_offset;
attrib.inst_offset = inst.control.mtbuf.offset;
attrib.instance_data = inst.src[0].code;
if (IsTypedBufferLoad(inst)) {
attrib.data_format = inst.control.mtbuf.dfmt;
attrib.num_format = inst.control.mtbuf.nfmt;
}
}
}

View File

@@ -23,13 +23,22 @@ struct VertexAttribute {
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
u8 inst_offset; ///< Instruction offset applied on the formatted buffer loads
u8 data_format{}; ///< Data format override when typed buffer loads are used
u8 num_format{}; ///< Number format override when typed buffer loads are used
[[nodiscard]] InstanceIdType GetStepRate() const {
InstanceIdType GetStepRate() const {
return static_cast<InstanceIdType>(instance_data);
}
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
auto buffer = info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
buffer.base_address += inst_offset;
if (data_format) {
buffer.data_format = data_format;
buffer.num_format = num_format;
}
return buffer;
}
bool operator==(const VertexAttribute& other) const {

View File

@@ -553,7 +553,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
IR::VectorReg dst_reg{attrib.dest_vgpr};
// Read the V# of the attribute to figure out component number and type.
const auto buffer = info.ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
const auto buffer = attrib.GetSharp(info);
const auto values =
ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1),
ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3));

View File

@@ -385,7 +385,7 @@ void GraphicsPipeline::GetVertexInputs(
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
for (const auto& attrib : fetch_shader->attributes) {
const auto step_rate = attrib.GetStepRate();
const auto& buffer = attrib.GetSharp(vs_info);
const auto buffer = attrib.GetSharp(vs_info);
attributes.push_back(Attribute{
.location = attrib.semantic,
.binding = attrib.semantic,