shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates (#3238)

* shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates

* flatten_extended_userdata: Remove special step rate buffer handling

* Review comments

* spirv_emit_context: Name all instance rate attribs properly

* spirv: Merge ReadConstBuffer again

template function only has 1 user now

* attribute: Add missing attributes

* translate: Reimplement step rate instance id

* Resolve validation warnings

* shader_recompiler: Separate vertex inputs from LS stage, cleanup tess
This commit is contained in:
TheTurtle
2025-07-14 00:32:02 +03:00
committed by GitHub
parent b403e1be33
commit 399a725343
22 changed files with 208 additions and 274 deletions

View File

@@ -100,22 +100,36 @@ std::string NameOf(Attribute attribute) {
return "Param30";
case Attribute::Param31:
return "Param31";
case Attribute::ClipDistance:
return "ClipDistanace";
case Attribute::CullDistance:
return "CullDistance";
case Attribute::RenderTargetId:
return "RenderTargetId";
case Attribute::ViewportId:
return "ViewportId";
case Attribute::VertexId:
return "VertexId";
case Attribute::InstanceId:
return "InstanceId";
case Attribute::PrimitiveId:
return "PrimitiveId";
case Attribute::FragCoord:
return "FragCoord";
case Attribute::InstanceId:
return "InstanceId";
case Attribute::IsFrontFace:
return "IsFrontFace";
case Attribute::SampleIndex:
return "SampleIndex";
case Attribute::GlobalInvocationId:
return "GlobalInvocationId";
case Attribute::WorkgroupId:
return "WorkgroupId";
case Attribute::WorkgroupIndex:
return "WorkgroupIndex";
case Attribute::LocalInvocationId:
return "LocalInvocationId";
case Attribute::LocalInvocationIndex:
return "LocalInvocationIndex";
case Attribute::FragCoord:
return "FragCoord";
case Attribute::InvocationId:
return "InvocationId";
case Attribute::PatchVertices:

View File

@@ -73,8 +73,6 @@ enum class Attribute : u64 {
LocalInvocationId = 76,
LocalInvocationIndex = 77,
FragCoord = 78,
InstanceId0 = 79, // step rate 0
InstanceId1 = 80, // step rate 1
InvocationId = 81, // TCS id in output patch and instanced geometry shader id
PatchVertices = 82,
TessellationEvaluationPointU = 83,

View File

@@ -255,8 +255,8 @@ void IREmitter::SetM0(const U32& value) {
Inst(Opcode::SetM0, value);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), index);
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index));
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {

View File

@@ -81,8 +81,7 @@ public:
[[nodiscard]] U1 Condition(IR::Condition cond);
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0,
IR::Value index = IR::Value(u32(0u)));
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);

View File

@@ -191,7 +191,7 @@ static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info,
static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
Xbyak::CodeGenerator& c = g_srt_codegen;
if (info.srt_info.srt_reservations.empty() && pass_info.srt_roots.empty()) {
if (pass_info.srt_roots.empty()) {
return;
}
@@ -205,29 +205,7 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
}
info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>();
pass_info.dst_off_dw = NumUserDataRegs;
// Special case for V# step rate buffers in fetch shader
for (const auto [sgpr_base, dword_offset, num_dwords] : info.srt_info.srt_reservations) {
// get pointer to V#
if (sgpr_base != IR::NumScalarRegs) {
PushPtr(c, sgpr_base);
}
u32 src_off = dword_offset << 2;
for (auto j = 0; j < num_dwords; j++) {
c.mov(r11d, ptr[rdi + src_off]);
c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d);
src_off += 4;
++pass_info.dst_off_dw;
}
if (sgpr_base != IR::NumScalarRegs) {
PopPtr(c);
}
}
ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
for (const auto& [sgpr_base, root] : pass_info.srt_roots) {

View File

@@ -33,12 +33,9 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
bool is_composite = opcode == IR::Opcode::WriteSharedU64;
u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
u32 offset = 0;
const auto* addr = inst.Arg(0).InstRecursive();
if (addr->GetOpcode() == IR::Opcode::IAdd32) {
ASSERT(addr->Arg(1).IsImmediate());
offset = addr->Arg(1).U32();
}
ASSERT(inst.Arg(0).IsImmediate());
u32 offset = inst.Arg(0).U32();
IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()})
: inst.Arg(1).Resolve();
for (s32 i = 0; i < num_components; i++) {
@@ -116,7 +113,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
}
const auto shl_inst = inst.Arg(1).TryInstRecursive();
const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2);
const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
const auto bucket = offset.Resolve().U32() / 256u;
const auto attrib = bucket < 4 ? IR::Attribute::Position0

View File

@@ -20,18 +20,7 @@ struct PersistentSrtInfo {
};
PFN_SrtWalker walker_func{};
boost::container::small_vector<SrtSharpReservation, 2> srt_reservations;
u32 flattened_bufsize_dw = 16; // NumUserDataRegs
// Special case for fetch shaders because we don't generate IR to read from step rate buffers,
// so we won't see usage with GetUserData/ReadConst.
// Reserve space in the flattened buffer for a sharp ahead of time
u32 ReserveSharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) {
u32 rv = flattened_bufsize_dw;
srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords);
flattened_bufsize_dw += num_dwords;
return rv;
}
};
} // namespace Shader
} // namespace Shader