mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-12 14:48:52 +00:00
shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates (#3238)
* shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates
* flatten_extended_userdata: Remove special step rate buffer handling
* Review comments
* spirv_emit_context: Name all instance rate attribs properly
* spirv: Merge ReadConstBuffer again; template function only has 1 user now
* attribute: Add missing attributes
* translate: Reimplement step rate instance id
* Resolve validation warnings
* shader_recompiler: Separate vertex inputs from LS stage, cleanup tess
This commit is contained in:
@@ -100,22 +100,36 @@ std::string NameOf(Attribute attribute) {
|
||||
return "Param30";
|
||||
case Attribute::Param31:
|
||||
return "Param31";
|
||||
case Attribute::ClipDistance:
|
||||
return "ClipDistanace";
|
||||
case Attribute::CullDistance:
|
||||
return "CullDistance";
|
||||
case Attribute::RenderTargetId:
|
||||
return "RenderTargetId";
|
||||
case Attribute::ViewportId:
|
||||
return "ViewportId";
|
||||
case Attribute::VertexId:
|
||||
return "VertexId";
|
||||
case Attribute::InstanceId:
|
||||
return "InstanceId";
|
||||
case Attribute::PrimitiveId:
|
||||
return "PrimitiveId";
|
||||
case Attribute::FragCoord:
|
||||
return "FragCoord";
|
||||
case Attribute::InstanceId:
|
||||
return "InstanceId";
|
||||
case Attribute::IsFrontFace:
|
||||
return "IsFrontFace";
|
||||
case Attribute::SampleIndex:
|
||||
return "SampleIndex";
|
||||
case Attribute::GlobalInvocationId:
|
||||
return "GlobalInvocationId";
|
||||
case Attribute::WorkgroupId:
|
||||
return "WorkgroupId";
|
||||
case Attribute::WorkgroupIndex:
|
||||
return "WorkgroupIndex";
|
||||
case Attribute::LocalInvocationId:
|
||||
return "LocalInvocationId";
|
||||
case Attribute::LocalInvocationIndex:
|
||||
return "LocalInvocationIndex";
|
||||
case Attribute::FragCoord:
|
||||
return "FragCoord";
|
||||
case Attribute::InvocationId:
|
||||
return "InvocationId";
|
||||
case Attribute::PatchVertices:
|
||||
|
||||
@@ -73,8 +73,6 @@ enum class Attribute : u64 {
|
||||
LocalInvocationId = 76,
|
||||
LocalInvocationIndex = 77,
|
||||
FragCoord = 78,
|
||||
InstanceId0 = 79, // step rate 0
|
||||
InstanceId1 = 80, // step rate 1
|
||||
InvocationId = 81, // TCS id in output patch and instanced geometry shader id
|
||||
PatchVertices = 82,
|
||||
TessellationEvaluationPointU = 83,
|
||||
|
||||
@@ -255,8 +255,8 @@ void IREmitter::SetM0(const U32& value) {
|
||||
Inst(Opcode::SetM0, value);
|
||||
}
|
||||
|
||||
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) {
|
||||
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), index);
|
||||
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
|
||||
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index));
|
||||
}
|
||||
|
||||
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
|
||||
|
||||
@@ -81,8 +81,7 @@ public:
|
||||
|
||||
[[nodiscard]] U1 Condition(IR::Condition cond);
|
||||
|
||||
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0,
|
||||
IR::Value index = IR::Value(u32(0u)));
|
||||
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
|
||||
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
|
||||
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
|
||||
|
||||
|
||||
@@ -191,7 +191,7 @@ static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info,
|
||||
static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
|
||||
Xbyak::CodeGenerator& c = g_srt_codegen;
|
||||
|
||||
if (info.srt_info.srt_reservations.empty() && pass_info.srt_roots.empty()) {
|
||||
if (pass_info.srt_roots.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -205,29 +205,7 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
|
||||
}
|
||||
|
||||
info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>();
|
||||
|
||||
pass_info.dst_off_dw = NumUserDataRegs;
|
||||
|
||||
// Special case for V# step rate buffers in fetch shader
|
||||
for (const auto [sgpr_base, dword_offset, num_dwords] : info.srt_info.srt_reservations) {
|
||||
// get pointer to V#
|
||||
if (sgpr_base != IR::NumScalarRegs) {
|
||||
PushPtr(c, sgpr_base);
|
||||
}
|
||||
u32 src_off = dword_offset << 2;
|
||||
|
||||
for (auto j = 0; j < num_dwords; j++) {
|
||||
c.mov(r11d, ptr[rdi + src_off]);
|
||||
c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d);
|
||||
|
||||
src_off += 4;
|
||||
++pass_info.dst_off_dw;
|
||||
}
|
||||
if (sgpr_base != IR::NumScalarRegs) {
|
||||
PopPtr(c);
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
|
||||
|
||||
for (const auto& [sgpr_base, root] : pass_info.srt_roots) {
|
||||
|
||||
@@ -33,12 +33,9 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
||||
bool is_composite = opcode == IR::Opcode::WriteSharedU64;
|
||||
u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
|
||||
|
||||
u32 offset = 0;
|
||||
const auto* addr = inst.Arg(0).InstRecursive();
|
||||
if (addr->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
ASSERT(addr->Arg(1).IsImmediate());
|
||||
offset = addr->Arg(1).U32();
|
||||
}
|
||||
ASSERT(inst.Arg(0).IsImmediate());
|
||||
|
||||
u32 offset = inst.Arg(0).U32();
|
||||
IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()})
|
||||
: inst.Arg(1).Resolve();
|
||||
for (s32 i = 0; i < num_components; i++) {
|
||||
@@ -116,7 +113,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
||||
}
|
||||
|
||||
const auto shl_inst = inst.Arg(1).TryInstRecursive();
|
||||
const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2);
|
||||
const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
|
||||
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
|
||||
const auto bucket = offset.Resolve().U32() / 256u;
|
||||
const auto attrib = bucket < 4 ? IR::Attribute::Position0
|
||||
|
||||
@@ -20,18 +20,7 @@ struct PersistentSrtInfo {
|
||||
};
|
||||
|
||||
PFN_SrtWalker walker_func{};
|
||||
boost::container::small_vector<SrtSharpReservation, 2> srt_reservations;
|
||||
u32 flattened_bufsize_dw = 16; // NumUserDataRegs
|
||||
|
||||
// Special case for fetch shaders because we don't generate IR to read from step rate buffers,
|
||||
// so we won't see usage with GetUserData/ReadConst.
|
||||
// Reserve space in the flattened buffer for a sharp ahead of time
|
||||
u32 ReserveSharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) {
|
||||
u32 rv = flattened_bufsize_dw;
|
||||
srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords);
|
||||
flattened_bufsize_dw += num_dwords;
|
||||
return rv;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
} // namespace Shader
|
||||
|
||||
Reference in New Issue
Block a user