mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-04 16:32:39 +00:00
get rid of GetAttributes for special tess constants reads. Immediately replace some upon seeing readconstbuffer. Gets rid of some extra passes over IR
This commit is contained in:
parent
634b04c517
commit
d1988dbf9a
@ -302,7 +302,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
|
||||
ctx.u32_zero_value);
|
||||
case IR::Attribute::PrimitiveId:
|
||||
case IR::Attribute::TessPatchIdInVgt: // TODO see why this isnt DCEd
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
|
||||
case IR::Attribute::InvocationId:
|
||||
ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
|
||||
@ -311,10 +310,22 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||
case IR::Attribute::PatchVertices:
|
||||
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices);
|
||||
case IR::Attribute::PackedHullInvocationInfo:
|
||||
// TODO figure out what to do with this
|
||||
// should be dead code, but otherwise return 0 or concat PrimitiveId and InvocationId
|
||||
return ctx.u32_zero_value;
|
||||
case IR::Attribute::PackedHullInvocationInfo: {
|
||||
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
|
||||
// [0:8]: patch id within VGT
|
||||
// [8:12]: output control point id
|
||||
// But 0:8 should be treated as 0 for attribute addressing purposes
|
||||
if (ctx.runtime_info.hs_info.IsPassthrough()) {
|
||||
// Gcn shader would run with 1 thread, but we need to run a thread for
|
||||
// each output control point.
|
||||
// If Gcn shader uses this value, we should make sure all threads in the
|
||||
// Vulkan shader use 0
|
||||
return ctx.ConstU32(0u);
|
||||
} else {
|
||||
const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
|
||||
return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u));
|
||||
}
|
||||
}
|
||||
default:
|
||||
UNREACHABLE_MSG("Read U32 attribute {}", attr);
|
||||
}
|
||||
|
@ -21,4 +21,17 @@ struct TessellationDataConstantBuffer {
|
||||
u32 m_firstEdgeTessFactorIndex;
|
||||
};
|
||||
|
||||
// TODO comment
|
||||
enum class TessConstantAttribute : u32 {
|
||||
LsStride,
|
||||
HsCpStride,
|
||||
HsNumPatch,
|
||||
HsOutputBase,
|
||||
PatchConstSize,
|
||||
PatchConstBase,
|
||||
PatchOutputSize,
|
||||
OffChipTessellationFactorThreshold,
|
||||
FirstEdgeTessFactorIndex,
|
||||
};
|
||||
|
||||
} // namespace Shader
|
@ -124,6 +124,9 @@ void Translator::EmitPrologue() {
|
||||
}
|
||||
break;
|
||||
case LogicalStage::TessellationControl: {
|
||||
// Should be laid out like:
|
||||
// [0:8]: patch id within VGT
|
||||
// [8:12]: output control point id
|
||||
ir.SetVectorReg(IR::VectorReg::V1,
|
||||
ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo));
|
||||
// TODO need PrimitiveId also like TES?
|
||||
@ -134,10 +137,17 @@ void Translator::EmitPrologue() {
|
||||
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU));
|
||||
ir.SetVectorReg(IR::VectorReg::V1,
|
||||
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV));
|
||||
// I think V2 is actually the patch id within the patches running on the local CU, used in
|
||||
// compiler generated address calcs,
|
||||
// and V3 is the patch id within the draw
|
||||
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::TessPatchIdInVgt));
|
||||
// V2 is similar to PrimitiveID but not the same. It seems to only be used in
|
||||
// compiler-generated address calculations. Its probably the patch id within the
|
||||
// patches running locally on a given VGT (or CU, whichever is the granularity of LDS
|
||||
// memory). So it is probably equal to PrimitiveID % #patches_per_VGT (or per CU).
|
||||
// We should be able to safely set V2 to 0, along with other special values read from the
|
||||
// tess constants buffer, since in the recompiled Vulkan shaders a thread can only
|
||||
// read/write control points and patch const attributes within the local patch. This (V2)
|
||||
// and other special values of TessellationDataConstantBuffer are (probably) just an
|
||||
// implementation detail from the ps4 shader compiler and only used for addressing.
|
||||
ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u));
|
||||
// V3 is the actual PrimitiveID as intended by the shader author.
|
||||
ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
|
||||
break;
|
||||
case LogicalStage::Compute:
|
||||
|
@ -126,26 +126,6 @@ std::string NameOf(Attribute attribute) {
|
||||
return "TessellationEvaluationPointV";
|
||||
case Attribute::PackedHullInvocationInfo:
|
||||
return "PackedHullInvocationInfo";
|
||||
case Attribute::TcsLsStride:
|
||||
return "TcsLsStride";
|
||||
case Attribute::TcsCpStride:
|
||||
return "TcsCpStride";
|
||||
case Attribute::TcsNumPatches:
|
||||
return "TcsNumPatches";
|
||||
case Attribute::TcsOutputBase:
|
||||
return "TcsOutputBase";
|
||||
case Attribute::TcsPatchConstSize:
|
||||
return "TcsPatchConstSize";
|
||||
case Attribute::TcsPatchConstBase:
|
||||
return "TcsPatchConstBase";
|
||||
case Attribute::TcsPatchOutputSize:
|
||||
return "TcsPatchOutputSize";
|
||||
case Attribute::TcsOffChipTessellationFactorThreshold:
|
||||
return "TcsOffChipTessellationFactorThreshold";
|
||||
case Attribute::TcsFirstEdgeTessFactorIndex:
|
||||
return "TcsFirstEdgeTessFactorIndex";
|
||||
case Attribute::TessPatchIdInVgt:
|
||||
return "TessPatchIdInVgt";
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -78,20 +78,7 @@ enum class Attribute : u64 {
|
||||
PatchVertices = 81,
|
||||
TessellationEvaluationPointU = 82,
|
||||
TessellationEvaluationPointV = 83,
|
||||
PackedHullInvocationInfo =
|
||||
84, // PrimitiveId (patch id) and InvocationId (output control point id)
|
||||
// Probably don't need all these.
|
||||
// Most should be dead after hull shader transform
|
||||
TcsLsStride = 85,
|
||||
TcsCpStride = 86,
|
||||
TcsNumPatches = 87,
|
||||
TcsOutputBase = 88,
|
||||
TcsPatchConstSize = 89,
|
||||
TcsPatchConstBase = 90,
|
||||
TcsPatchOutputSize = 91,
|
||||
TcsOffChipTessellationFactorThreshold = 92,
|
||||
TcsFirstEdgeTessFactorIndex = 93,
|
||||
TessPatchIdInVgt = 94,
|
||||
PackedHullInvocationInfo = 84, // contains patch id within the VGT and invocation ID
|
||||
Max,
|
||||
};
|
||||
|
||||
|
@ -154,14 +154,13 @@ std::optional<TessSharpLocation> FindTessConstantSharp(IR::Inst* read_const_buff
|
||||
// Walker that helps deduce what type of attribute a DS instruction is reading
|
||||
// or writing, which could be an input control point, output control point,
|
||||
// or per-patch constant (PatchConst).
|
||||
// For certain ReadConstBuffer instructions using the tess constants V#,
|
||||
// which we preprocess and transform into a named GetAttribute, we visit the users
|
||||
// For certain ReadConstBuffer instructions using the tess constants V#,, we visit the users
|
||||
// recursively and increment a counter on the Load/WriteShared users.
|
||||
// Namely TcsNumPatches (from m_hsNumPatch), TcsOutputBase (m_hsOutputBase),
|
||||
// and TcsPatchConstBase (m_patchConstBase).
|
||||
// In addr calculations, the term TcsNumPatches * ls_stride * #input_cp_in_patch
|
||||
// Namely NumPatch (from m_hsNumPatch), HsOutputBase (m_hsOutputBase),
|
||||
// and PatchConstBase (m_patchConstBase).
|
||||
// In addr calculations, the term NumPatch * ls_stride * #input_cp_in_patch
|
||||
// is used as an addend to skip the region for input control points, and similarly
|
||||
// TcsNumPatches * hs_cp_stride * #output_cp_in_patch is used to skip the region
|
||||
// NumPatch * hs_cp_stride * #output_cp_in_patch is used to skip the region
|
||||
// for output control points.
|
||||
// The Input CP, Output CP, and PatchConst regions are laid out in that order for the
|
||||
// entire thread group, so seeing the TcsNumPatches attribute used in an addr calc should
|
||||
@ -174,21 +173,21 @@ std::optional<TessSharpLocation> FindTessConstantSharp(IR::Inst* read_const_buff
|
||||
// and find the interval it's inside of? (phis are still a problem here)
|
||||
class TessConstantUseWalker {
|
||||
public:
|
||||
void MarkTessAttributeUsers(IR::Inst* get_attribute) {
|
||||
void MarkTessAttributeUsers(IR::Inst* read_const_buffer, TessConstantAttribute attr) {
|
||||
uint inc;
|
||||
switch (get_attribute->Arg(0).Attribute()) {
|
||||
case IR::Attribute::TcsNumPatches:
|
||||
case IR::Attribute::TcsOutputBase:
|
||||
switch (attr) {
|
||||
case TessConstantAttribute::HsNumPatch:
|
||||
case TessConstantAttribute::HsOutputBase:
|
||||
inc = 1;
|
||||
break;
|
||||
case IR::Attribute::TcsPatchConstBase:
|
||||
case TessConstantAttribute::PatchConstBase:
|
||||
inc = 2;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
for (IR::Use use : get_attribute->Uses()) {
|
||||
for (IR::Use use : read_const_buffer->Uses()) {
|
||||
MarkTessAttributeUsersHelper(use, inc);
|
||||
}
|
||||
|
||||
@ -276,14 +275,6 @@ static bool IsDivisibleByStride(IR::Value term, u32 stride) {
|
||||
IR::Value a, b;
|
||||
if (MatchU32(stride).Match(term)) {
|
||||
return true;
|
||||
} else if (M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::TcsLsStride), MatchU32(0))
|
||||
.Match(term) ||
|
||||
M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::TcsCpStride), MatchU32(0))
|
||||
.Match(term)) {
|
||||
// TODO if we fold in constants earlier (Dont produce attributes, instead just emit
|
||||
// constants) then this case isnt needed. Also should assert that this correct attribute is
|
||||
// being used depending on stage and whether this is an input or output attribute
|
||||
return true;
|
||||
} else if (M_BITFIELDUEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term) ||
|
||||
M_BITFIELDSEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term)) {
|
||||
return IsDivisibleByStride(a, stride);
|
||||
@ -309,7 +300,7 @@ static bool TryOptimizeAddendInModulo(IR::Value addend, u32 stride, std::vector<
|
||||
}
|
||||
}
|
||||
|
||||
// In calculation addr = (a + b + ...) % stride
|
||||
// In calculation (a + b + ...) % stride
|
||||
// Use this fact
|
||||
// (a + b) mod N = (a mod N + b mod N) mod N
|
||||
// If any addend is divisible by stride, then we can replace it with 0 in the attribute
|
||||
@ -517,6 +508,7 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
|
||||
// if (InvocationId == 0) {
|
||||
// program ...
|
||||
// }
|
||||
// But as long as we treat invocation ID as 0 for all threads, shouldn't matter functionally
|
||||
}
|
||||
}
|
||||
|
||||
@ -600,8 +592,7 @@ void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
|
||||
InitTessConstants(sharp_location->ptr_base,
|
||||
static_cast<s32>(sharp_location->dword_off), info,
|
||||
runtime_info, tess_constants);
|
||||
break; // TODO
|
||||
// continue;
|
||||
break; // break out of switch and loop
|
||||
}
|
||||
UNREACHABLE_MSG("Failed to match tess constant sharp");
|
||||
}
|
||||
@ -617,10 +608,11 @@ void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
|
||||
|
||||
ASSERT(info.FoundTessConstantsSharp());
|
||||
|
||||
TessConstantUseWalker walker;
|
||||
|
||||
for (IR::Block* block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ReadConstBuffer: {
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
auto sharp_location = FindTessConstantSharp(&inst);
|
||||
if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base &&
|
||||
sharp_location->dword_off == info.tess_consts_dword_offset) {
|
||||
@ -630,33 +622,46 @@ void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
|
||||
|
||||
ASSERT_MSG(index.IsImmediate(),
|
||||
"Tessellation constant read with dynamic index");
|
||||
u32 offset = index.U32();
|
||||
ASSERT(offset < static_cast<u32>(IR::Attribute::TcsFirstEdgeTessFactorIndex) -
|
||||
static_cast<u32>(IR::Attribute::TcsLsStride) + 1);
|
||||
IR::Attribute tess_constant_attr = static_cast<IR::Attribute>(
|
||||
static_cast<u32>(IR::Attribute::TcsLsStride) + offset);
|
||||
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
u32 off_dw = index.U32();
|
||||
ASSERT(off_dw <=
|
||||
static_cast<u32>(TessConstantAttribute::FirstEdgeTessFactorIndex));
|
||||
|
||||
IR::U32 replacement;
|
||||
if (tess_constant_attr ==
|
||||
IR::Attribute::TcsOffChipTessellationFactorThreshold) {
|
||||
replacement = ir.BitCast<IR::U32>(
|
||||
ir.GetAttribute(IR::Attribute::TcsOffChipTessellationFactorThreshold));
|
||||
} else {
|
||||
replacement = ir.GetAttributeU32(tess_constant_attr);
|
||||
auto tess_const_attr = static_cast<TessConstantAttribute>(off_dw);
|
||||
switch (tess_const_attr) {
|
||||
case TessConstantAttribute::LsStride:
|
||||
// If not, we may need to make this runtime state for TES
|
||||
ASSERT(info.l_stage == LogicalStage::TessellationControl);
|
||||
inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.m_lsStride));
|
||||
break;
|
||||
case TessConstantAttribute::HsCpStride:
|
||||
inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.m_hsCpStride));
|
||||
break;
|
||||
case TessConstantAttribute::HsNumPatch:
|
||||
case TessConstantAttribute::HsOutputBase:
|
||||
case TessConstantAttribute::PatchConstBase:
|
||||
walker.MarkTessAttributeUsers(&inst, tess_const_attr);
|
||||
// We should be able to safely set these to 0 so that indexing happens only
|
||||
// within the local patch in the recompiled Vulkan shader. This assumes
|
||||
// these values only contribute to address calculations for in/out
|
||||
// attributes in the original gcn shader.
|
||||
// See the explanation for why we set V2 to 0 when emitting the prologue.
|
||||
inst.ReplaceUsesWithAndRemove(IR::Value(0u));
|
||||
break;
|
||||
// PatchConstSize:
|
||||
// PatchOutputSize:
|
||||
// OffChipTessellationFactorThreshold:
|
||||
// FirstEdgeTessFactorIndex:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
inst.ReplaceUsesWithAndRemove(replacement);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// These pattern matching are neccessary for now unless we support dynamic indexing of
|
||||
// PatchConst attributes and tess factors. PatchConst should be easy, turn those into a single
|
||||
// vec4 array like in/out attrs. Not sure about tess factors.
|
||||
if (info.l_stage == LogicalStage::TessellationControl) {
|
||||
// Replace the BFEs on V1 (packed with patch id and output cp id) for easier pattern
|
||||
// matching
|
||||
@ -669,9 +674,6 @@ void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
|
||||
MatchU32(0), MatchU32(8))
|
||||
.Match(IR::Value{&inst})) {
|
||||
IR::IREmitter emit(*block, it);
|
||||
// IR::Value replacement =
|
||||
// emit.GetAttributeU32(IR::Attribute::TessPatchIdInVgt);
|
||||
// TODO should be fine but check this
|
||||
IR::Value replacement(0u);
|
||||
inst.ReplaceUsesWithAndRemove(replacement);
|
||||
} else if (M_BITFIELDUEXTRACT(
|
||||
@ -694,82 +696,6 @@ void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TessConstantUseWalker walker;
|
||||
for (IR::Block* block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (inst.GetOpcode() == IR::Opcode::GetAttributeU32) {
|
||||
walker.MarkTessAttributeUsers(&inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (IR::Block* block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (inst.GetOpcode() == IR::Opcode::GetAttributeU32) {
|
||||
switch (inst.Arg(0).Attribute()) {
|
||||
// Should verify that these are only used in address calculations for attr
|
||||
// read/write
|
||||
// Replace with 0 so we can dynamically index the control points within the
|
||||
// region allocated for this patch (input or output). These terms should only
|
||||
// contribute to the base address of that region, so replacing with 0 *should*
|
||||
// be fine
|
||||
case IR::Attribute::TcsNumPatches:
|
||||
case IR::Attribute::TcsOutputBase:
|
||||
case IR::Attribute::TcsPatchConstBase:
|
||||
case IR::Attribute::TessPatchIdInVgt:
|
||||
inst.ReplaceUsesWithAndRemove(IR::Value(0u));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TessellationPostprocess(IR::Program& program, RuntimeInfo& runtime_info) {
|
||||
Shader::Info& info = program.info;
|
||||
TessellationDataConstantBuffer tess_constants;
|
||||
InitTessConstants(info.tess_consts_ptr_base, info.tess_consts_dword_offset, info, runtime_info,
|
||||
tess_constants);
|
||||
|
||||
for (IR::Block* block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (inst.GetOpcode() == IR::Opcode::GetAttributeU32) {
|
||||
switch (inst.Arg(0).Attribute()) {
|
||||
case IR::Attribute::TcsLsStride:
|
||||
ASSERT(info.l_stage == LogicalStage::TessellationControl);
|
||||
inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.m_lsStride));
|
||||
break;
|
||||
case IR::Attribute::TcsCpStride:
|
||||
inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.m_hsCpStride));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO delete
|
||||
for (IR::Block* block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::LoadSharedU128:
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
case IR::Opcode::WriteSharedU128:
|
||||
UNREACHABLE_MSG("Remaining DS instruction. {} transform failed",
|
||||
info.l_stage == LogicalStage::TessellationControl ? "Hull"
|
||||
: "Domain");
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
@ -21,6 +21,5 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
||||
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
|
||||
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
|
||||
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
|
||||
void TessellationPostprocess(IR::Program& program, RuntimeInfo& runtime_info);
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
@ -92,15 +92,11 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
|
||||
dumpMatchingIR("pre_hull");
|
||||
Shader::Optimization::HullShaderTransform(program, runtime_info);
|
||||
dumpMatchingIR("post_hull");
|
||||
Shader::Optimization::TessellationPostprocess(program, runtime_info);
|
||||
dumpMatchingIR("post_hull_postprocess");
|
||||
} else if (info.l_stage == LogicalStage::TessellationEval) {
|
||||
Shader::Optimization::TessellationPreprocess(program, runtime_info);
|
||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||
dumpMatchingIR("pre_domain");
|
||||
Shader::Optimization::DomainShaderTransform(program, runtime_info);
|
||||
dumpMatchingIR("post_domain");
|
||||
Shader::Optimization::TessellationPostprocess(program, runtime_info);
|
||||
}
|
||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||
Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
|
||||
|
Loading…
Reference in New Issue
Block a user