mirror of https://github.com/shadps4-emu/shadPS4.git

shader_recompiler: Rework sharp tracking for robustness (#3327)
* shader_recompiler: Remove remnants of old discard. Also constant-propagate conditional discard if the condition is constant.
* resource_tracking_pass: Rework sharp tracking for robustness.
* resource_tracking_pass: Add source dominance analysis. When reachability is not enough to prune the source list, check if one source dominates all other sources.
* resource_tracking_pass: Fix immediate check. How did this work before?
* resource_tracking_pass: Remove unused template type.
* readlane_elimination_pass: Don't add a phi when all args are the same. The new sharp tracking exposed some bad sources on sampler sharps using the aniso-disable pattern that were also part of the readlane pattern; fix tracking by removing the unnecessary phis in between.
* resource_tracking_pass: Allow phi in the aniso-disable pattern.
* resource_tracking_pass: Handle invalid buffer sharps and more phis in the aniso pattern.
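The "source dominance analysis" item is the core of the robustness rework: when the resource tracking pass walks back from a sharp handle and reachability alone leaves several candidate sources alive, a candidate is only safe to single out if its defining block dominates the blocks of every other candidate. The snippet below is a minimal sketch of that check only, not the shadPS4 implementation; `Block`, `Source`, `Dominates`, `FindDominatingSource`, and the immediate-dominator representation are names and assumptions made up for the example.

```cpp
#include <algorithm>
#include <cstddef>
#include <optional>
#include <span>

// Hypothetical CFG block: each block stores its immediate dominator,
// nullptr for the entry block.
struct Block {
    const Block* imm_dom = nullptr;
};

// Candidate definition of a sharp and the block it was found in.
struct Source {
    const Block* block = nullptr;
};

// a dominates b iff a appears on b's immediate-dominator chain
// (a block dominates itself).
bool Dominates(const Block* a, const Block* b) {
    for (const Block* it = b; it != nullptr; it = it->imm_dom) {
        if (it == a) {
            return true;
        }
    }
    return false;
}

// Returns the index of a source whose block dominates the blocks of all other
// candidates, i.e. a definition guaranteed to execute before any of them.
// If no such source exists, the caller keeps the full source list.
std::optional<std::size_t> FindDominatingSource(std::span<const Source> sources) {
    for (std::size_t i = 0; i < sources.size(); ++i) {
        const bool dominates_all =
            std::all_of(sources.begin(), sources.end(), [&](const Source& other) {
                return Dominates(sources[i].block, other.block);
            });
        if (dominates_all) {
            return i;
        }
    }
    return std::nullopt;
}
```

With immediate dominators available, the check is a quadratic pointer-chase in the worst case, which stays cheap for the handful of candidate sources a sharp normally has; how the result is used to prune the list is up to the pass itself.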
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include <algorithm>
#include <unordered_map>
#include "common/assert.h"
#include "common/logging/log.h"
#include "shader_recompiler/frontend/control_flow_graph.h"

@@ -350,19 +349,7 @@ void CFG::LinkBlocks() {
            block.branch_false = end_block;
            block.end_class = EndClass::Branch;
        } else if (end_inst.opcode == Opcode::S_ENDPGM) {
            const auto& prev_inst = inst_list[block.end_index - 1];
            if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) {
                if (prev_inst.control.exp.target != 9) {
                    block.end_class = EndClass::Kill;
                } else if (const auto& exec_mask = inst_list[block.end_index - 2];
                           exec_mask.src[0].field == OperandField::ConstZero) {
                    block.end_class = EndClass::Kill;
                } else {
                    block.end_class = EndClass::Exit;
                }
            } else {
                block.end_class = EndClass::Exit;
            }
            block.end_class = EndClass::Exit;
        } else {
            UNREACHABLE();
        }

@@ -403,12 +390,6 @@ std::string CFG::Dot() const {
                fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", node_uid);
            ++node_uid;
            break;
        case EndClass::Kill:
            dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
            dot +=
                fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", node_uid);
            ++node_uid;
            break;
        }
    }
    dot += "\t\tlabel = \"main\";\n\t}\n";
@@ -23,7 +23,6 @@ using Hook =
enum class EndClass {
    Branch, ///< Block ends with a (un)conditional branch.
    Exit, ///< Block ends with an exit instruction.
    Kill, ///< Block ends with a discard instruction.
};

/// A block represents a linear range of instructions.
@@ -39,7 +39,6 @@ enum class StatementType {
    Loop,
    Break,
    Return,
    Kill,
    Unreachable,
    Function,
    Identity,

@@ -88,7 +87,6 @@ struct Statement : ListBaseHook {
    Statement(Break, Statement* cond_, Statement* up_)
        : cond{cond_}, up{up_}, type{StatementType::Break} {}
    Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {}
    Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {}
    Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {}
    Statement(FunctionTag) : children{}, type{StatementType::Function} {}
    Statement(Identity, IR::Condition cond_, Statement* up_)

@@ -174,9 +172,6 @@ std::string DumpExpr(const Statement* stmt) {
        case StatementType::Return:
            ret += fmt::format("{} return;\n", indent);
            break;
        case StatementType::Kill:
            ret += fmt::format("{} kill;\n", indent);
            break;
        case StatementType::Unreachable:
            ret += fmt::format("{} unreachable;\n", indent);
            break;

@@ -335,9 +330,9 @@ private:
            }
        }
        // Expensive operation:
        if (!AreSiblings(goto_stmt, label_stmt)) {
            UNREACHABLE_MSG("Goto is not a sibling with the label");
        }
        // if (!AreSiblings(goto_stmt, label_stmt)) {
        //     UNREACHABLE_MSG("Goto is not a sibling with the label");
        //}
        // goto_stmt and label_stmt are guaranteed to be siblings, eliminate
        if (std::next(goto_stmt) == label_stmt) {
            // Simply eliminate the goto if the label is next to it

@@ -410,9 +405,6 @@ private:
        case EndClass::Exit:
            root.insert(ip, *pool.Create(Return{}, &root_stmt));
            break;
        case EndClass::Kill:
            root.insert(ip, *pool.Create(Kill{}, &root_stmt));
            break;
        }
    }
}

@@ -637,6 +629,7 @@ private:
        if (!stmt.block->is_dummy) {
            const u32 start = stmt.block->begin_index;
            const u32 size = stmt.block->end_index - start + 1;
            current_block->cfg_block = stmt.block;
            translator.Translate(current_block, stmt.block->begin,
                                 inst_list.subspan(start, size));
        }

@@ -770,18 +763,6 @@ private:
            syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
            break;
        }
        case StatementType::Kill: {
            ensure_block();
            IR::Block* demote_block{MergeBlock(parent, stmt)};
            IR::IREmitter{*current_block}.Discard();
            current_block->AddBranch(demote_block);
            current_block = demote_block;

            auto& merge{syntax_list.emplace_back()};
            merge.type = IR::AbstractSyntaxNode::Type::Block;
            merge.data.block = demote_block;
            break;
        }
        case StatementType::Unreachable: {
            ensure_block();
            current_block = nullptr;

@@ -789,7 +770,7 @@ private:
            break;
        }
        default:
            throw NotImplementedException("Statement type {}", u32(stmt.type));
            UNREACHABLE_MSG("Statement type {}", u32(stmt.type));
        }
    }
    if (current_block) {
@@ -6,7 +6,7 @@

namespace Shader::Gcn {

void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
void Translator::EmitFlowControl(const GcnInst& inst) {
    switch (inst.opcode) {
    case Opcode::S_BARRIER:
        return S_BARRIER();

@@ -20,7 +20,7 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
        LOG_WARNING(Render_Vulkan, "S_TRAP instruction!");
        return;
    case Opcode::S_GETPC_B64:
        return S_GETPC_B64(pc, inst);
        return S_GETPC_B64(inst);
    case Opcode::S_SETPC_B64:
    case Opcode::S_WAITCNT:
    case Opcode::S_NOP:

@@ -45,9 +45,7 @@ void Translator::S_BARRIER() {
    ir.Barrier();
}

void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
    // This only really exists to let resource tracking pass know
    // there is an inline cbuf.
void Translator::S_GETPC_B64(const GcnInst& inst) {
    const IR::ScalarReg dst{inst.dst[0].code};
    ir.SetScalarReg(dst, ir.Imm32(pc));
    ir.SetScalarReg(dst + 1, ir.Imm32(0));
@@ -520,14 +520,13 @@ void Translator::EmitFetch(const GcnInst& inst) {
        GcnDecodeContext decoder;

        // Decode and save instructions
        u32 sub_pc = 0;
        while (!slice.atEnd()) {
            const auto sub_inst = decoder.decodeInstruction(slice);
            if (sub_inst.opcode == Opcode::S_SETPC_B64) {
                // Assume we're swapping back to the main shader.
                break;
            }
            TranslateInstruction(sub_inst, sub_pc++);
            TranslateInstruction(sub_inst);
        }
        return;
    }

@@ -574,11 +573,12 @@ void Translator::LogMissingOpcode(const GcnInst& inst) {
    info.translation_failed = true;
}

void Translator::Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list) {
void Translator::Translate(IR::Block* block, u32 start_pc, std::span<const GcnInst> inst_list) {
    if (inst_list.empty()) {
        return;
    }
    ir = IR::IREmitter{*block, block->begin()};
    pc = start_pc;
    for (const auto& inst : inst_list) {
        pc += inst.length;

@@ -590,11 +590,11 @@ void Translator::Translate(IR::Block* block, u32 pc, std::span<const GcnInst> in
            continue;
        }

        TranslateInstruction(inst, pc);
        TranslateInstruction(inst);
    }
}

void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
void Translator::TranslateInstruction(const GcnInst& inst) {
    // Emit instructions for each category.
    switch (inst.category) {
    case InstCategory::DataShare:

@@ -613,7 +613,7 @@ void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
        EmitExport(inst);
        break;
    case InstCategory::FlowControl:
        EmitFlowControl(pc, inst);
        EmitFlowControl(inst);
        break;
    case InstCategory::ScalarALU:
        EmitScalarAlu(inst);
@@ -61,13 +61,13 @@ public:
    explicit Translator(Info& info, const RuntimeInfo& runtime_info, const Profile& profile);

    void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list);
    void TranslateInstruction(const GcnInst& inst, u32 pc);
    void TranslateInstruction(const GcnInst& inst);

    // Instruction categories
    void EmitPrologue(IR::Block* first_block);
    void EmitFetch(const GcnInst& inst);
    void EmitExport(const GcnInst& inst);
    void EmitFlowControl(u32 pc, const GcnInst& inst);
    void EmitFlowControl(const GcnInst& inst);
    void EmitScalarAlu(const GcnInst& inst);
    void EmitScalarMemory(const GcnInst& inst);
    void EmitVectorAlu(const GcnInst& inst);

@@ -126,7 +126,7 @@ public:
    void S_FLBIT_I32_B32(const GcnInst& inst);
    void S_FLBIT_I32_B64(const GcnInst& inst);
    void S_BITSET_B32(const GcnInst& inst, u32 bit_value);
    void S_GETPC_B64(u32 pc, const GcnInst& inst);
    void S_GETPC_B64(const GcnInst& inst);
    void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
    void S_ABS_I32(const GcnInst& inst);

@@ -337,6 +337,7 @@ private:
    std::unordered_map<u32, IR::VectorReg> vgpr_map;
    std::array<IR::Attribute, MaxInterpVgpr> vgpr_to_interp{};
    bool opcode_missing = false;
    u32 pc{};
};

} // namespace Shader::Gcn
@@ -588,7 +588,7 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {

IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::ScalarReg tsharp_reg,
                          const IR::ScalarReg sampler_reg, const IR::VectorReg addr_reg,
                          bool gather) {
                          bool gather, u32 pc) {
    const auto& mimg = inst.control.mimg;
    const auto flags = MimgModifierFlags(mimg.mod);

@@ -602,6 +602,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
    info.is_array.Assign(mimg.da);
    info.is_unnormalized.Assign(mimg.unrm);
    info.is_r128.Assign(mimg.r128);
    info.pc.Assign(pc);

    if (gather) {
        info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);

@@ -610,11 +611,11 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
        info.has_derivatives.Assign(flags.test(MimgModifier::Derivative));
    }

    // Load first dword of T# and S#. We will use them as the handle that will guide resource
    // tracking pass where to read the sharps. This will later also get patched to the SPIRV texture
    // binding index.
    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
    const IR::Value inline_sampler =
    // Load first dword of T# and the full S#. We will use them as the handle that will guide
    // resource tracking pass where to read the sharps. This will later also get patched to the
    // backend texture binding index.
    const IR::Value image_handle = ir.GetScalarReg(tsharp_reg);
    const IR::Value sampler_handle =
        ir.CompositeConstruct(ir.GetScalarReg(sampler_reg), ir.GetScalarReg(sampler_reg + 1),
                              ir.GetScalarReg(sampler_reg + 2), ir.GetScalarReg(sampler_reg + 3));

@@ -652,8 +653,8 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
    const IR::Value address4 = get_addr_reg(12);

    // Issue the placeholder IR instruction.
    IR::Value texel =
        ir.ImageSampleRaw(handle, address1, address2, address3, address4, inline_sampler, info);
    IR::Value texel = ir.ImageSampleRaw(image_handle, sampler_handle, address1, address2, address3,
                                        address4, info);
    if (info.is_depth && !gather) {
        // For non-gather depth sampling, only return a single value.
        texel = ir.CompositeExtract(texel, 0);

@@ -669,7 +670,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
    const IR::ScalarReg sampler_reg{inst.src[3].code * 4};
    const auto flags = MimgModifierFlags(mimg.mod);

    const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, false);
    const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, false, pc);
    for (u32 i = 0; i < 4; i++) {
        if (((mimg.dmask >> i) & 1) == 0) {
            continue;

@@ -698,7 +699,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
    // should be always 1st (R) component for depth
    ASSERT(!flags.test(MimgModifier::Pcf) || mimg.dmask & 1);

    const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, true);
    const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, true, pc);
    for (u32 i = 0; i < 4; i++) {
        const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
        ir.SetVectorReg(dest_reg++, value);