mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-05 17:02:40 +00:00
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions
* control_flow_graph: Improve divergence handling * recompiler: Simplify optimization passes Removes a redundant constant propagation and cleans up the passes a little * ir_passes: Add new readlane elimination pass The algorithm has grown complex enough that it deserves its own pass. The old implementation could only handle a single phi level properly, however this one should be able to eliminate the vast majority of lane cases remaining. It first performs a traversal of the phi tree to ensure that all phi sources can be rewritten into an expected value and then performs elimination by recursively duplicating the phi nodes at each step, in order to preserve control flow. * clang format * control_flow_graph: Remove debug code
374 lines
14 KiB
C++
374 lines
14 KiB
C++
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#include <algorithm>
|
|
#include <unordered_map>
|
|
#include "common/assert.h"
|
|
#include "shader_recompiler/frontend/control_flow_graph.h"
|
|
|
|
namespace Shader::Gcn {
|
|
|
|
struct Compare {
|
|
bool operator()(const Block& lhs, u32 rhs) const noexcept {
|
|
return lhs.begin < rhs;
|
|
}
|
|
|
|
bool operator()(u32 lhs, const Block& rhs) const noexcept {
|
|
return lhs < rhs.begin;
|
|
}
|
|
|
|
bool operator()(const Block& lhs, const Block& rhs) const noexcept {
|
|
return lhs.begin < rhs.begin;
|
|
}
|
|
};
|
|
|
|
/// Maps a terminator instruction to the IR condition guarding its taken edge.
/// Non-branch instructions yield Condition::True (unconditional).
static IR::Condition MakeCondition(const GcnInst& inst) {
    // V_CMPX_* writes EXEC, so the block effectively branches on EXEC != 0.
    if (inst.IsCmpx()) {
        return IR::Condition::Execnz;
    }
    const auto op = inst.opcode;
    if (op == Opcode::S_CBRANCH_SCC0) {
        return IR::Condition::Scc0;
    }
    if (op == Opcode::S_CBRANCH_SCC1) {
        return IR::Condition::Scc1;
    }
    if (op == Opcode::S_CBRANCH_VCCZ) {
        return IR::Condition::Vccz;
    }
    if (op == Opcode::S_CBRANCH_VCCNZ) {
        return IR::Condition::Vccnz;
    }
    if (op == Opcode::S_CBRANCH_EXECZ) {
        return IR::Condition::Execz;
    }
    if (op == Opcode::S_CBRANCH_EXECNZ) {
        return IR::Condition::Execnz;
    }
    return IR::Condition::True;
}
|
|
|
|
static bool IgnoresExecMask(const GcnInst& inst) {
|
|
// EXEC mask does not affect scalar instructions or branches.
|
|
switch (inst.category) {
|
|
case InstCategory::ScalarALU:
|
|
case InstCategory::ScalarMemory:
|
|
case InstCategory::FlowControl:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
// Read/Write Lane instructions are not affected either.
|
|
switch (inst.opcode) {
|
|
case Opcode::V_READLANE_B32:
|
|
case Opcode::V_WRITELANE_B32:
|
|
case Opcode::V_READFIRSTLANE_B32:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Label slots reserved up-front so typical shaders avoid reallocation.
static constexpr size_t LabelReserveSize = 32;
|
|
|
|
/// Builds the control flow graph for the given instruction list:
/// discovers labels, carves blocks between them, links branch targets,
/// then splits EXEC-divergence scopes into dedicated blocks.
CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
    : block_pool{block_pool_}, inst_list{inst_list_} {
    // One extra slot so index_to_pc[inst_list.size()] can hold the end PC.
    index_to_pc.resize(inst_list.size() + 1);
    labels.reserve(LabelReserveSize);
    EmitLabels();
    EmitBlocks();
    LinkBlocks();
    SplitDivergenceScopes();
}
|
|
|
|
void CFG::EmitLabels() {
|
|
// Always set a label at entry point.
|
|
u32 pc = 0;
|
|
AddLabel(pc);
|
|
|
|
// Iterate instruction list and add labels to branch targets.
|
|
for (u32 i = 0; i < inst_list.size(); i++) {
|
|
index_to_pc[i] = pc;
|
|
const GcnInst inst = inst_list[i];
|
|
if (inst.IsUnconditionalBranch()) {
|
|
const u32 target = inst.BranchTarget(pc);
|
|
AddLabel(target);
|
|
// Emit this label so that the block ends with s_branch instruction
|
|
AddLabel(pc + inst.length);
|
|
} else if (inst.IsConditionalBranch()) {
|
|
const u32 true_label = inst.BranchTarget(pc);
|
|
const u32 false_label = pc + inst.length;
|
|
AddLabel(true_label);
|
|
AddLabel(false_label);
|
|
} else if (inst.opcode == Opcode::S_ENDPGM) {
|
|
const u32 next_label = pc + inst.length;
|
|
AddLabel(next_label);
|
|
}
|
|
pc += inst.length;
|
|
}
|
|
index_to_pc[inst_list.size()] = pc;
|
|
|
|
// Sort labels to make sure block insertion is correct.
|
|
std::ranges::sort(labels);
|
|
}
|
|
|
|
/// Splits ranges of instructions that run under a modified EXEC mask
/// (divergence scopes) out of their parent blocks into dedicated blocks
/// guarded by an Execnz condition, inserting epilogue blocks as needed
/// to preserve the original control flow.
void CFG::SplitDivergenceScopes() {
    const auto is_open_scope = [](const GcnInst& inst) {
        // An open scope instruction is an instruction that modifies EXEC
        // but also saves the previous value to restore later. This indicates
        // we are entering a scope.
        return inst.opcode == Opcode::S_AND_SAVEEXEC_B64 ||
               // While this instruction does not save EXEC it is often used paired
               // with SAVEEXEC to mask the threads that didn't pass the condition
               // of initial branch.
               (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) ||
               inst.IsCmpx();
    };
    const auto is_close_scope = [](const GcnInst& inst) {
        // Closing an EXEC scope can be either a branch instruction
        // (typical case when S_AND_SAVEEXEC_B64 is right before a branch)
        // or by a move instruction to EXEC that restores the backup.
        return (inst.opcode == Opcode::S_MOV_B64 && inst.dst[0].field == OperandField::ExecLo) ||
               // Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
               // Those instructions need to be wrapped in the condition as well so allow branch
               // as end scope instruction.
               inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ENDPGM ||
               (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo);
    };

    for (auto blk = blocks.begin(); blk != blocks.end(); blk++) {
        auto next_blk = std::next(blk);
        // Instruction index of the currently open scope, or -1 when no
        // scope is open.
        s32 curr_begin = -1;
        for (size_t index = blk->begin_index; index <= blk->end_index; index++) {
            const auto& inst = inst_list[index];
            const bool is_close = is_close_scope(inst);
            // A scope ends either at an explicit close instruction or at the
            // end of the enclosing block.
            if ((is_close || index == blk->end_index) && curr_begin != -1) {
                // If there are no instructions inside scope don't do anything.
                if (index - curr_begin == 1) {
                    curr_begin = -1;
                    continue;
                }
                // If all instructions in the scope ignore exec masking, we shouldn't insert a
                // scope.
                const auto start = inst_list.begin() + curr_begin + 1;
                if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask)) {
                    // Determine the first instruction affected by the exec mask.
                    do {
                        ++curr_begin;
                    } while (IgnoresExecMask(inst_list[curr_begin]));

                    // Determine the last instruction affected by the exec mask.
                    s32 curr_end = index;
                    while (IgnoresExecMask(inst_list[curr_end])) {
                        --curr_end;
                    }

                    // Create a new block for the divergence scope.
                    Block* block = block_pool.Create();
                    block->begin = index_to_pc[curr_begin];
                    block->end = index_to_pc[curr_end];
                    block->begin_index = curr_begin;
                    block->end_index = curr_end;
                    block->end_inst = inst_list[curr_end];
                    blocks.insert_before(next_blk, *block);

                    // If we are inside the parent block, make an epilogue block and jump to it.
                    if (curr_end != blk->end_index) {
                        // The epilogue covers the remainder of the parent block
                        // and inherits its terminator and branch targets.
                        Block* epi_block = block_pool.Create();
                        epi_block->begin = index_to_pc[curr_end + 1];
                        epi_block->end = blk->end;
                        epi_block->begin_index = curr_end + 1;
                        epi_block->end_index = blk->end_index;
                        epi_block->end_inst = blk->end_inst;
                        epi_block->cond = blk->cond;
                        epi_block->end_class = blk->end_class;
                        epi_block->branch_true = blk->branch_true;
                        epi_block->branch_false = blk->branch_false;
                        blocks.insert_before(next_blk, *epi_block);

                        // Have divergence block always jump to epilogue block.
                        block->cond = IR::Condition::True;
                        block->branch_true = epi_block;
                        block->branch_false = nullptr;

                        // If the parent block fails to enter divergence block make it jump to
                        // epilogue too
                        blk->branch_false = epi_block;
                    } else {
                        // No epilogue block is needed since the divergence block
                        // also ends the parent block. Inherit the end condition.
                        // NOTE(review): parent_blk is unused below.
                        auto& parent_blk = *blk;
                        ASSERT(blk->cond == IR::Condition::True && blk->branch_true);
                        block->cond = IR::Condition::True;
                        block->branch_true = blk->branch_true;
                        block->branch_false = nullptr;

                        // If the parent block didn't enter the divergence scope
                        // have it jump directly to the next one
                        blk->branch_false = blk->branch_true;
                    }

                    // Shrink parent block to end right before curr_begin
                    // and make it jump to divergence block
                    --curr_begin;
                    blk->end = index_to_pc[curr_begin];
                    blk->end_index = curr_begin;
                    blk->end_inst = inst_list[curr_begin];
                    blk->cond = IR::Condition::Execnz;
                    blk->end_class = EndClass::Branch;
                    blk->branch_true = block;
                }
                // Reset scope begin.
                curr_begin = -1;
            }
            // Mark a potential start of an exec scope.
            if (is_open_scope(inst)) {
                curr_begin = index;
            }
        }
    }
}
|
|
|
|
void CFG::EmitBlocks() {
|
|
for (auto it = labels.cbegin(); it != labels.cend(); ++it) {
|
|
const Label start = *it;
|
|
const auto next_it = std::next(it);
|
|
const bool is_last = (next_it == labels.cend());
|
|
if (is_last) {
|
|
// Last label is special.
|
|
return;
|
|
}
|
|
// The end label is the start instruction of next block.
|
|
// The end instruction of this block is the previous one.
|
|
const Label end = *next_it;
|
|
const size_t end_index = GetIndex(end) - 1;
|
|
const auto& end_inst = inst_list[end_index];
|
|
|
|
// Insert block between the labels using the last instruction
|
|
// as an indicator for branching type.
|
|
Block* block = block_pool.Create();
|
|
block->begin = start;
|
|
block->end = end;
|
|
block->begin_index = GetIndex(start);
|
|
block->end_index = end_index;
|
|
block->end_inst = end_inst;
|
|
block->cond = MakeCondition(end_inst);
|
|
blocks.insert(*block);
|
|
}
|
|
}
|
|
|
|
void CFG::LinkBlocks() {
|
|
const auto get_block = [this](u32 address) {
|
|
auto it = blocks.find(address, Compare{});
|
|
ASSERT_MSG(it != blocks.cend() && it->begin == address);
|
|
return &*it;
|
|
};
|
|
|
|
for (auto it = blocks.begin(); it != blocks.end(); it++) {
|
|
auto& block = *it;
|
|
const auto end_inst{block.end_inst};
|
|
|
|
// If the block doesn't end with a branch we simply
|
|
// need to link with the next block.
|
|
if (!end_inst.IsTerminateInstruction()) {
|
|
auto* next_block = get_block(block.end);
|
|
++next_block->num_predecessors;
|
|
|
|
block.branch_true = next_block;
|
|
block.end_class = EndClass::Branch;
|
|
continue;
|
|
}
|
|
|
|
// Find the branch targets from the instruction and link the blocks.
|
|
// Note: Block end address is one instruction after end_inst.
|
|
const u32 branch_pc = block.end - end_inst.length;
|
|
const u32 target_pc = end_inst.BranchTarget(branch_pc);
|
|
if (end_inst.IsUnconditionalBranch()) {
|
|
auto* target_block = get_block(target_pc);
|
|
++target_block->num_predecessors;
|
|
|
|
block.branch_true = target_block;
|
|
block.end_class = EndClass::Branch;
|
|
} else if (end_inst.IsConditionalBranch()) {
|
|
auto* target_block = get_block(target_pc);
|
|
++target_block->num_predecessors;
|
|
|
|
auto* end_block = get_block(block.end);
|
|
++end_block->num_predecessors;
|
|
|
|
block.branch_true = target_block;
|
|
block.branch_false = end_block;
|
|
block.end_class = EndClass::Branch;
|
|
} else if (end_inst.opcode == Opcode::S_ENDPGM) {
|
|
const auto& prev_inst = inst_list[block.end_index - 1];
|
|
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) {
|
|
if (prev_inst.control.exp.target != 9) {
|
|
block.end_class = EndClass::Kill;
|
|
} else if (const auto& exec_mask = inst_list[block.end_index - 2];
|
|
exec_mask.src[0].field == OperandField::ConstZero) {
|
|
block.end_class = EndClass::Kill;
|
|
} else {
|
|
block.end_class = EndClass::Exit;
|
|
}
|
|
} else {
|
|
block.end_class = EndClass::Exit;
|
|
}
|
|
} else {
|
|
UNREACHABLE();
|
|
}
|
|
}
|
|
}
|
|
|
|
std::string CFG::Dot() const {
|
|
int node_uid{0};
|
|
|
|
const auto name_of = [](const Block& block) { return fmt::format("\"{:#x}\"", block.begin); };
|
|
|
|
std::string dot{"digraph shader {\n"};
|
|
dot += fmt::format("\tsubgraph cluster_{} {{\n", 0);
|
|
dot += fmt::format("\t\tnode [style=filled];\n");
|
|
for (const Block& block : blocks) {
|
|
const std::string name{name_of(block)};
|
|
const auto add_branch = [&](Block* branch, bool add_label) {
|
|
dot += fmt::format("\t\t{}->{}", name, name_of(*branch));
|
|
if (add_label && block.cond != IR::Condition::True &&
|
|
block.cond != IR::Condition::False) {
|
|
dot += fmt::format(" [label=\"{}\"]", block.cond);
|
|
}
|
|
dot += '\n';
|
|
};
|
|
dot += fmt::format("\t\t{};\n", name);
|
|
switch (block.end_class) {
|
|
case EndClass::Branch:
|
|
if (block.cond != IR::Condition::False) {
|
|
add_branch(block.branch_true, true);
|
|
}
|
|
if (block.cond != IR::Condition::True) {
|
|
add_branch(block.branch_false, false);
|
|
}
|
|
break;
|
|
case EndClass::Exit:
|
|
dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
|
|
dot +=
|
|
fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", node_uid);
|
|
++node_uid;
|
|
break;
|
|
case EndClass::Kill:
|
|
dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
|
|
dot +=
|
|
fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", node_uid);
|
|
++node_uid;
|
|
break;
|
|
}
|
|
}
|
|
dot += "\t\tlabel = \"main\";\n\t}\n";
|
|
if (blocks.empty()) {
|
|
dot += "Start;\n";
|
|
} else {
|
|
dot += fmt::format("\tStart -> {};\n", name_of(*blocks.begin()));
|
|
}
|
|
dot += fmt::format("\tStart [shape=diamond];\n");
|
|
dot += "}\n";
|
|
return dot;
|
|
}
|
|
|
|
} // namespace Shader::Gcn
|