shadPS4/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp
TheTurtle 1f9ac53c28
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions
shader_recompiler: Improve divergence handling and readlane elimintation (#2667)
* control_flow_graph: Improve divergence handling

* recompiler: Simplify optimization passes

Removes a redudant constant propagation and cleans up the passes a little

* ir_passes: Add new readlane elimination pass

The algorithm has grown complex enough where it deserves its own pass. The old implementation could only handle a single phi level properly,
however this one should be able to eliminate vast majority of lane cases remaining. It first performs a traversal of the phi tree to ensure
that all phi sources can be rewritten into an expected value and then performs elimintation by recursively duplicating the phi nodes at each step,
in order to preserve control flow.

* clang format

* control_flow_graph: Remove debug code
2025-03-23 00:35:42 +02:00

116 lines
4.2 KiB
C++

// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
static IR::Inst* SearchChain(IR::Inst* inst, u32 lane) {
while (inst->GetOpcode() == IR::Opcode::WriteLane) {
if (inst->Arg(2).U32() == lane) {
// We found a possible write lane source, return it.
return inst;
}
inst = inst->Arg(0).InstRecursive();
}
return inst;
}
static bool IsPossibleToEliminate(IR::Inst* inst, u32 lane) {
// Breadth-first search visiting the right most arguments first
boost::container::small_vector<IR::Inst*, 16> visited;
std::queue<IR::Inst*> queue;
queue.push(inst);
while (!queue.empty()) {
// Pop one instruction from the queue
IR::Inst* inst{queue.front()};
queue.pop();
// If it's a WriteLane search for possible candidates
if (inst = SearchChain(inst, lane); inst->GetOpcode() == IR::Opcode::WriteLane) {
// We found a possible write lane source, stop looking here.
continue;
}
// If there are other instructions in-between that use the value we can't eliminate.
if (inst->GetOpcode() != IR::Opcode::ReadLane && inst->GetOpcode() != IR::Opcode::Phi) {
return false;
}
// Visit the right most arguments first
for (size_t arg = inst->NumArgs(); arg--;) {
auto arg_value{inst->Arg(arg)};
if (arg_value.IsImmediate()) {
continue;
}
// Queue instruction if it hasn't been visited
IR::Inst* arg_inst{arg_value.InstRecursive()};
if (std::ranges::find(visited, arg_inst) == visited.end()) {
visited.push_back(arg_inst);
queue.push(arg_inst);
}
}
}
return true;
}
using PhiMap = std::unordered_map<IR::Inst*, IR::Inst*>;
static IR::Value GetRealValue(PhiMap& phi_map, IR::Inst* inst, u32 lane) {
// If this is a WriteLane op search the chain for a possible candidate.
if (inst = SearchChain(inst, lane); inst->GetOpcode() == IR::Opcode::WriteLane) {
return inst->Arg(1);
}
// If this is a phi, duplicate it and populate its arguments with real values.
if (inst->GetOpcode() == IR::Opcode::Phi) {
// We are in a phi cycle, use the already duplicated phi.
const auto [it, is_new_phi] = phi_map.try_emplace(inst);
if (!is_new_phi) {
return IR::Value{it->second};
}
// Create new phi and insert it right before the old one.
const auto insert_point = IR::Block::InstructionList::s_iterator_to(*inst);
IR::Block* block = inst->GetParent();
IR::Inst* new_phi{&*block->PrependNewInst(insert_point, IR::Opcode::Phi)};
new_phi->SetFlags(IR::Type::U32);
it->second = new_phi;
// Gather all arguments.
for (size_t arg_index = 0; arg_index < inst->NumArgs(); arg_index++) {
IR::Inst* arg_prod = inst->Arg(arg_index).InstRecursive();
const IR::Value arg = GetRealValue(phi_map, arg_prod, lane);
new_phi->AddPhiOperand(inst->PhiBlock(arg_index), arg);
}
return IR::Value{new_phi};
}
UNREACHABLE();
}
void ReadLaneEliminationPass(IR::Program& program) {
PhiMap phi_map;
for (IR::Block* const block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
if (inst.GetOpcode() != IR::Opcode::ReadLane) {
continue;
}
const u32 lane = inst.Arg(1).U32();
IR::Inst* prod = inst.Arg(0).InstRecursive();
// Check simple case of no control flow and phis
if (prod = SearchChain(prod, lane); prod->GetOpcode() == IR::Opcode::WriteLane) {
inst.ReplaceUsesWith(prod->Arg(1));
continue;
}
// Traverse the phi tree to see if it's possible to eliminate
if (prod->GetOpcode() == IR::Opcode::Phi && IsPossibleToEliminate(prod, lane)) {
inst.ReplaceUsesWith(GetRealValue(phi_map, prod, lane));
phi_map.clear();
}
}
}
}
} // namespace Shader::Optimization