From 8beb54f8d5ce81a9cbdedc15c973d4b024a1e1e1 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sat, 22 Mar 2025 23:07:21 +0200 Subject: [PATCH] ir_passes: Add new readlane elimination pass The algorithm has grown complex enough that it deserves its own pass. The old implementation could only handle a single phi level properly; this one should be able to eliminate the vast majority of the remaining lane cases. It first performs a traversal of the phi tree to ensure that all phi sources can be rewritten into an expected value and then performs elimination by recursively duplicating the phi nodes at each step, in order to preserve control flow. --- CMakeLists.txt | 2 +- .../ir/passes/constant_propagation_pass.cpp | 50 -------- src/shader_recompiler/ir/passes/ir_passes.h | 1 + .../ir/passes/readlane_elimination_pass.cpp | 115 ++++++++++++++++++ src/shader_recompiler/recompiler.cpp | 1 + 5 files changed, 118 insertions(+), 51 deletions(-) create mode 100644 src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f05587d38..185205221 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -771,6 +771,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/ir_passes.h src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp + src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/ring_access_elimination.cpp src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -1121,7 +1122,6 @@ cmrc_add_resource_library(embedded-resources src/images/gold.png src/images/platinum.png src/images/silver.png) - target_link_libraries(shadps4 PRIVATE res::embedded) # ImGui resources diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp 
b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index c8a4b13cb..5c66b1115 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -251,54 +251,6 @@ void FoldCmpClass(IR::Block& block, IR::Inst& inst) { } } -void FoldReadLane(IR::Block& block, IR::Inst& inst) { - const u32 lane = inst.Arg(1).U32(); - IR::Inst* prod = inst.Arg(0).InstRecursive(); - - const auto search_chain = [lane](const IR::Inst* prod) -> IR::Value { - while (prod->GetOpcode() == IR::Opcode::WriteLane) { - if (prod->Arg(2).U32() == lane) { - return prod->Arg(1); - } - prod = prod->Arg(0).InstRecursive(); - } - return {}; - }; - - if (prod->GetOpcode() == IR::Opcode::WriteLane) { - if (const IR::Value value = search_chain(prod); !value.IsEmpty()) { - inst.ReplaceUsesWith(value); - } - return; - } - - if (prod->GetOpcode() == IR::Opcode::Phi) { - boost::container::small_vector phi_args; - for (size_t arg_index = 0; arg_index < prod->NumArgs(); ++arg_index) { - const IR::Inst* arg{prod->Arg(arg_index).InstRecursive()}; - if (arg->GetOpcode() != IR::Opcode::WriteLane) { - return; - } - const IR::Value value = search_chain(arg); - if (value.IsEmpty()) { - continue; - } - phi_args.emplace_back(value); - } - if (std::ranges::all_of(phi_args, [&](IR::Value value) { return value == phi_args[0]; })) { - inst.ReplaceUsesWith(phi_args[0]); - return; - } - const auto insert_point = IR::Block::InstructionList::s_iterator_to(*prod); - IR::Inst* const new_phi{&*block.PrependNewInst(insert_point, IR::Opcode::Phi)}; - new_phi->SetFlags(IR::Type::U32); - for (size_t arg_index = 0; arg_index < phi_args.size(); arg_index++) { - new_phi->AddPhiOperand(prod->PhiBlock(arg_index), phi_args[arg_index]); - } - inst.ReplaceUsesWith(IR::Value{new_phi}); - } -} - void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::IAdd32: @@ -408,8 +360,6 @@ void 
ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF32: case IR::Opcode::SelectF64: return FoldSelect(inst); - case IR::Opcode::ReadLane: - return FoldReadLane(block, inst); case IR::Opcode::FPNeg32: FoldWhenAllImmediates(inst, [](f32 a) { return -a; }); return; diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 1567d923c..760dbb112 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -17,6 +17,7 @@ void IdentityRemovalPass(IR::BlockList& program); void DeadCodeEliminationPass(IR::Program& program); void ConstantPropagationPass(IR::BlockList& program); void FlattenExtendedUserdataPass(IR::Program& program); +void ReadLaneEliminationPass(IR::Program& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); void LowerBufferFormatToRaw(IR::Program& program); diff --git a/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp new file mode 100644 index 000000000..fbe382d41 --- /dev/null +++ b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp @@ -0,0 +1,115 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/program.h" + +namespace Shader::Optimization { + +static IR::Inst* SearchChain(IR::Inst* inst, u32 lane) { + while (inst->GetOpcode() == IR::Opcode::WriteLane) { + if (inst->Arg(2).U32() == lane) { + // We found a possible write lane source, return it. 
+ return inst; + } + inst = inst->Arg(0).InstRecursive(); + } + return inst; +} + +static bool IsPossibleToEliminate(IR::Inst* inst, u32 lane) { + // Breadth-first search visiting the right most arguments first + boost::container::small_vector visited; + std::queue queue; + queue.push(inst); + + while (!queue.empty()) { + // Pop one instruction from the queue + IR::Inst* inst{queue.front()}; + queue.pop(); + + // If it's a WriteLane search for possible candidates + if (inst = SearchChain(inst, lane); inst->GetOpcode() == IR::Opcode::WriteLane) { + // We found a possible write lane source, stop looking here. + continue; + } + // If there are other instructions in-between that use the value we can't eliminate. + if (inst->GetOpcode() != IR::Opcode::ReadLane && inst->GetOpcode() != IR::Opcode::Phi) { + return false; + } + // Visit the right most arguments first + for (size_t arg = inst->NumArgs(); arg--;) { + auto arg_value{inst->Arg(arg)}; + if (arg_value.IsImmediate()) { + continue; + } + // Queue instruction if it hasn't been visited + IR::Inst* arg_inst{arg_value.InstRecursive()}; + if (std::ranges::find(visited, arg_inst) == visited.end()) { + visited.push_back(arg_inst); + queue.push(arg_inst); + } + } + } + return true; +} + +using PhiMap = std::unordered_map; + +static IR::Value GetRealValue(PhiMap& phi_map, IR::Inst* inst, u32 lane) { + // If this is a WriteLane op search the chain for a possible candidate. + if (inst = SearchChain(inst, lane); inst->GetOpcode() == IR::Opcode::WriteLane) { + return inst->Arg(1); + } + + // If this is a phi, duplicate it and populate its arguments with real values. + if (inst->GetOpcode() == IR::Opcode::Phi) { + // We are in a phi cycle, use the already duplicated phi. + const auto [it, is_new_phi] = phi_map.try_emplace(inst); + if (!is_new_phi) { + return IR::Value{it->second}; + } + + // Create new phi and insert it right before the old one. 
+ const auto insert_point = IR::Block::InstructionList::s_iterator_to(*inst); + IR::Block* block = inst->GetParent(); + IR::Inst* new_phi{&*block->PrependNewInst(insert_point, IR::Opcode::Phi)}; + new_phi->SetFlags(IR::Type::U32); + it->second = new_phi; + + // Gather all arguments. + for (size_t arg_index = 0; arg_index < inst->NumArgs(); arg_index++) { + IR::Inst* arg_prod = inst->Arg(arg_index).InstRecursive(); + const IR::Value arg = GetRealValue(phi_map, arg_prod, lane); + new_phi->AddPhiOperand(inst->PhiBlock(arg_index), arg); + } + return IR::Value{new_phi}; + } + UNREACHABLE(); +} + +void ReadLaneEliminationPass(IR::Program& program) { + PhiMap phi_map; + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() != IR::Opcode::ReadLane) { + continue; + } + const u32 lane = inst.Arg(1).U32(); + IR::Inst* prod = inst.Arg(0).InstRecursive(); + + // Check simple case of no control flow and phis + if (prod = SearchChain(prod, lane); prod->GetOpcode() == IR::Opcode::WriteLane) { + inst.ReplaceUsesWith(prod->Arg(1)); + continue; + } + + // Traverse the phi tree to see if it's possible to eliminate + if (prod->GetOpcode() == IR::Opcode::Phi && IsPossibleToEliminate(prod, lane)) { + inst.ReplaceUsesWith(GetRealValue(phi_map, prod, lane)); + phi_map.clear(); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 5f87470b0..5004e0beb 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -71,6 +71,7 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info Shader::Optimization::DomainShaderTransform(program, runtime_info); } Shader::Optimization::RingAccessElimination(program, runtime_info); + Shader::Optimization::ReadLaneEliminationPass(program); Shader::Optimization::FlattenExtendedUserdataPass(program); Shader::Optimization::ResourceTrackingPass(program); 
Shader::Optimization::LowerBufferFormatToRaw(program);