diff --git a/CMakeLists.txt b/CMakeLists.txt index f05587d38..185205221 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -771,6 +771,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/ir_passes.h src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp + src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/ring_access_elimination.cpp src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -1121,7 +1122,6 @@ cmrc_add_resource_library(embedded-resources src/images/gold.png src/images/platinum.png src/images/silver.png) - target_link_libraries(shadps4 PRIVATE res::embedded) # ImGui resources diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index c8a4b13cb..5c66b1115 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -251,54 +251,6 @@ void FoldCmpClass(IR::Block& block, IR::Inst& inst) { } } -void FoldReadLane(IR::Block& block, IR::Inst& inst) { - const u32 lane = inst.Arg(1).U32(); - IR::Inst* prod = inst.Arg(0).InstRecursive(); - - const auto search_chain = [lane](const IR::Inst* prod) -> IR::Value { - while (prod->GetOpcode() == IR::Opcode::WriteLane) { - if (prod->Arg(2).U32() == lane) { - return prod->Arg(1); - } - prod = prod->Arg(0).InstRecursive(); - } - return {}; - }; - - if (prod->GetOpcode() == IR::Opcode::WriteLane) { - if (const IR::Value value = search_chain(prod); !value.IsEmpty()) { - inst.ReplaceUsesWith(value); - } - return; - } - - if (prod->GetOpcode() == IR::Opcode::Phi) { - boost::container::small_vector phi_args; - for (size_t arg_index = 0; arg_index < prod->NumArgs(); ++arg_index) { - const IR::Inst* arg{prod->Arg(arg_index).InstRecursive()}; - if (arg->GetOpcode() != IR::Opcode::WriteLane) { - return; - } - const IR::Value value = search_chain(arg); - if (value.IsEmpty()) { - continue; - } - phi_args.emplace_back(value); - } - if (std::ranges::all_of(phi_args, [&](IR::Value value) { return value == phi_args[0]; })) { - inst.ReplaceUsesWith(phi_args[0]); - return; - } - const auto insert_point = IR::Block::InstructionList::s_iterator_to(*prod); - IR::Inst* const new_phi{&*block.PrependNewInst(insert_point, IR::Opcode::Phi)}; - new_phi->SetFlags(IR::Type::U32); - for (size_t arg_index = 0; arg_index < phi_args.size(); arg_index++) { - new_phi->AddPhiOperand(prod->PhiBlock(arg_index), phi_args[arg_index]); - } - inst.ReplaceUsesWith(IR::Value{new_phi}); - } -} - void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::IAdd32: @@ -408,8 +360,6 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF32: case IR::Opcode::SelectF64: return FoldSelect(inst); - case IR::Opcode::ReadLane: - return FoldReadLane(block, inst); case IR::Opcode::FPNeg32: FoldWhenAllImmediates(inst, [](f32 a) { return -a; }); return; diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 1567d923c..760dbb112 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -17,6 +17,7 @@ void IdentityRemovalPass(IR::BlockList& program); void DeadCodeEliminationPass(IR::Program& program); void ConstantPropagationPass(IR::BlockList& program); void FlattenExtendedUserdataPass(IR::Program& program); +void ReadLaneEliminationPass(IR::Program& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); void LowerBufferFormatToRaw(IR::Program& program); diff --git a/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp new file mode 100644 index 000000000..fbe382d41 --- /dev/null +++ b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp @@ -0,0 +1,115 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/program.h" + +namespace Shader::Optimization { + +static IR::Inst* SearchChain(IR::Inst* inst, u32 lane) { + while (inst->GetOpcode() == IR::Opcode::WriteLane) { + if (inst->Arg(2).U32() == lane) { + // We found a possible write lane source, return it. + return inst; + } + inst = inst->Arg(0).InstRecursive(); + } + return inst; +} + +static bool IsPossibleToEliminate(IR::Inst* inst, u32 lane) { + // Breadth-first search visiting the right most arguments first + boost::container::small_vector visited; + std::queue queue; + queue.push(inst); + + while (!queue.empty()) { + // Pop one instruction from the queue + IR::Inst* inst{queue.front()}; + queue.pop(); + + // If it's a WriteLane search for possible candidates + if (inst = SearchChain(inst, lane); inst->GetOpcode() == IR::Opcode::WriteLane) { + // We found a possible write lane source, stop looking here. + continue; + } + // If there are other instructions in-between that use the value we can't eliminate. + if (inst->GetOpcode() != IR::Opcode::ReadLane && inst->GetOpcode() != IR::Opcode::Phi) { + return false; + } + // Visit the right most arguments first + for (size_t arg = inst->NumArgs(); arg--;) { + auto arg_value{inst->Arg(arg)}; + if (arg_value.IsImmediate()) { + continue; + } + // Queue instruction if it hasn't been visited + IR::Inst* arg_inst{arg_value.InstRecursive()}; + if (std::ranges::find(visited, arg_inst) == visited.end()) { + visited.push_back(arg_inst); + queue.push(arg_inst); + } + } + } + return true; +} + +using PhiMap = std::unordered_map; + +static IR::Value GetRealValue(PhiMap& phi_map, IR::Inst* inst, u32 lane) { + // If this is a WriteLane op search the chain for a possible candidate. + if (inst = SearchChain(inst, lane); inst->GetOpcode() == IR::Opcode::WriteLane) { + return inst->Arg(1); + } + + // If this is a phi, duplicate it and populate its arguments with real values. + if (inst->GetOpcode() == IR::Opcode::Phi) { + // We are in a phi cycle, use the already duplicated phi. + const auto [it, is_new_phi] = phi_map.try_emplace(inst); + if (!is_new_phi) { + return IR::Value{it->second}; + } + + // Create new phi and insert it right before the old one. + const auto insert_point = IR::Block::InstructionList::s_iterator_to(*inst); + IR::Block* block = inst->GetParent(); + IR::Inst* new_phi{&*block->PrependNewInst(insert_point, IR::Opcode::Phi)}; + new_phi->SetFlags(IR::Type::U32); + it->second = new_phi; + + // Gather all arguments. + for (size_t arg_index = 0; arg_index < inst->NumArgs(); arg_index++) { + IR::Inst* arg_prod = inst->Arg(arg_index).InstRecursive(); + const IR::Value arg = GetRealValue(phi_map, arg_prod, lane); + new_phi->AddPhiOperand(inst->PhiBlock(arg_index), arg); + } + return IR::Value{new_phi}; + } + UNREACHABLE(); +} + +void ReadLaneEliminationPass(IR::Program& program) { + PhiMap phi_map; + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() != IR::Opcode::ReadLane) { + continue; + } + const u32 lane = inst.Arg(1).U32(); + IR::Inst* prod = inst.Arg(0).InstRecursive(); + + // Check simple case of no control flow and phis + if (prod = SearchChain(prod, lane); prod->GetOpcode() == IR::Opcode::WriteLane) { + inst.ReplaceUsesWith(prod->Arg(1)); + continue; + } + + // Traverse the phi tree to see if it's possible to eliminate + if (prod->GetOpcode() == IR::Opcode::Phi && IsPossibleToEliminate(prod, lane)) { + inst.ReplaceUsesWith(GetRealValue(phi_map, prod, lane)); + phi_map.clear(); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 5f87470b0..5004e0beb 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -71,6 +71,7 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info Shader::Optimization::DomainShaderTransform(program, runtime_info); } Shader::Optimization::RingAccessElimination(program, runtime_info); + Shader::Optimization::ReadLaneEliminationPass(program); Shader::Optimization::FlattenExtendedUserdataPass(program); Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::LowerBufferFormatToRaw(program);