shadPS4/src/shader_recompiler/recompiler.cpp
TheTurtle 1f9ac53c28
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions
shader_recompiler: Improve divergence handling and readlane elimintation (#2667)
* control_flow_graph: Improve divergence handling

* recompiler: Simplify optimization passes

Removes a redudant constant propagation and cleans up the passes a little

* ir_passes: Add new readlane elimination pass

The algorithm has grown complex enough where it deserves its own pass. The old implementation could only handle a single phi level properly,
however this one should be able to eliminate vast majority of lane cases remaining. It first performs a traversal of the phi tree to ensure
that all phi sources can be rewritten into an expected value and then performs elimintation by recursively duplicating the phi nodes at each step,
in order to preserve control flow.

* clang format

* control_flow_graph: Remove debug code
2025-03-23 00:35:42 +02:00

89 lines
3.7 KiB
C++

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/structured_control_flow.h"
#include "shader_recompiler/ir/passes/ir_passes.h"
#include "shader_recompiler/ir/post_order.h"
#include "shader_recompiler/recompiler.h"
namespace Shader {
IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
size_t num_syntax_blocks{};
for (const auto& node : syntax_list) {
if (node.type == IR::AbstractSyntaxNode::Type::Block) {
++num_syntax_blocks;
}
}
IR::BlockList blocks;
blocks.reserve(num_syntax_blocks);
u32 order_index{};
for (const auto& node : syntax_list) {
if (node.type == IR::AbstractSyntaxNode::Type::Block) {
blocks.push_back(node.data.block);
}
}
return blocks;
}
IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
RuntimeInfo& runtime_info, const Profile& profile) {
// Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
if (code[0] != token_mov_vcchi) {
LOG_WARNING(Render_Recompiler, "First instruction is not s_mov_b32 vcc_hi, #imm");
}
Gcn::GcnCodeSlice slice(code.data(), code.data() + code.size());
Gcn::GcnDecodeContext decoder;
// Decode and save instructions
IR::Program program{info};
program.ins_list.reserve(code.size());
while (!slice.atEnd()) {
program.ins_list.emplace_back(decoder.decodeInstruction(slice));
}
// Clear any previous pooled data.
pools.ReleaseContents();
// Create control flow graph
Common::ObjectPool<Gcn::Block> gcn_block_pool{64};
Gcn::CFG cfg{gcn_block_pool, program.ins_list};
// Structurize control flow graph and create program.
program.syntax_list = Shader::Gcn::BuildASL(pools.inst_pool, pools.block_pool, cfg,
program.info, runtime_info, profile);
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
// Run optimization passes
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::IdentityRemovalPass(program.blocks);
if (info.l_stage == LogicalStage::TessellationControl) {
Shader::Optimization::TessellationPreprocess(program, runtime_info);
Shader::Optimization::HullShaderTransform(program, runtime_info);
} else if (info.l_stage == LogicalStage::TessellationEval) {
Shader::Optimization::TessellationPreprocess(program, runtime_info);
Shader::Optimization::DomainShaderTransform(program, runtime_info);
}
Shader::Optimization::RingAccessElimination(program, runtime_info);
Shader::Optimization::ReadLaneEliminationPass(program);
Shader::Optimization::FlattenExtendedUserdataPass(program);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::LowerBufferFormatToRaw(program);
Shader::Optimization::SharedMemoryToStoragePass(program, runtime_info, profile);
Shader::Optimization::SharedMemoryBarrierPass(program, runtime_info, profile);
Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::DeadCodeEliminationPass(program);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::CollectShaderInfoPass(program);
return program;
}
} // namespace Shader