mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-15 16:18:56 +00:00
video: Import new shader recompiler + display a triangle (#142)
This commit is contained in:
403
src/shader_recompiler/ir/passes/constant_propogation_pass.cpp
Normal file
403
src/shader_recompiler/ir/passes/constant_propogation_pass.cpp
Normal file
@@ -0,0 +1,403 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <bit>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include "common/func_traits.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
template <typename T>
|
||||
[[nodiscard]] T Arg(const IR::Value& value) {
|
||||
if constexpr (std::is_same_v<T, bool>) {
|
||||
return value.U1();
|
||||
} else if constexpr (std::is_same_v<T, u32>) {
|
||||
return value.U32();
|
||||
} else if constexpr (std::is_same_v<T, s32>) {
|
||||
return static_cast<s32>(value.U32());
|
||||
} else if constexpr (std::is_same_v<T, f32>) {
|
||||
return value.F32();
|
||||
} else if constexpr (std::is_same_v<T, u64>) {
|
||||
return value.U64();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Func, size_t... I>
|
||||
IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
|
||||
using Traits = Common::LambdaTraits<decltype(func)>;
|
||||
return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
|
||||
}
|
||||
|
||||
template <typename T, typename ImmFn>
|
||||
bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
|
||||
const IR::Value lhs{inst.Arg(0)};
|
||||
const IR::Value rhs{inst.Arg(1)};
|
||||
|
||||
const bool is_lhs_immediate{lhs.IsImmediate()};
|
||||
const bool is_rhs_immediate{rhs.IsImmediate()};
|
||||
|
||||
if (is_lhs_immediate && is_rhs_immediate) {
|
||||
const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))};
|
||||
inst.ReplaceUsesWith(IR::Value{result});
|
||||
return false;
|
||||
}
|
||||
if (is_lhs_immediate && !is_rhs_immediate) {
|
||||
IR::Inst* const rhs_inst{rhs.InstRecursive()};
|
||||
if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) {
|
||||
const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
|
||||
inst.SetArg(0, rhs_inst->Arg(0));
|
||||
inst.SetArg(1, IR::Value{combined});
|
||||
} else {
|
||||
// Normalize
|
||||
inst.SetArg(0, rhs);
|
||||
inst.SetArg(1, lhs);
|
||||
}
|
||||
}
|
||||
if (!is_lhs_immediate && is_rhs_immediate) {
|
||||
const IR::Inst* const lhs_inst{lhs.InstRecursive()};
|
||||
if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) {
|
||||
const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
|
||||
inst.SetArg(0, lhs_inst->Arg(0));
|
||||
inst.SetArg(1, IR::Value{combined});
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
|
||||
if (!inst.AreAllArgsImmediates() /*|| inst.HasAssociatedPseudoOperation()*/) {
|
||||
return false;
|
||||
}
|
||||
using Indices = std::make_index_sequence<Common::LambdaTraits<decltype(func)>::NUM_ARGS>;
|
||||
inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
|
||||
return true;
|
||||
}
|
||||
|
||||
template <IR::Opcode op, typename Dest, typename Source>
|
||||
void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
||||
const IR::Value value{inst.Arg(0)};
|
||||
if (value.IsImmediate()) {
|
||||
inst.ReplaceUsesWith(IR::Value{std::bit_cast<Dest>(Arg<Source>(value))});
|
||||
return;
|
||||
}
|
||||
IR::Inst* const arg_inst{value.InstRecursive()};
|
||||
if (arg_inst->GetOpcode() == reverse) {
|
||||
inst.ReplaceUsesWith(arg_inst->Arg(0));
|
||||
return;
|
||||
}
|
||||
if constexpr (op == IR::Opcode::BitCastF32U32) {
|
||||
if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
// Replace the bitcast with a typed constant buffer read
|
||||
inst.ReplaceOpcode(IR::Opcode::ReadConstBufferF32);
|
||||
inst.SetArg(0, arg_inst->Arg(0));
|
||||
inst.SetArg(1, arg_inst->Arg(1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Searches the chain of composite producers behind `inst_value` for the element stored at
/// `first_index`.
///
/// A `construct` instruction yields its argument at `first_index`. An `insert` instruction with
/// an immediate index yields the inserted element when the indices match; otherwise the search
/// continues in the composite it was inserted into. Any other producer, a non-immediate insert
/// index, or an immediate composite ends the search with std::nullopt.
std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
                                                  IR::Opcode construct, u32 first_index) {
    // Iterative form of the walk: each round inspects one producer in the chain.
    for (;;) {
        IR::Inst* const producer{inst_value.InstRecursive()};
        if (producer->GetOpcode() == construct) {
            return producer->Arg(first_index);
        }
        if (producer->GetOpcode() != insert) {
            return std::nullopt;
        }
        const IR::Value insert_index{producer->Arg(2)};
        if (!insert_index.IsImmediate()) {
            return std::nullopt;
        }
        if (insert_index.U32() == first_index) {
            return producer->Arg(1);
        }
        const IR::Value composite{producer->Arg(0)};
        if (composite.IsImmediate()) {
            return std::nullopt;
        }
        inst_value = composite;
    }
}
|
||||
|
||||
/// Folds a composite extract whose composite operand is an instruction and whose index is an
/// immediate: when the extracted element can be found through the `construct` / `insert`
/// producers, the extract's uses are replaced by that element. Otherwise nothing changes.
void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) {
    const IR::Value composite{inst.Arg(0)};
    const IR::Value index{inst.Arg(1)};
    if (composite.IsImmediate() || !index.IsImmediate()) {
        return;
    }
    const u32 element_index{index.U32()};
    if (const std::optional element{
            FoldCompositeExtractImpl(composite, insert, construct, element_index)}) {
        inst.ReplaceUsesWith(*element);
    }
}
|
||||
|
||||
/// Cancels a conversion applied directly on top of its `opposite` conversion: the uses of
/// `inst` are replaced by the operand of the inner conversion. Immediate operands are ignored.
void FoldConvert(IR::Inst& inst, IR::Opcode opposite) {
    const IR::Value operand{inst.Arg(0)};
    if (!operand.IsImmediate()) {
        IR::Inst* const inner{operand.InstRecursive()};
        if (inner->GetOpcode() == opposite) {
            inst.ReplaceUsesWith(inner->Arg(0));
        }
    }
}
|
||||
|
||||
/// Folds LogicalAnd: after the generic commutative folding, an immediate right operand reduces
/// the instruction to its left operand (x && true) or to the constant false (x && false).
void FoldLogicalAnd(IR::Inst& inst) {
    const bool keep_folding{FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })};
    if (!keep_folding) {
        return;
    }
    const IR::Value rhs{inst.Arg(1)};
    if (!rhs.IsImmediate()) {
        return;
    }
    if (!rhs.U1()) {
        inst.ReplaceUsesWith(IR::Value{false});
        return;
    }
    inst.ReplaceUsesWith(inst.Arg(0));
}
|
||||
|
||||
/// Folds a select with an immediate condition into the operand it would pick:
/// argument 1 when the condition is true, argument 2 otherwise.
void FoldSelect(IR::Inst& inst) {
    const IR::Value condition{inst.Arg(0)};
    if (!condition.IsImmediate()) {
        return;
    }
    if (condition.U1()) {
        inst.ReplaceUsesWith(inst.Arg(1));
    } else {
        inst.ReplaceUsesWith(inst.Arg(2));
    }
}
|
||||
|
||||
/// Folds LogicalOr: after the generic commutative folding, an immediate right operand reduces
/// the instruction to the constant true (x || true) or to its left operand (x || false).
void FoldLogicalOr(IR::Inst& inst) {
    const bool keep_folding{FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })};
    if (!keep_folding) {
        return;
    }
    const IR::Value rhs{inst.Arg(1)};
    if (!rhs.IsImmediate()) {
        return;
    }
    if (!rhs.U1()) {
        inst.ReplaceUsesWith(inst.Arg(0));
        return;
    }
    inst.ReplaceUsesWith(IR::Value{true});
}
|
||||
|
||||
/// Folds LogicalNot: an immediate operand is negated at compile time, and a double negation
/// (operand produced by another LogicalNot) is replaced by the innermost operand.
void FoldLogicalNot(IR::Inst& inst) {
    const IR::U1 operand{inst.Arg(0)};
    if (operand.IsImmediate()) {
        const bool negated{!operand.U1()};
        inst.ReplaceUsesWith(IR::Value{negated});
        return;
    }
    IR::Inst* const inner{operand.InstRecursive()};
    if (inner->GetOpcode() != IR::Opcode::LogicalNot) {
        return;
    }
    inst.ReplaceUsesWith(inner->Arg(0));
}
|
||||
|
||||
/// Removes a function applied directly to the result of its inverse: when the operand of `inst`
/// is produced by an instruction with opcode `reverse`, the uses of `inst` are replaced by that
/// producer's operand. Immediate operands are left alone.
void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
    const IR::Value operand{inst.Arg(0)};
    if (operand.IsImmediate()) {
        return;
    }
    IR::Inst* const inner{operand.InstRecursive()};
    const bool cancels_out{inner->GetOpcode() == reverse};
    if (cancels_out) {
        inst.ReplaceUsesWith(inner->Arg(0));
    }
}
|
||||
|
||||
template <typename T>
|
||||
void FoldAdd(IR::Block& block, IR::Inst& inst) {
|
||||
if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) {
|
||||
return;
|
||||
}
|
||||
const IR::Value rhs{inst.Arg(1)};
|
||||
if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
|
||||
inst.ReplaceUsesWith(inst.Arg(0));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
template <u32 idx>
|
||||
bool IsArgImm(const IR::Inst& inst, u32 imm) {
|
||||
const IR::Value& arg = inst.Arg(idx);
|
||||
return arg.IsImmediate() && arg.U32() == imm;
|
||||
};
|
||||
|
||||
/// Removes a bool -> integer -> bool round trip. The matched pattern is
///     %4 = <some bool>
///     %5 = SelectU32 %4, #1, #0
///     %8 = INotEqual %5, #0
/// in which case the uses of %8 are replaced by %4.
void FoldBooleanConvert(IR::Inst& inst) {
    // The comparison has to be against the immediate zero
    const bool compares_with_zero{IsArgImm<1>(inst, 0)};
    if (!compares_with_zero) {
        return;
    }
    // The compared value has to come from a SelectU32
    IR::Inst* const select = inst.Arg(0).TryInstRecursive();
    if (select == nullptr || select->GetOpcode() != IR::Opcode::SelectU32) {
        return;
    }
    // The select has to produce 1 for true and 0 for false
    const bool selects_one_or_zero{IsArgImm<1>(*select, 1) && IsArgImm<2>(*select, 0)};
    if (selects_one_or_zero) {
        inst.ReplaceUsesWith(select->Arg(0));
    }
}
|
||||
|
||||
/// Applies the folding rule that matches the opcode of `inst`, if there is one.
///
/// Instructions with opcodes not listed here are left untouched. `block` is only forwarded to
/// FoldAdd.
///
/// Bit field folds throw LogicError when the immediate operands describe a field outside the
/// 32-bit value, as before. Within the accepted range the folds avoid out-of-range shifts
/// (extracting 32 bits, or 0 bits at offset 32), and BitFieldInsert only takes the low `bits`
/// bits of the inserted value.
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::IAdd32:
        return FoldAdd<u32>(block, inst);
    case IR::Opcode::IMul32:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
        return;
    case IR::Opcode::ShiftRightArithmetic32:
        FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
        return;
    case IR::Opcode::BitCastF32U32:
        return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
    case IR::Opcode::BitCastU32F32:
        return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
    case IR::Opcode::PackHalf2x16:
        return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
    case IR::Opcode::UnpackHalf2x16:
        return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
    case IR::Opcode::PackFloat2x16:
        return FoldInverseFunc(inst, IR::Opcode::UnpackFloat2x16);
    case IR::Opcode::UnpackFloat2x16:
        return FoldInverseFunc(inst, IR::Opcode::PackFloat2x16);
    case IR::Opcode::SelectU1:
    case IR::Opcode::SelectU8:
    case IR::Opcode::SelectU16:
    case IR::Opcode::SelectU32:
    case IR::Opcode::SelectU64:
    case IR::Opcode::SelectF32:
    case IR::Opcode::SelectF64:
        return FoldSelect(inst);
    case IR::Opcode::FPNeg32:
        FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
        return;
    case IR::Opcode::LogicalAnd:
        return FoldLogicalAnd(inst);
    case IR::Opcode::LogicalOr:
        return FoldLogicalOr(inst);
    case IR::Opcode::LogicalNot:
        return FoldLogicalNot(inst);
    case IR::Opcode::SLessThan:
        FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
        return;
    case IR::Opcode::ULessThan:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
        return;
    case IR::Opcode::SLessThanEqual:
        FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
        return;
    case IR::Opcode::ULessThanEqual:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
        return;
    case IR::Opcode::SGreaterThan:
        FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
        return;
    case IR::Opcode::UGreaterThan:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
        return;
    case IR::Opcode::SGreaterThanEqual:
        FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
        return;
    case IR::Opcode::UGreaterThanEqual:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
        return;
    case IR::Opcode::IEqual:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
        return;
    case IR::Opcode::INotEqual:
        // FoldWhenAllImmediates returns true once it has replaced the instruction with a
        // constant. The pattern match below reads the instruction's operands, so it only runs
        // when the comparison has not been folded away.
        // NOTE(review): assumes ReplaceUsesWith rewrites `inst` in place - confirm.
        if (!FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; })) {
            FoldBooleanConvert(inst);
        }
        return;
    case IR::Opcode::BitwiseAnd32:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
        return;
    case IR::Opcode::BitwiseOr32:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
        return;
    case IR::Opcode::BitwiseXor32:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
        return;
    case IR::Opcode::BitFieldUExtract:
        FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) -> u32 {
            if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
                throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
                                 base, shift, count);
            }
            if (count == 0) {
                // Empty field; also covers shift == 32, where `base >> shift` would be undefined
                return 0;
            }
            // count == 32 implies shift == 0; `1U << 32` would be undefined, so use all ones
            const u32 mask{count == 32 ? ~0U : (1U << count) - 1U};
            return (base >> shift) & mask;
        });
        return;
    case IR::Opcode::BitFieldSExtract:
        FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
            const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)};
            const size_t left_shift{32 - back_shift};
            const size_t right_shift{static_cast<size_t>(32 - count)};
            // Unsigned wrap-around of the two subtractions is caught by the range checks below
            if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
                throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
                                 base, shift, count);
            }
            // Move the field to the top bits, then shift back down with sign extension
            return static_cast<u32>((base << left_shift) >> right_shift);
        });
        return;
    case IR::Opcode::BitFieldInsert:
        FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
            if (bits >= 32 || offset >= 32) {
                throw LogicError("Undefined result in {}({}, {}, {}, {})",
                                 IR::Opcode::BitFieldInsert, base, insert, offset, bits);
            }
            // Bits [offset, offset + bits) of the result
            const u32 field_mask{~(~0u << bits) << offset};
            // Only the low `bits` bits of `insert` belong to the field; higher bits must not
            // leak into the part of the result that comes from `base`
            return (base & ~field_mask) | ((insert << offset) & field_mask);
        });
        return;
    case IR::Opcode::CompositeExtractU32x2:
        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2,
                                    IR::Opcode::CompositeInsertU32x2);
    case IR::Opcode::CompositeExtractU32x3:
        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3,
                                    IR::Opcode::CompositeInsertU32x3);
    case IR::Opcode::CompositeExtractU32x4:
        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4,
                                    IR::Opcode::CompositeInsertU32x4);
    case IR::Opcode::CompositeExtractF32x2:
        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2,
                                    IR::Opcode::CompositeInsertF32x2);
    case IR::Opcode::CompositeExtractF32x3:
        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3,
                                    IR::Opcode::CompositeInsertF32x3);
    case IR::Opcode::CompositeExtractF32x4:
        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4,
                                    IR::Opcode::CompositeInsertF32x4);
    case IR::Opcode::CompositeExtractF16x2:
        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2,
                                    IR::Opcode::CompositeInsertF16x2);
    case IR::Opcode::CompositeExtractF16x3:
        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3,
                                    IR::Opcode::CompositeInsertF16x3);
    case IR::Opcode::CompositeExtractF16x4:
        return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4,
                                    IR::Opcode::CompositeInsertF16x4);
    case IR::Opcode::ConvertF32F16:
        return FoldConvert(inst, IR::Opcode::ConvertF16F32);
    case IR::Opcode::ConvertF16F32:
        return FoldConvert(inst, IR::Opcode::ConvertF32F16);
    default:
        break;
    }
}
|
||||
|
||||
/// Runs ConstantPropagation on every instruction of the program. Blocks are visited from the
/// last entry of `program` to the first; instructions inside a block are visited front to back.
void ConstantPropagationPass(IR::BlockList& program) {
    for (auto block_it = program.rbegin(); block_it != program.rend(); ++block_it) {
        IR::Block& block{**block_it};
        for (IR::Inst& inst : block.Instructions()) {
            ConstantPropagation(block, inst);
        }
    }
}
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
16
src/shader_recompiler/ir/passes/passes.h
Normal file
16
src/shader_recompiler/ir/passes/passes.h
Normal file
@@ -0,0 +1,16 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
void SsaRewritePass(IR::BlockList& program);
|
||||
void IdentityRemovalPass(IR::BlockList& program);
|
||||
void DeadCodeEliminationPass(IR::BlockList& program);
|
||||
void ConstantPropagationPass(IR::BlockList& program);
|
||||
void ResourceTrackingPass(IR::BlockList& program);
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
131
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
Normal file
131
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
Normal file
@@ -0,0 +1,131 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <bit>
|
||||
#include <optional>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
namespace {
|
||||
|
||||
struct SharpLocation {
|
||||
IR::ScalarReg eud_ptr;
|
||||
u32 index_dwords;
|
||||
|
||||
auto operator<=>(const SharpLocation&) const = default;
|
||||
};
|
||||
|
||||
/// Returns true for the opcodes that ResourceTrackingPass processes: constant buffer reads and
/// the image sample / fetch / gather / query / gradient / read / write operations.
bool IsResourceInstruction(const IR::Inst& inst) {
    const IR::Opcode opcode{inst.GetOpcode()};
    switch (opcode) {
    // Constant buffer reads
    case IR::Opcode::ReadConstBuffer:
    case IR::Opcode::ReadConstBufferF32:
    // Image sampling
    case IR::Opcode::ImageSampleExplicitLod:
    case IR::Opcode::ImageSampleImplicitLod:
    case IR::Opcode::ImageSampleDrefExplicitLod:
    case IR::Opcode::ImageSampleDrefImplicitLod:
    // Other image operations
    case IR::Opcode::ImageFetch:
    case IR::Opcode::ImageGather:
    case IR::Opcode::ImageGatherDref:
    case IR::Opcode::ImageQueryDimensions:
    case IR::Opcode::ImageQueryLod:
    case IR::Opcode::ImageGradient:
    case IR::Opcode::ImageRead:
    case IR::Opcode::ImageWrite:
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
/*class Descriptors {
|
||||
public:
|
||||
explicit Descriptors(TextureDescriptors& texture_descriptors_)
|
||||
: texture_descriptors{texture_descriptors_} {}
|
||||
|
||||
u32 Add(const TextureDescriptor& desc) {
|
||||
const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) {
|
||||
return desc.type == existing.type && desc.is_depth == existing.is_depth &&
|
||||
desc.has_secondary == existing.has_secondary &&
|
||||
desc.cbuf_index == existing.cbuf_index &&
|
||||
desc.cbuf_offset == existing.cbuf_offset &&
|
||||
desc.shift_left == existing.shift_left &&
|
||||
desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
|
||||
desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
|
||||
desc.secondary_shift_left == existing.secondary_shift_left &&
|
||||
desc.count == existing.count && desc.size_shift == existing.size_shift;
|
||||
})};
|
||||
// TODO: Read this from TIC
|
||||
texture_descriptors[index].is_multisample |= desc.is_multisample;
|
||||
return index;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Descriptors, typename Descriptor, typename Func>
|
||||
static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
|
||||
// TODO: Handle arrays
|
||||
const auto it{std::ranges::find_if(descriptors, pred)};
|
||||
if (it != descriptors.end()) {
|
||||
return static_cast<u32>(std::distance(descriptors.begin(), it));
|
||||
}
|
||||
descriptors.push_back(desc);
|
||||
return static_cast<u32>(descriptors.size()) - 1;
|
||||
}
|
||||
|
||||
TextureDescriptors& texture_descriptors;
|
||||
};*/
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Walks back from a resource `handle` to the place its descriptor is loaded from.
///
/// - Handle produced by GetScalarRegister: the register number becomes `index_dwords` and
///   `eud_ptr` is set to IR::ScalarReg::Max.
/// - Otherwise the handle must be produced by ReadConst (asserted). The dword offset is the
///   immediate of the first address instruction plus the immediate of the next one divided by
///   four, and `eud_ptr` is the scalar register found at the end of the address chain.
///
/// Asserts when the inner offset is not an immediate ("Bindless not supported") or when the
/// base does not come from GetScalarRegister ("Nested resource loads not supported").
SharpLocation TrackSharp(const IR::Value& handle) {
    IR::Inst* inst = handle.InstRecursive();
    if (inst->GetOpcode() == IR::Opcode::GetScalarRegister) {
        // Argument 0 of GetScalarRegister is a scalar register operand (it is read with
        // ScalarReg() everywhere else), so read it as such and convert the register number.
        return SharpLocation{
            .eud_ptr = IR::ScalarReg::Max,
            .index_dwords = static_cast<u32>(inst->Arg(0).ScalarReg()),
        };
    }
    ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory");

    // Retrieve offset from base.
    IR::Inst* addr = inst->Arg(0).InstRecursive();
    u32 dword_offset = addr->Arg(1).U32();
    addr = addr->Arg(0).InstRecursive();
    ASSERT_MSG(addr->Arg(1).IsImmediate(), "Bindless not supported");
    // NOTE(review): the shift suggests this second offset is in bytes - confirm.
    dword_offset += addr->Arg(1).U32() >> 2;

    // Retrieve SGPR that holds sbase
    inst = addr->Arg(0).InstRecursive()->Arg(0).InstRecursive();
    ASSERT_MSG(inst->GetOpcode() == IR::Opcode::GetScalarRegister,
               "Nested resource loads not supported");
    const IR::ScalarReg base = inst->Arg(0).ScalarReg();

    // Return retrieved location.
    return SharpLocation{
        .eud_ptr = base,
        .index_dwords = dword_offset,
    };
}
|
||||
|
||||
/// Visits every resource instruction of the program (see IsResourceInstruction), tracks the
/// location of the sharp behind its first argument and prints it.
///
/// The pass currently only reports what it finds; it does not modify the IR.
void ResourceTrackingPass(IR::BlockList& program) {
    for (IR::Block* const block : program) {
        for (IR::Inst& inst : block->Instructions()) {
            if (!IsResourceInstruction(inst)) {
                continue;
            }
            // The handle is the first argument of the instruction that produces the resource
            // instruction's first argument.
            IR::Inst* producer = inst.Arg(0).InstRecursive();
            const auto loc = TrackSharp(producer->Arg(0));
            fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords,
                       loc.index_dwords + 4, loc.eud_ptr != IR::ScalarReg::Max);
        }
    }
}
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
408
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
Normal file
408
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
Normal file
@@ -0,0 +1,408 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
// This file implements the SSA rewriting algorithm proposed in
|
||||
//
|
||||
// Simple and Efficient Construction of Static Single Assignment Form.
|
||||
// Braun M., Buchwald S., Hack S., Leiba R., Mallon C., Zwinkau A. (2013)
|
||||
// In: Jhala R., De Bosschere K. (eds)
|
||||
// Compiler Construction. CC 2013.
|
||||
// Lecture Notes in Computer Science, vol 7791.
|
||||
// Springer, Berlin, Heidelberg
|
||||
//
|
||||
// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
|
||||
//
|
||||
|
||||
#include <map>
|
||||
#include <span>
|
||||
#include <unordered_map>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/opcodes.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/ir/value.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
namespace {
|
||||
struct FlagTag {
|
||||
auto operator<=>(const FlagTag&) const noexcept = default;
|
||||
};
|
||||
struct ZeroFlagTag : FlagTag {};
|
||||
struct SignFlagTag : FlagTag {};
|
||||
struct CarryFlagTag : FlagTag {};
|
||||
struct OverflowFlagTag : FlagTag {};
|
||||
struct VccFlagTag : FlagTag {};
|
||||
|
||||
struct GotoVariable : FlagTag {
|
||||
GotoVariable() = default;
|
||||
explicit GotoVariable(u32 index_) : index{index_} {}
|
||||
|
||||
auto operator<=>(const GotoVariable&) const noexcept = default;
|
||||
|
||||
u32 index;
|
||||
};
|
||||
|
||||
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, ZeroFlagTag, SignFlagTag, CarryFlagTag,
|
||||
OverflowFlagTag, GotoVariable, VccFlagTag>;
|
||||
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
|
||||
|
||||
struct DefTable {
|
||||
const IR::Value& Def(IR::Block* block, IR::ScalarReg variable) {
|
||||
return block->ssa_sreg_values[RegIndex(variable)];
|
||||
}
|
||||
void SetDef(IR::Block* block, IR::ScalarReg variable, const IR::Value& value) {
|
||||
block->ssa_sreg_values[RegIndex(variable)] = value;
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, IR::VectorReg variable) {
|
||||
return block->ssa_vreg_values[RegIndex(variable)];
|
||||
}
|
||||
void SetDef(IR::Block* block, IR::VectorReg variable, const IR::Value& value) {
|
||||
block->ssa_vreg_values[RegIndex(variable)] = value;
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, GotoVariable variable) {
|
||||
return goto_vars[variable.index][block];
|
||||
}
|
||||
void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) {
|
||||
goto_vars[variable.index].insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
|
||||
return zero_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
|
||||
zero_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, SignFlagTag) {
|
||||
return sign_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
|
||||
sign_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, CarryFlagTag) {
|
||||
return carry_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
|
||||
carry_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
|
||||
return overflow_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
|
||||
overflow_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, VccFlagTag) {
|
||||
return vcc_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, VccFlagTag, const IR::Value& value) {
|
||||
vcc_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
std::unordered_map<u32, ValueMap> goto_vars;
|
||||
ValueMap indirect_branch_var;
|
||||
ValueMap zero_flag;
|
||||
ValueMap sign_flag;
|
||||
ValueMap carry_flag;
|
||||
ValueMap overflow_flag;
|
||||
ValueMap vcc_flag;
|
||||
};
|
||||
|
||||
// Opcode of the "undefined value" instruction that matches the type of a tracked variable.

// Scalar registers are 32-bit values.
IR::Opcode UndefOpcode(IR::ScalarReg) noexcept {
    return IR::Opcode::UndefU32;
}

// Vector registers are 32-bit values.
IR::Opcode UndefOpcode(IR::VectorReg) noexcept {
    return IR::Opcode::UndefU32;
}

// Every FlagTag-derived variable uses the 1-bit undefined opcode.
IR::Opcode UndefOpcode(const FlagTag&) noexcept {
    return IR::Opcode::UndefU1;
}
|
||||
|
||||
// Resume point of one ReadVariable stack frame.
enum class Status {
    Start,              // Frame has not been processed yet
    SetValue,           // Waiting for the value read from the single predecessor
    PreparePhiArgument, // Select the next phi operand to read, or finish the phi
    PushPhiArgument,    // Waiting for the value of a phi operand read from a predecessor
};
|
||||
|
||||
template <typename Type>
|
||||
struct ReadState {
|
||||
ReadState(IR::Block* block_) : block{block_} {}
|
||||
ReadState() = default;
|
||||
|
||||
IR::Block* block{};
|
||||
IR::Value result{};
|
||||
IR::Inst* phi{};
|
||||
IR::Block* const* pred_it{};
|
||||
IR::Block* const* pred_end{};
|
||||
Status pc{Status::Start};
|
||||
};
|
||||
|
||||
class Pass {
|
||||
public:
|
||||
template <typename Type>
|
||||
void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) {
|
||||
current_def.SetDef(block, variable, value);
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
IR::Value ReadVariable(Type variable, IR::Block* root_block) {
|
||||
boost::container::small_vector<ReadState<Type>, 64> stack{
|
||||
ReadState<Type>(nullptr),
|
||||
ReadState<Type>(root_block),
|
||||
};
|
||||
const auto prepare_phi_operand = [&] {
|
||||
if (stack.back().pred_it == stack.back().pred_end) {
|
||||
IR::Inst* const phi{stack.back().phi};
|
||||
IR::Block* const block{stack.back().block};
|
||||
const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))};
|
||||
stack.pop_back();
|
||||
stack.back().result = result;
|
||||
WriteVariable(variable, block, result);
|
||||
} else {
|
||||
IR::Block* const imm_pred{*stack.back().pred_it};
|
||||
stack.back().pc = Status::PushPhiArgument;
|
||||
stack.emplace_back(imm_pred);
|
||||
}
|
||||
};
|
||||
do {
|
||||
IR::Block* const block{stack.back().block};
|
||||
switch (stack.back().pc) {
|
||||
case Status::Start: {
|
||||
if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) {
|
||||
stack.back().result = def;
|
||||
} else if (!block->IsSsaSealed()) {
|
||||
// Incomplete CFG
|
||||
IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
|
||||
phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
|
||||
|
||||
incomplete_phis[block].insert_or_assign(variable, phi);
|
||||
stack.back().result = IR::Value{&*phi};
|
||||
} else if (const std::span imm_preds = block->ImmPredecessors();
|
||||
imm_preds.size() == 1) {
|
||||
// Optimize the common case of one predecessor: no phi needed
|
||||
stack.back().pc = Status::SetValue;
|
||||
stack.emplace_back(imm_preds.front());
|
||||
break;
|
||||
} else {
|
||||
// Break potential cycles with operandless phi
|
||||
IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
|
||||
phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
|
||||
|
||||
WriteVariable(variable, block, IR::Value{phi});
|
||||
|
||||
stack.back().phi = phi;
|
||||
stack.back().pred_it = imm_preds.data();
|
||||
stack.back().pred_end = imm_preds.data() + imm_preds.size();
|
||||
prepare_phi_operand();
|
||||
break;
|
||||
}
|
||||
}
|
||||
[[fallthrough]];
|
||||
case Status::SetValue: {
|
||||
const IR::Value result{stack.back().result};
|
||||
WriteVariable(variable, block, result);
|
||||
stack.pop_back();
|
||||
stack.back().result = result;
|
||||
break;
|
||||
}
|
||||
case Status::PushPhiArgument: {
|
||||
IR::Inst* const phi{stack.back().phi};
|
||||
phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
|
||||
++stack.back().pred_it;
|
||||
}
|
||||
[[fallthrough]];
|
||||
case Status::PreparePhiArgument:
|
||||
prepare_phi_operand();
|
||||
break;
|
||||
}
|
||||
} while (stack.size() > 1);
|
||||
return stack.back().result;
|
||||
}
|
||||
|
||||
void SealBlock(IR::Block* block) {
|
||||
const auto it{incomplete_phis.find(block)};
|
||||
if (it != incomplete_phis.end()) {
|
||||
for (auto& pair : it->second) {
|
||||
auto& variant{pair.first};
|
||||
auto& phi{pair.second};
|
||||
std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant);
|
||||
}
|
||||
}
|
||||
block->SsaSeal();
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Type>
|
||||
IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
|
||||
for (IR::Block* const imm_pred : block->ImmPredecessors()) {
|
||||
phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
|
||||
}
|
||||
return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
|
||||
}
|
||||
|
||||
/// Attempts to replace `phi` with the single value it forwards.
///
/// A phi is considered trivial when, ignoring references to itself, all of its
/// operands resolve to the same value (or it has no such operand at all).
///
/// @param phi          Phi instruction to inspect; it must live in `block`.
/// @param block        Block that owns `phi`.
/// @param undef_opcode Opcode of the instruction inserted as the replacement
///                     when the phi has no operand other than itself.
/// @return A value referring to `phi` itself when it merges at least two
///         distinct values; otherwise the value all uses of `phi` were rerouted to.
IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) {
    const IR::Value self{&phi};
    IR::Value unique;
    const size_t operand_count{phi.NumArgs()};
    for (size_t index = 0; index != operand_count; ++index) {
        const IR::Value& operand{phi.Arg(index)};
        if (operand.Resolve() == unique.Resolve() || operand == self) {
            // Same value as the one already seen, or the phi referencing itself
            continue;
        }
        if (!unique.IsEmpty()) {
            // A second distinct value shows up: the phi is a real merge, keep it
            return self;
        }
        unique = operand;
    }

    // Detach the phi from its block; it is put back further down
    IR::Block::InstructionList& instructions{block->Instructions()};
    instructions.erase(IR::Block::InstructionList::s_iterator_to(phi));

    // The insertion point is the first instruction that is not a phi
    IR::Block::iterator insert_at{std::ranges::find_if_not(instructions, IR::IsPhi)};
    if (unique.IsEmpty()) {
        // No operand other than the phi itself: the phi is unreachable or in the start block.
        // Emit an undefined instruction to stand in for it, and place the phi right after it.
        insert_at = block->PrependNewInst(insert_at, undef_opcode);
        unique = IR::Value{&*insert_at};
        ++insert_at;
    }

    // Put the phi back and forward every use of it to the unique value
    instructions.insert(insert_at, phi);
    phi.ReplaceUsesWith(unique);
    // TODO: Try to recursively remove all phi users, which might have become trivial
    return unique;
}
|
||||
|
||||
// Per-block table mapping a variable (Variant) to an instruction pointer.
// NOTE(review): presumably the phi nodes created while a block was still unsealed,
// to be given their operands when the block is sealed - confirm against SealBlock.
std::unordered_map<IR::Block*, std::map<Variant, IR::Inst*>> incomplete_phis;
|
||||
// NOTE(review): presumably the current definition of every variable per block, as
// accessed by WriteVariable/ReadVariable - confirm against DefTable.
DefTable current_def;
|
||||
};
|
||||
|
||||
/// Applies the SSA rewrite to a single instruction.
///
/// Register, goto-variable and VCC "set" instructions record their value operand
/// as the variable's definition in `block` through Pass::WriteVariable.
/// The matching "get" instructions have all of their uses replaced with the
/// value returned by Pass::ReadVariable. Every other opcode is left untouched.
void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    // Writes: (variable, value) -> new definition of the variable in this block
    case IR::Opcode::SetScalarRegister: {
        const IR::ScalarReg dst{inst.Arg(0).ScalarReg()};
        pass.WriteVariable(dst, block, inst.Arg(1));
        return;
    }
    case IR::Opcode::SetVectorRegister: {
        const IR::VectorReg dst{inst.Arg(0).VectorReg()};
        pass.WriteVariable(dst, block, inst.Arg(1));
        return;
    }
    case IR::Opcode::SetGotoVariable: {
        const GotoVariable dst{inst.Arg(0).U32()};
        pass.WriteVariable(dst, block, inst.Arg(1));
        return;
    }
    case IR::Opcode::SetVcc:
        pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
        return;
    // case IR::Opcode::SetSFlag:
    //     pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
    //     break;
    // case IR::Opcode::SetCFlag:
    //     pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
    //     break;
    // case IR::Opcode::SetOFlag:
    //     pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
    //     break;

    // Reads: every use of the instruction is forwarded to the variable's value
    case IR::Opcode::GetScalarRegister: {
        const IR::ScalarReg src{inst.Arg(0).ScalarReg()};
        inst.ReplaceUsesWith(pass.ReadVariable(src, block));
        return;
    }
    case IR::Opcode::GetVectorRegister: {
        const IR::VectorReg src{inst.Arg(0).VectorReg()};
        inst.ReplaceUsesWith(pass.ReadVariable(src, block));
        return;
    }
    case IR::Opcode::GetGotoVariable: {
        const GotoVariable src{inst.Arg(0).U32()};
        inst.ReplaceUsesWith(pass.ReadVariable(src, block));
        return;
    }
    case IR::Opcode::GetVcc:
        inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
        return;
    // case IR::Opcode::GetSFlag:
    //     inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
    //     break;
    // case IR::Opcode::GetCFlag:
    //     inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
    //     break;
    // case IR::Opcode::GetOFlag:
    //     inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
    //     break;
    default:
        return;
    }
}
|
||||
|
||||
/// Runs VisitInst over every instruction of `block` in list order, then seals
/// the block through Pass::SealBlock.
void VisitBlock(Pass& pass, IR::Block* block) {
    auto& instructions = block->Instructions();
    for (auto it = instructions.begin(), last = instructions.end(); it != last; ++it) {
        VisitInst(pass, block, *it);
    }
    pass.SealBlock(block);
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Entry point of the SSA rewrite: visits the blocks of `program` from the last
/// element of the list to the first, sharing a single Pass state across all blocks.
void SsaRewritePass(IR::BlockList& program) {
    Pass pass;
    const auto stop = program.rend();
    auto cursor = program.rbegin();
    while (cursor != stop) {
        VisitBlock(pass, *cursor);
        ++cursor;
    }
}
|
||||
|
||||
/// Removes Identity and Void instructions from every block of `program`.
///
/// Each argument that refers to an identity is first rewritten to the identity's
/// own first argument, repeatedly, until it no longer refers to an identity.
/// Identity/Void instructions are then unlinked from their block and invalidated
/// only after all blocks have been processed.
void IdentityRemovalPass(IR::BlockList& program) {
    std::vector<IR::Inst*> removed;
    for (IR::Block* const block : program) {
        auto cursor = block->begin();
        while (cursor != block->end()) {
            IR::Inst& inst = *cursor;
            const size_t arg_count{inst.NumArgs()};
            for (size_t index = 0; index < arg_count; ++index) {
                // Follow the chain of identities down to the underlying value
                for (IR::Value arg = inst.Arg(index); arg.IsIdentity(); arg = inst.Arg(index)) {
                    inst.SetArg(index, arg.Inst()->Arg(0));
                }
            }
            const IR::Opcode opcode{inst.GetOpcode()};
            if (opcode != IR::Opcode::Identity && opcode != IR::Opcode::Void) {
                ++cursor;
                continue;
            }
            // Unlink now, invalidate once every block has been walked
            removed.push_back(&inst);
            cursor = block->Instructions().erase(cursor);
        }
    }
    for (IR::Inst* const inst : removed) {
        inst->Invalidate();
    }
}
|
||||
|
||||
/// Erases every instruction that has no uses and may not have side effects.
///
/// Blocks are visited in the order given by `program`. Inside a block the
/// instructions are walked from last to first, because removing an instruction
/// reduces the number of uses of the instructions that precede it.
void DeadCodeEliminationPass(IR::BlockList& program) {
    for (IR::Block* const block : program) {
        for (auto cursor = block->end(); cursor != block->begin();) {
            --cursor;
            const bool must_keep{cursor->HasUses() || cursor->MayHaveSideEffects()};
            if (must_keep) {
                continue;
            }
            cursor->Invalidate();
            // erase() yields the following instruction; the next decrement steps past it
            cursor = block->Instructions().erase(cursor);
        }
    }
}
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
Reference in New Issue
Block a user