From 52650d2c555c6b560a854f50dde54e1410d7a536 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 8 Apr 2025 02:34:45 +0200 Subject: [PATCH] Finish IR --- .../backend/spirv/emit_spirv_instructions.h | 1 + .../backend/spirv/emit_spirv_special.cpp | 4 + src/shader_recompiler/ir/basic_block.cpp | 6 + src/shader_recompiler/ir/basic_block.h | 3 + .../ir/compute_value/do_nop_functions.h | 1 + src/shader_recompiler/ir/ir_emitter.cpp | 8 + src/shader_recompiler/ir/ir_emitter.h | 4 + src/shader_recompiler/ir/microinstruction.cpp | 1 + src/shader_recompiler/ir/opcodes.inc | 1 + .../passes/flatten_extended_userdata_pass.cpp | 201 +++++++++--------- src/shader_recompiler/ir/passes/ir_passes.h | 3 +- src/shader_recompiler/ir/program.cpp | 7 +- src/shader_recompiler/ir/program.h | 2 +- src/shader_recompiler/ir/srt_gvn_table.h | 14 -- src/shader_recompiler/ir/subprogram.cpp | 73 +++++-- src/shader_recompiler/ir/subprogram.h | 4 +- src/shader_recompiler/recompiler.cpp | 2 +- 17 files changed, 189 insertions(+), 146 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index fb37799f5..a8901d8f6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -47,6 +47,7 @@ void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); void EmitDiscard(EmitContext& ctx); void EmitDiscardCond(EmitContext& ctx, Id condition); +void EmitStoreFlatbuf(EmitContext& ctx, const IR::Value& data, const IR::Value& offset); void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2, Id arg3, Id arg4); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index fe7bd3356..f48c76395 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -102,6 +102,10 @@ void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { throw NotImplementedException("Geometry streams"); } +void EmitStoreFlatbuf(EmitContext& ctx, const IR::Value& data, const IR::Value& offset) { + UNREACHABLE_MSG("StoreFlatbuf not intended for SPIR-V"); +} + void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id fmt, Id arg0, Id arg1, Id arg2, Id arg3) { IR::DebugPrintFlags flags = inst->Flags(); std::array fmt_args = {arg0, arg1, arg2, arg3}; diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index a312eabde..6e9062254 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -23,6 +23,12 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base return instructions.insert(insertion_point, *inst); } +Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, u32 flags) { + Inst* const inst{inst_pool->Create(op, flags)}; + inst->SetParent(this); + return instructions.insert(insertion_point, *inst); +} + Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args, u32 flags) { Inst* const inst{inst_pool->Create(op, flags)}; diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h index 865243835..3c74b1133 100644 --- a/src/shader_recompiler/ir/basic_block.h +++ b/src/shader_recompiler/ir/basic_block.h @@ -47,6 +47,9 @@ public: /// Prepends a copy of an instruction to this basic block before the insertion point. iterator PrependNewInst(iterator insertion_point, const Inst& base_inst); + /// Prepends a new instruction to this basic block before the insertion point (without args). + iterator PrependNewInst(iterator insertion_point, Opcode op, u32 flags); + /// Prepends a new instruction to this basic block before the insertion point. iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args = {}, u32 flags = 0); diff --git a/src/shader_recompiler/ir/compute_value/do_nop_functions.h b/src/shader_recompiler/ir/compute_value/do_nop_functions.h index 716478e00..8b88742a1 100644 --- a/src/shader_recompiler/ir/compute_value/do_nop_functions.h +++ b/src/shader_recompiler/ir/compute_value/do_nop_functions.h @@ -17,6 +17,7 @@ NOP_FUNCTION(Prologue) NOP_FUNCTION(Epilogue) NOP_FUNCTION(Discard) NOP_FUNCTION(DiscardCond) +NOP_FUNCTION(StoreFlatbuf) NOP_FUNCTION(DebugPrint) NOP_FUNCTION(ReadConst) diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index a171d32a2..77e12c30c 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -102,6 +102,10 @@ void IREmitter::Reference(const Value& value) { Inst(Opcode::Reference, value); } +Value IREmitter::Phi(IR::Type type) { + return Inst(Opcode::Phi, Flags(type)); +} + void IREmitter::PhiMove(IR::Inst& phi, const Value& value) { Inst(Opcode::PhiMove, Value{&phi}, value); } @@ -1970,6 +1974,10 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32& return Inst(Opcode::CubeFaceIndex, cube_coords); } +void IREmitter::StoreFlatbuf(const U32& data, const U32& offset) { + Inst(Opcode::StoreFlatbuf, data, offset); +} + // Debug print maps to SPIRV's NonSemantic DebugPrintf instruction // Renderdoc will hook in its own implementation of the SPIRV instruction // Renderdoc accepts format specifiers, e.g. %u, listed here: diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 48cc02725..b982f1f91 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -18,6 +18,8 @@ namespace Shader::IR { class IREmitter { public: explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} + explicit IREmitter(Inst& inst) + : block{inst.GetParent()}, insertion_point{Block::InstructionList::s_iterator_to(inst)} {} explicit IREmitter(Block& block_, Block::iterator insertion_point_) : block{&block_}, insertion_point{insertion_point_} {} @@ -39,12 +41,14 @@ public: U1 ConditionRef(const U1& value); void Reference(const Value& value); + [[nodiscard]] Value Phi(IR::Type type); void PhiMove(IR::Inst& phi, const Value& value); void Prologue(); void Epilogue(); void Discard(); void Discard(const U1& cond); + void StoreFlatbuf(const U32& data, const U32& offset); void DebugPrint(const char* fmt, boost::container::small_vector args); void Barrier(); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 580156f5b..45b0f3de0 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -100,6 +100,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::ImageAtomicOr32: case Opcode::ImageAtomicXor32: case Opcode::ImageAtomicExchange32: + case Opcode::StoreFlatbuf: case Opcode::DebugPrint: case Opcode::EmitVertex: case Opcode::EmitPrimitive: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 93d759b74..f30c1ee67 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -14,6 +14,7 @@ OPCODE(Prologue, Void, OPCODE(Epilogue, Void, ) OPCODE(Discard, Void, ) OPCODE(DiscardCond, Void, U1, ) +OPCODE(StoreFlatbuf, Void, U32, U32 ) OPCODE(DebugPrint, Void, StringLiteral, Opaque, Opaque, Opaque, Opaque, ) // Constant memory operations diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index bbf3fe8fb..7aa8283eb 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -12,11 +12,15 @@ #include "common/path_util.h" #include "shader_recompiler/info.h" #include "shader_recompiler/ir/breadth_first_search.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/num_executions.h" #include "shader_recompiler/ir/opcodes.h" +#include "shader_recompiler/ir/passes/ir_passes.h" #include "shader_recompiler/ir/passes/srt.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/srt_gvn_table.h" +#include "shader_recompiler/ir/subprogram.h" #include "shader_recompiler/ir/value.h" #include "src/common/arch.h" #include "src/common/decoder.h" @@ -57,28 +61,23 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code using namespace Shader; struct PassInfo { - // map offset to inst - using PtrUserList = boost::container::flat_map; + struct ReadConstData { + u32 offset_dw; + u32 count_dw; + IR::Inst* unique_inst; + IR::Inst* original_inst; + }; Optimization::SrtGvnTable gvn_table; - // keys are GetUserData or ReadConst instructions that are used as pointers - std::unordered_map pointer_uses; - // GetUserData instructions corresponding to sgpr_base of SRT roots - boost::container::small_flat_map srt_roots; - // pick a single inst for a given value number std::unordered_map vn_to_inst; + // map of all readconsts to their subprogram insts + boost::container::small_flat_map all_readconsts; + // subprogram insts mapped to their readconst data + boost::container::small_flat_map readconst_data; - // Bumped during codegen to assign offsets to readconsts - u32 dst_off_dw; - - PtrUserList* GetUsesAsPointer(IR::Inst* inst) { - auto it = pointer_uses.find(inst); - if (it != pointer_uses.end()) { - return &it->second; - } - return nullptr; - } + // Incremented during SRT program generation + u32 dst_off_dw = 0; // Return a single instruction that this instruction is identical to, according // to value number @@ -105,39 +104,79 @@ static inline void PopPtr(Xbyak::CodeGenerator& c) { c.pop(rdi); }; -static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info, - Xbyak::CodeGenerator& c) { - PushPtr(c, off_dw); - PassInfo::PtrUserList* use_list = pass_info.GetUsesAsPointer(subtree); - ASSERT(use_list); - - // First copy all the src data from this tree level - // That way, all data that was contiguous in the guest SRT is also contiguous in the - // flattened buffer. - // TODO src and dst are contiguous. Optimize with wider loads/stores - // TODO if this subtree is dynamically indexed, don't compact it (keep it sparse) - for (auto [src_off_dw, use] : *use_list) { - c.mov(r10d, ptr[rdi + (src_off_dw << 2)]); - c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r10d); - - use->SetFlags(pass_info.dst_off_dw); - pass_info.dst_off_dw++; +static IR::U32 WrapInstWithCounter(IR::Inst* inst, u32 inital_value, IR::Block* first_block) { + const IR::Block::ConditionalData* loop_data = &inst->GetParent()->CondData(); + while (loop_data != nullptr && + loop_data->asl_node->type != IR::AbstractSyntaxNode::Type::Loop) { + loop_data = loop_data->parent; } - - // Then visit any children used as pointers - for (const auto [src_off_dw, use] : *use_list) { - if (pass_info.GetUsesAsPointer(use)) { - VisitPointer(src_off_dw, use, pass_info, c); - } - } - - PopPtr(c); + ASSERT(loop_data != nullptr); + IR::Block* loop_body = loop_data->asl_node->data.loop.body; + // We are putting the Phi node in the loop header so that the counter is + // incremented each time the loop is executed. We point the Phi node to the + // first block so that the counter is not reset each time the loop is + // executed (nested loops) + IR::IREmitter ir_inst(*inst->GetParent(), ++IR::Block::InstructionList::s_iterator_to(*inst)); + IR::IREmitter ir_loop_header(*loop_body->ImmPredecessors().front()); + IR::Inst* phi = ir_loop_header.Phi(IR::Type::U32).Inst(); + IR::U32 inc = ir_inst.IAdd(IR::U32(phi), ir_inst.Imm32(1)); + phi->AddPhiOperand(first_block, ir_loop_header.Imm32(inital_value)); + phi->AddPhiOperand(inst->GetParent(), inc); + return IR::U32(phi); } -static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { - Xbyak::CodeGenerator& c = g_srt_codegen; +static void GenerateSrtReadConsts(IR::Program& program, PassInfo& pass_info, Pools& pools) { + IR::SubProgram sub_gen(&program, pools); + for (auto& [inst, sub_inst] : pass_info.all_readconsts) { + sub_inst = sub_gen.AddInst(inst); + pass_info.readconst_data[sub_inst] = {0, 0, pass_info.DeduplicateInstruction(sub_inst), + inst}; + } + IR::Program sub_program = sub_gen.GetSubProgram(); + IR::Block* original_first_block = program.blocks.front(); + IR::Block* sub_first_block = sub_program.blocks.front(); + for (auto& [inst, data] : pass_info.readconst_data) { + if (inst != data.unique_inst) { + PassInfo::ReadConstData& unique_data = pass_info.readconst_data[data.unique_inst]; + data.offset_dw = unique_data.offset_dw; + // In this context, count_dw is always the same as unique_data.count_dw + // There are no duplicate instructions in different loops + data.count_dw = unique_data.count_dw; + } else { + u32 count = static_cast(IR::GetNumExecutions(inst)); + ASSERT_MSG(count > 0, "Dynamic loop range not supported yet"); + data.count_dw = count; + data.offset_dw = pass_info.dst_off_dw; + pass_info.dst_off_dw += count; + IR::U32 save_offset; + if (data.count_dw > 1) { + save_offset = WrapInstWithCounter(inst, data.offset_dw, sub_first_block); + } else { + IR::IREmitter ir(*inst); + save_offset = ir.Imm32(data.offset_dw); + } + IR::IREmitter ir(*inst->GetParent(), + ++IR::Block::InstructionList::s_iterator_to(*inst)); + ir.StoreFlatbuf(IR::U32(inst), save_offset); + } + if (data.count_dw > 1) { + IR::U32 counter = + WrapInstWithCounter(data.original_inst, data.offset_dw, original_first_block); + data.original_inst->SetArg(1, counter); + } else { + IR::IREmitter ir(*data.original_inst); + data.original_inst->SetArg(1, ir.Imm32(data.offset_dw)); + } + } + DeadCodeEliminationPass(sub_program); + IR::DumpProgram(sub_program, sub_program.info, "srt"); +} - if (info.srt_info.srt_reservations.empty() && pass_info.srt_roots.empty()) { +static void GenerateSrtProgram(IR::Program& program, PassInfo& pass_info, Pools& pools) { + Xbyak::CodeGenerator& c = g_srt_codegen; + Shader::Info& info = program.info; + + if (info.srt_info.srt_reservations.empty() && pass_info.all_readconsts.empty()) { return; } @@ -167,10 +206,12 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw); - for (const auto& [sgpr_base, root] : pass_info.srt_roots) { - VisitPointer(static_cast(sgpr_base), root, pass_info, c); + if (!pass_info.all_readconsts.empty()) { + GenerateSrtReadConsts(program, pass_info, pools); } + info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw; + c.ret(); c.ready(); @@ -178,75 +219,25 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { size_t codesize = c.getCurr() - reinterpret_cast(info.srt_info.walker_func); DumpSrtProgram(info, reinterpret_cast(info.srt_info.walker_func), codesize); } - - info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw; } }; // namespace -void FlattenExtendedUserdataPass(IR::Program& program) { +void FlattenExtendedUserdataPass(IR::Program& program, Pools& pools) { Shader::Info& info = program.info; PassInfo pass_info; - // traverse at end and assign offsets to duplicate readconsts, using - // vn_to_inst as the source - boost::container::small_vector all_readconsts; - - for (auto r_it = program.post_order_blocks.rbegin(); r_it != program.post_order_blocks.rend(); - r_it++) { - IR::Block* block = *r_it; - for (IR::Inst& inst : *block) { + for (auto it = program.post_order_blocks.rbegin(); it != program.post_order_blocks.rend(); + ++it) { + IR::Block* block = *it; + for (auto& inst : block->Instructions()) { if (inst.GetOpcode() == IR::Opcode::ReadConst) { - if (!inst.Arg(1).IsImmediate()) { - LOG_WARNING(Render_Recompiler, "ReadConst has non-immediate offset"); - continue; - } - - all_readconsts.push_back(&inst); - if (pass_info.DeduplicateInstruction(&inst) != &inst) { - // This is a duplicate of a readconst we've already visited - continue; - } - - IR::Inst* ptr_composite = inst.Arg(0).InstRecursive(); - - const auto pred = [](IR::Inst* inst) -> std::optional { - if (inst->GetOpcode() == IR::Opcode::GetUserData || - inst->GetOpcode() == IR::Opcode::ReadConst) { - return inst; - } - return std::nullopt; - }; - auto base0 = IR::BreadthFirstSearch(ptr_composite->Arg(0), pred); - auto base1 = IR::BreadthFirstSearch(ptr_composite->Arg(1), pred); - ASSERT_MSG(base0 && base1, "ReadConst not from constant memory"); - - IR::Inst* ptr_lo = base0.value(); - ptr_lo = pass_info.DeduplicateInstruction(ptr_lo); - - auto ptr_uses_kv = - pass_info.pointer_uses.try_emplace(ptr_lo, PassInfo::PtrUserList{}); - PassInfo::PtrUserList& user_list = ptr_uses_kv.first->second; - - user_list[inst.Arg(1).U32()] = &inst; - - if (ptr_lo->GetOpcode() == IR::Opcode::GetUserData) { - IR::ScalarReg ud_reg = ptr_lo->Arg(0).ScalarReg(); - pass_info.srt_roots[ud_reg] = ptr_lo; - } + pass_info.all_readconsts[&inst] = nullptr; } } } - GenerateSrtProgram(info, pass_info); - - // Assign offsets to duplicate readconsts - for (IR::Inst* readconst : all_readconsts) { - ASSERT(pass_info.vn_to_inst.contains(pass_info.gvn_table.GetValueNumber(readconst))); - IR::Inst* original = pass_info.DeduplicateInstruction(readconst); - readconst->SetFlags(original->Flags()); - } - + GenerateSrtProgram(program, pass_info, pools); info.RefreshFlatBuf(); } diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 760dbb112..4b20f8c52 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -5,6 +5,7 @@ #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/pools.h" namespace Shader { struct Profile; @@ -16,7 +17,7 @@ void SsaRewritePass(IR::BlockList& program); void IdentityRemovalPass(IR::BlockList& program); void DeadCodeEliminationPass(IR::Program& program); void ConstantPropagationPass(IR::BlockList& program); -void FlattenExtendedUserdataPass(IR::Program& program); +void FlattenExtendedUserdataPass(IR::Program& program, Pools& pools); void ReadLaneEliminationPass(IR::Program& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); diff --git a/src/shader_recompiler/ir/program.cpp b/src/shader_recompiler/ir/program.cpp index 4071c9ac9..f2f6e34fa 100644 --- a/src/shader_recompiler/ir/program.cpp +++ b/src/shader_recompiler/ir/program.cpp @@ -15,7 +15,7 @@ namespace Shader::IR { -void DumpProgram(const Program& program, const Info& info) { +void DumpProgram(const Program& program, const Info& info, const std::string& type) { using namespace Common::FS; if (!Config::dumpShaders()) { @@ -26,7 +26,8 @@ void DumpProgram(const Program& program, const Info& info) { if (!std::filesystem::exists(dump_dir)) { std::filesystem::create_directories(dump_dir); } - const auto ir_filename = fmt::format("{}_{:#018x}.irprogram.txt", info.stage, info.pgm_hash); + const auto ir_filename = + fmt::format("{}_{:#018x}.{}irprogram.txt", info.stage, info.pgm_hash, type); const auto ir_file = IOFile{dump_dir / ir_filename, FileAccessMode::Write, FileType::TextFile}; size_t index{0}; @@ -43,7 +44,7 @@ void DumpProgram(const Program& program, const Info& info) { ir_file.WriteString(s); } - const auto asl_filename = fmt::format("{}_{:#018x}.asl.txt", info.stage, info.pgm_hash); + const auto asl_filename = fmt::format("{}_{:#018x}.{}asl.txt", info.stage, info.pgm_hash, type); const auto asl_file = IOFile{dump_dir / asl_filename, FileAccessMode::Write, FileType::TextFile}; diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h index 9ede71215..3ffd4dc96 100644 --- a/src/shader_recompiler/ir/program.h +++ b/src/shader_recompiler/ir/program.h @@ -21,6 +21,6 @@ struct Program { Info& info; }; -void DumpProgram(const Program& program, const Info& info); +void DumpProgram(const Program& program, const Info& info, const std::string& type = ""); } // namespace Shader::IR diff --git a/src/shader_recompiler/ir/srt_gvn_table.h b/src/shader_recompiler/ir/srt_gvn_table.h index 3baa1c7da..295a86814 100644 --- a/src/shader_recompiler/ir/srt_gvn_table.h +++ b/src/shader_recompiler/ir/srt_gvn_table.h @@ -51,20 +51,6 @@ private: u32 vn; switch (inst->GetOpcode()) { - case IR::Opcode::Phi: { - const auto pred = [](IR::Inst* inst) -> std::optional { - if (inst->GetOpcode() == IR::Opcode::GetUserData || - inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 || - inst->GetOpcode() == IR::Opcode::ReadConst) { - return inst; - } - return std::nullopt; - }; - IR::Inst* source = IR::BreadthFirstSearch(inst, pred).value(); - vn = GetValueNumber(source); - value_numbers[IR::Value(inst)] = vn; - break; - } case IR::Opcode::GetUserData: case IR::Opcode::CompositeConstructU32x2: case IR::Opcode::ReadConst: { diff --git a/src/shader_recompiler/ir/subprogram.cpp b/src/shader_recompiler/ir/subprogram.cpp index ac69ec61e..bb944e3ef 100644 --- a/src/shader_recompiler/ir/subprogram.cpp +++ b/src/shader_recompiler/ir/subprogram.cpp @@ -23,24 +23,7 @@ Block* SubProgram::AddBlock(Block* orig_block) { } Inst* SubProgram::AddInst(Inst* orig_inst) { - auto it = orig_inst_to_inst.find(orig_inst); - if (it != orig_inst_to_inst.end()) { - return it->second; - } - Block* block = AddBlock(orig_inst->GetParent()); - Inst inst(orig_inst->GetOpcode(), orig_inst->Flags()); - if (orig_inst->GetOpcode() == Opcode::Phi) { - AddPhi(orig_inst, &inst); - } else { - for (size_t i = 0; i < orig_inst->NumArgs(); ++i) { - SetArg(&inst, i, orig_inst->Arg(i)); - } - } - auto insertion_point = block->end(); - if (block->back().GetOpcode() == Opcode::ConditionRef) { - --insertion_point; - } - return &(*block->PrependNewInst(insertion_point, inst)); + return AddInst(orig_inst, std::nullopt); } Block* SubProgram::GetBlock(Block* orig_block) { @@ -64,6 +47,7 @@ Program SubProgram::GetSubProgram() { completed = true; Program sub_program(super_program->info); BuildBlockListAndASL(sub_program); + AddProlgueAndEpilogue(sub_program); sub_program.post_order_blocks = PostOrder(sub_program.syntax_list.front()); AddConditionalTreeFromASL(sub_program.syntax_list); for (Block* block : sub_program.blocks) { @@ -72,6 +56,47 @@ Program SubProgram::GetSubProgram() { return sub_program; } +void SubProgram::AddProlgueAndEpilogue(Program& sub_program) { + // We may need to handle this better. + Block* epilogue_block = pools.block_pool.Create(pools.inst_pool); + Block* front_block = sub_program.blocks.front(); + sub_program.blocks.back()->AddBranch(epilogue_block); + sub_program.blocks.push_back(epilogue_block); + sub_program.syntax_list.push_back(AbstractSyntaxNode{.data = {.block = epilogue_block}, + .type = AbstractSyntaxNode::Type::Block}); + sub_program.syntax_list.push_back(AbstractSyntaxNode{.type = AbstractSyntaxNode::Type::Return}); + epilogue_block->AppendNewInst(Opcode::Epilogue, {}); + front_block->PrependNewInst(front_block->begin(), Opcode::Prologue); + epilogue_block->SsaSeal(); +} + +Inst* SubProgram::AddInst(Inst* orig_inst, + std::optional insertion_point) { + auto it = orig_inst_to_inst.find(orig_inst); + if (it != orig_inst_to_inst.end()) { + return it->second; + } + Block* block = AddBlock(orig_inst->GetParent()); + if (!insertion_point) { + if (block->back().GetOpcode() == Opcode::ConditionRef) { + insertion_point = --block->end(); + } else { + insertion_point = block->end(); + } + } + Inst* inst = &( + *block->PrependNewInst(*insertion_point, orig_inst->GetOpcode(), orig_inst->Flags())); + orig_inst_to_inst[orig_inst] = inst; + if (orig_inst->GetOpcode() == Opcode::Phi) { + AddPhi(orig_inst, inst); + } else { + for (size_t i = 0; i < orig_inst->NumArgs(); ++i) { + SetArg(inst, orig_inst, i); + } + } + return inst; +} + void SubProgram::AddPhi(Inst* orig_phi, Inst* phi) { // Current IR only has Phis with 2 arguments. ASSERT(orig_phi->NumArgs() == 2); @@ -108,11 +133,18 @@ void SubProgram::AddPhi(Inst* orig_phi, Inst* phi) { } } -void SubProgram::SetArg(Inst* inst, size_t index, const Value& arg) { +void SubProgram::SetArg(Inst* inst, Inst* orig_inst, size_t index) { + const Value& arg = orig_inst->Arg(index); if (arg.IsImmediate()) { inst->SetArg(index, arg); } else { - inst->SetArg(index, Value(AddInst(arg.InstRecursive()))); + Inst* arg_inst = arg.InstRecursive(); + if (orig_inst->GetParent() == arg_inst->GetParent()) { + inst->SetArg(index, + Value(AddInst(arg_inst, Block::InstructionList::s_iterator_to(*inst)))); + } else { + inst->SetArg(index, Value(AddInst(arg_inst, std::nullopt))); + } } } @@ -216,6 +248,7 @@ void SubProgram::BuildBlockListAndASL(Program& sub_program) { break; } case AbstractSyntaxNode::Type::Unreachable: + case AbstractSyntaxNode::Type::Return: continue; default: break; diff --git a/src/shader_recompiler/ir/subprogram.h b/src/shader_recompiler/ir/subprogram.h index b14a31e3d..f2b61d411 100644 --- a/src/shader_recompiler/ir/subprogram.h +++ b/src/shader_recompiler/ir/subprogram.h @@ -27,12 +27,14 @@ struct SubProgram { Program GetSubProgram(); private: + Inst* AddInst(Inst* orig_inst, std::optional insertion_point); void AddPhi(Inst* orig_phi, Inst* phi); - void SetArg(Inst* inst, size_t index, const Value& arg); + void SetArg(Inst* inst, Inst* orig_inst, size_t index); void AddPhiOperand(Inst* phi, Block* block, const Value& arg); void BuildBlockListAndASL(Program& sub_program); + void AddProlgueAndEpilogue(Program& sub_program); bool completed = false; Program* super_program; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index b02ec706c..a28b508b9 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -75,7 +75,7 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info } Shader::Optimization::RingAccessElimination(program, runtime_info); Shader::Optimization::ReadLaneEliminationPass(program); - Shader::Optimization::FlattenExtendedUserdataPass(program); + Shader::Optimization::FlattenExtendedUserdataPass(program, pools); Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::LowerBufferFormatToRaw(program); Shader::Optimization::SharedMemoryToStoragePass(program, runtime_info, profile);