diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cdb2319d..fd4cde787 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -893,6 +893,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/attribute.h src/shader_recompiler/ir/basic_block.cpp src/shader_recompiler/ir/basic_block.h + src/shader_recompiler/ir/breadth_first_search.h src/shader_recompiler/ir/condition.h src/shader_recompiler/ir/ir_emitter.cpp src/shader_recompiler/ir/ir_emitter.h diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index def1ff8ce..95d269eb4 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -945,11 +945,11 @@ void EmitContext::DefineImagesAndSamplers() { const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding.unified++); Decorate(id, spv::Decoration::DescriptorSet, 0U); - auto sharp_desc = std::holds_alternative(samp_desc.sampler) - ? fmt::format("sgpr:{}", std::get(samp_desc.sampler)) - : fmt::format("inline:{:#x}:{:#x}", - std::get(samp_desc.sampler).raw0, - std::get(samp_desc.sampler).raw1); + const auto sharp_desc = + samp_desc.is_inline_sampler + ? 
fmt::format("inline:{:#x}:{:#x}", samp_desc.inline_sampler.raw0, + samp_desc.inline_sampler.raw1) + : fmt::format("sgpr:{}", samp_desc.sharp_idx); Name(id, fmt::format("{}_{}{}", stage, "samp", sharp_desc)); samplers.push_back(id); interfaces.push_back(id); diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index a7d2d1b13..fe082cb8c 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include #include "common/assert.h" #include "common/logging/log.h" #include "shader_recompiler/frontend/control_flow_graph.h" @@ -350,19 +349,7 @@ void CFG::LinkBlocks() { block.branch_false = end_block; block.end_class = EndClass::Branch; } else if (end_inst.opcode == Opcode::S_ENDPGM) { - const auto& prev_inst = inst_list[block.end_index - 1]; - if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) { - if (prev_inst.control.exp.target != 9) { - block.end_class = EndClass::Kill; - } else if (const auto& exec_mask = inst_list[block.end_index - 2]; - exec_mask.src[0].field == OperandField::ConstZero) { - block.end_class = EndClass::Kill; - } else { - block.end_class = EndClass::Exit; - } - } else { - block.end_class = EndClass::Exit; - } + block.end_class = EndClass::Exit; } else { UNREACHABLE(); } @@ -403,12 +390,6 @@ std::string CFG::Dot() const { fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", node_uid); ++node_uid; break; - case EndClass::Kill: - dot += fmt::format("\t\t{}->N{};\n", name, node_uid); - dot += - fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", node_uid); - ++node_uid; - break; } } dot += "\t\tlabel = \"main\";\n\t}\n"; diff --git a/src/shader_recompiler/frontend/control_flow_graph.h b/src/shader_recompiler/frontend/control_flow_graph.h index 88ea718cc..909bea6e4 100644 --- 
a/src/shader_recompiler/frontend/control_flow_graph.h +++ b/src/shader_recompiler/frontend/control_flow_graph.h @@ -23,7 +23,6 @@ using Hook = enum class EndClass { Branch, ///< Block ends with a (un)conditional branch. Exit, ///< Block ends with an exit instruction. - Kill, ///< Block ends with a discard instruction. }; /// A block represents a linear range of instructions. diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index bfff32087..41ae3c045 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -39,7 +39,6 @@ enum class StatementType { Loop, Break, Return, - Kill, Unreachable, Function, Identity, @@ -88,7 +87,6 @@ struct Statement : ListBaseHook { Statement(Break, Statement* cond_, Statement* up_) : cond{cond_}, up{up_}, type{StatementType::Break} {} Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {} - Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {} Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {} Statement(Identity, IR::Condition cond_, Statement* up_) @@ -174,9 +172,6 @@ std::string DumpExpr(const Statement* stmt) { case StatementType::Return: ret += fmt::format("{} return;\n", indent); break; - case StatementType::Kill: - ret += fmt::format("{} kill;\n", indent); - break; case StatementType::Unreachable: ret += fmt::format("{} unreachable;\n", indent); break; @@ -335,9 +330,9 @@ private: } } // Expensive operation: - if (!AreSiblings(goto_stmt, label_stmt)) { - UNREACHABLE_MSG("Goto is not a sibling with the label"); - } + // if (!AreSiblings(goto_stmt, label_stmt)) { + // UNREACHABLE_MSG("Goto is not a sibling with the label"); + //} // goto_stmt and label_stmt are guaranteed to be siblings, eliminate if (std::next(goto_stmt) == 
label_stmt) { // Simply eliminate the goto if the label is next to it @@ -410,9 +405,6 @@ private: case EndClass::Exit: root.insert(ip, *pool.Create(Return{}, &root_stmt)); break; - case EndClass::Kill: - root.insert(ip, *pool.Create(Kill{}, &root_stmt)); - break; } } } @@ -637,6 +629,7 @@ private: if (!stmt.block->is_dummy) { const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; + current_block->cfg_block = stmt.block; translator.Translate(current_block, stmt.block->begin, inst_list.subspan(start, size)); } @@ -770,18 +763,6 @@ private: syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; break; } - case StatementType::Kill: { - ensure_block(); - IR::Block* demote_block{MergeBlock(parent, stmt)}; - IR::IREmitter{*current_block}.Discard(); - current_block->AddBranch(demote_block); - current_block = demote_block; - - auto& merge{syntax_list.emplace_back()}; - merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.data.block = demote_block; - break; - } case StatementType::Unreachable: { ensure_block(); current_block = nullptr; @@ -789,7 +770,7 @@ private: break; } default: - throw NotImplementedException("Statement type {}", u32(stmt.type)); + UNREACHABLE_MSG("Statement type {}", u32(stmt.type)); } } if (current_block) { diff --git a/src/shader_recompiler/frontend/translate/scalar_flow.cpp b/src/shader_recompiler/frontend/translate/scalar_flow.cpp index 7b57d89ca..8e12adf6e 100644 --- a/src/shader_recompiler/frontend/translate/scalar_flow.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_flow.cpp @@ -6,7 +6,7 @@ namespace Shader::Gcn { -void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { +void Translator::EmitFlowControl(const GcnInst& inst) { switch (inst.opcode) { case Opcode::S_BARRIER: return S_BARRIER(); @@ -20,7 +20,7 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { LOG_WARNING(Render_Vulkan, "S_TRAP instruction!"); return; case Opcode::S_GETPC_B64: - return 
S_GETPC_B64(pc, inst); + return S_GETPC_B64(inst); case Opcode::S_SETPC_B64: case Opcode::S_WAITCNT: case Opcode::S_NOP: @@ -45,9 +45,7 @@ void Translator::S_BARRIER() { ir.Barrier(); } -void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) { - // This only really exists to let resource tracking pass know - // there is an inline cbuf. +void Translator::S_GETPC_B64(const GcnInst& inst) { const IR::ScalarReg dst{inst.dst[0].code}; ir.SetScalarReg(dst, ir.Imm32(pc)); ir.SetScalarReg(dst + 1, ir.Imm32(0)); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 39ca3eaa7..ad6cf5f12 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -520,14 +520,13 @@ void Translator::EmitFetch(const GcnInst& inst) { GcnDecodeContext decoder; // Decode and save instructions - u32 sub_pc = 0; while (!slice.atEnd()) { const auto sub_inst = decoder.decodeInstruction(slice); if (sub_inst.opcode == Opcode::S_SETPC_B64) { // Assume we're swapping back to the main shader. break; } - TranslateInstruction(sub_inst, sub_pc++); + TranslateInstruction(sub_inst); } return; } @@ -574,11 +573,12 @@ void Translator::LogMissingOpcode(const GcnInst& inst) { info.translation_failed = true; } -void Translator::Translate(IR::Block* block, u32 pc, std::span inst_list) { +void Translator::Translate(IR::Block* block, u32 start_pc, std::span inst_list) { if (inst_list.empty()) { return; } ir = IR::IREmitter{*block, block->begin()}; + pc = start_pc; for (const auto& inst : inst_list) { pc += inst.length; @@ -590,11 +590,11 @@ void Translator::Translate(IR::Block* block, u32 pc, std::span in continue; } - TranslateInstruction(inst, pc); + TranslateInstruction(inst); } } -void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) { +void Translator::TranslateInstruction(const GcnInst& inst) { // Emit instructions for each category. 
switch (inst.category) { case InstCategory::DataShare: @@ -613,7 +613,7 @@ void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) { EmitExport(inst); break; case InstCategory::FlowControl: - EmitFlowControl(pc, inst); + EmitFlowControl(inst); break; case InstCategory::ScalarALU: EmitScalarAlu(inst); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index d90806728..585c2f1b4 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -61,13 +61,13 @@ public: explicit Translator(Info& info, const RuntimeInfo& runtime_info, const Profile& profile); void Translate(IR::Block* block, u32 pc, std::span inst_list); - void TranslateInstruction(const GcnInst& inst, u32 pc); + void TranslateInstruction(const GcnInst& inst); // Instruction categories void EmitPrologue(IR::Block* first_block); void EmitFetch(const GcnInst& inst); void EmitExport(const GcnInst& inst); - void EmitFlowControl(u32 pc, const GcnInst& inst); + void EmitFlowControl(const GcnInst& inst); void EmitScalarAlu(const GcnInst& inst); void EmitScalarMemory(const GcnInst& inst); void EmitVectorAlu(const GcnInst& inst); @@ -126,7 +126,7 @@ public: void S_FLBIT_I32_B32(const GcnInst& inst); void S_FLBIT_I32_B64(const GcnInst& inst); void S_BITSET_B32(const GcnInst& inst, u32 bit_value); - void S_GETPC_B64(u32 pc, const GcnInst& inst); + void S_GETPC_B64(const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); void S_ABS_I32(const GcnInst& inst); @@ -337,6 +337,7 @@ private: std::unordered_map vgpr_map; std::array vgpr_to_interp{}; bool opcode_missing = false; + u32 pc{}; }; } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index ec9bc200d..872c89d7c 100644 --- 
a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -588,7 +588,7 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) { IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::ScalarReg tsharp_reg, const IR::ScalarReg sampler_reg, const IR::VectorReg addr_reg, - bool gather) { + bool gather, u32 pc) { const auto& mimg = inst.control.mimg; const auto flags = MimgModifierFlags(mimg.mod); @@ -602,6 +602,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal info.is_array.Assign(mimg.da); info.is_unnormalized.Assign(mimg.unrm); info.is_r128.Assign(mimg.r128); + info.pc.Assign(pc); if (gather) { info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); @@ -610,11 +611,11 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal info.has_derivatives.Assign(flags.test(MimgModifier::Derivative)); } - // Load first dword of T# and S#. We will use them as the handle that will guide resource - // tracking pass where to read the sharps. This will later also get patched to the SPIRV texture - // binding index. - const IR::Value handle = ir.GetScalarReg(tsharp_reg); - const IR::Value inline_sampler = + // Load first dword of T# and the full S#. We will use them as the handle that will guide + // resource tracking pass where to read the sharps. This will later also get patched to the + // backend texture binding index. + const IR::Value image_handle = ir.GetScalarReg(tsharp_reg); + const IR::Value sampler_handle = ir.CompositeConstruct(ir.GetScalarReg(sampler_reg), ir.GetScalarReg(sampler_reg + 1), ir.GetScalarReg(sampler_reg + 2), ir.GetScalarReg(sampler_reg + 3)); @@ -652,8 +653,8 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal const IR::Value address4 = get_addr_reg(12); // Issue the placeholder IR instruction. 
- IR::Value texel = - ir.ImageSampleRaw(handle, address1, address2, address3, address4, inline_sampler, info); + IR::Value texel = ir.ImageSampleRaw(image_handle, sampler_handle, address1, address2, address3, + address4, info); if (info.is_depth && !gather) { // For non-gather depth sampling, only return a single value. texel = ir.CompositeExtract(texel, 0); @@ -669,7 +670,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { const IR::ScalarReg sampler_reg{inst.src[3].code * 4}; const auto flags = MimgModifierFlags(mimg.mod); - const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, false); + const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, false, pc); for (u32 i = 0; i < 4; i++) { if (((mimg.dmask >> i) & 1) == 0) { continue; @@ -698,7 +699,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) { // should be always 1st (R) component for depth ASSERT(!flags.test(MimgModifier::Pcf) || mimg.dmask & 1); - const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, true); + const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, true, pc); for (u32 i = 0; i < 4; i++) { const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)}; ir.SetVectorReg(dest_reg++, value); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 11dd9c05e..d80f2956b 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -4,7 +4,6 @@ #pragma once #include -#include #include #include #include @@ -93,15 +92,12 @@ struct ImageResource { using ImageResourceList = boost::container::small_vector; struct SamplerResource { - std::variant sampler; + u32 sharp_idx; + AmdGpu::Sampler inline_sampler; + u32 is_inline_sampler : 1; u32 associated_image : 4; u32 disable_aniso : 1; - SamplerResource(u32 sharp_idx, u32 associated_image_, bool disable_aniso_) - : sampler{sharp_idx}, associated_image{associated_image_}, 
disable_aniso{disable_aniso_} {} - SamplerResource(AmdGpu::Sampler sampler_) - : sampler{sampler_}, associated_image{0}, disable_aniso(0) {} - constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; }; using SamplerResourceList = boost::container::small_vector; @@ -312,20 +308,24 @@ struct Info { DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType); constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { - return inline_cbuf ? inline_cbuf : info.ReadUdSharp(sharp_idx); + const auto buffer = inline_cbuf ? inline_cbuf : info.ReadUdSharp(sharp_idx); + if (!buffer.Valid()) { + LOG_DEBUG(Render, "Encountered invalid buffer sharp"); + return AmdGpu::Buffer::Null(); + } + return buffer; } constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept { - AmdGpu::Image image{0}; + AmdGpu::Image image{}; if (!is_r128) { image = info.ReadUdSharp(sharp_idx); } else { - const auto buf = info.ReadUdSharp(sharp_idx); - memcpy(&image, &buf, sizeof(buf)); + const auto raw = info.ReadUdSharp(sharp_idx); + std::memcpy(&image, &raw, sizeof(raw)); } if (!image.Valid()) { - // Fall back to null image if unbound. - LOG_DEBUG(Render_Vulkan, "Encountered unbound image!"); + LOG_DEBUG(Render_Vulkan, "Encountered invalid image sharp"); image = is_depth ? AmdGpu::Image::NullDepth() : AmdGpu::Image::Null(); } else if (is_depth) { const auto data_fmt = image.GetDataFmt(); @@ -338,9 +338,7 @@ constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept } constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept { - return std::holds_alternative(sampler) - ? std::get(sampler) - : info.ReadUdSharp(std::get(sampler)); + return is_inline_sampler ? 
inline_sampler : info.ReadUdSharp(sharp_idx); } constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept { diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index a312eabde..22af927d7 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -123,8 +123,8 @@ std::string DumpBlock(const Block& block, const std::map& ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces } - if (op == Opcode::ReadConst) { - ret += fmt::format(" (flags={}) ", inst.Flags()); + if (op == Opcode::ReadConst || op == Opcode::ImageSampleRaw) { + ret += fmt::format(" (flags={:#x}) ", inst.Flags()); } const size_t arg_count{inst.NumArgs()}; for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h index e3595338d..ad76ae17a 100644 --- a/src/shader_recompiler/ir/basic_block.h +++ b/src/shader_recompiler/ir/basic_block.h @@ -14,6 +14,10 @@ #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/value.h" +namespace Shader::Gcn { +struct Block; +} + namespace Shader::IR { class Block { @@ -150,6 +154,10 @@ public: std::array ssa_sbit_values; std::array ssa_vreg_values; + /// Block of the CFG that corresponds to this IR block. + /// It can be null as IR has additional control flow blocks. 
+ const Shader::Gcn::Block* cfg_block{}; + private: /// Memory pool for instruction list Common::ObjectPool* inst_pool; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index a6d43d102..498615b67 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -2105,11 +2105,11 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value); } -Value IREmitter::ImageSampleRaw(const Value& handle, const Value& address1, const Value& address2, - const Value& address3, const Value& address4, - const Value& inline_sampler, TextureInstInfo info) { - return Inst(Opcode::ImageSampleRaw, Flags{info}, handle, address1, address2, address3, address4, - inline_sampler); +Value IREmitter::ImageSampleRaw(const Value& image_handle, const Value& sampler_handle, + const Value& address1, const Value& address2, const Value& address3, + const Value& address4, TextureInstInfo info) { + return Inst(Opcode::ImageSampleRaw, Flags{info}, image_handle, sampler_handle, address1, + address2, address3, address4); } Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index e4afb8739..2cde957f3 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -359,9 +359,9 @@ public: [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, TextureInstInfo info); - [[nodiscard]] Value ImageSampleRaw(const Value& handle, const Value& address1, - const Value& address2, const Value& address3, - const Value& address4, const Value& inline_sampler, + [[nodiscard]] Value ImageSampleRaw(const Value& image_handle, const Value& sampler_handle, + const Value& address1, const Value& address2, + const Value& address3, 
const Value& address4, TextureInstInfo info); [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body, diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 280cd47ec..fecfa472c 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -436,7 +436,7 @@ OPCODE(ConvertS32S8, U32, U8, OPCODE(ConvertS32S16, U32, U16, ) // Image operations -OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, Opaque, ) +OPCODE(ImageSampleRaw, F32x4, Opaque, Opaque, F32x4, F32x4, F32x4, F32, ) OPCODE(ImageSampleImplicitLod, F32x4, Opaque, F32x4, F32, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, F32, Opaque, ) OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, ) @@ -445,7 +445,7 @@ OPCODE(ImageGather, F32x4, Opaq OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) -OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageRead, F32x4, Opaque, Opaque, U32, U32, ) OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32, F32x4, ) diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index 2a39d3a2e..dc1762fab 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -204,6 +204,18 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { } } +void FoldDiscardCond(IR::Inst& inst) { + const IR::U1 cond{inst.Arg(0)}; + if (!cond.IsImmediate()) { + return; + } + if (cond.U1()) { + inst.ReplaceOpcode(IR::Opcode::Discard); + } else { + inst.Invalidate(); + } +} + template void FoldAdd(IR::Block& block, IR::Inst& inst) { if (!FoldCommutative(inst, [](T a, T 
b) { return a + b; })) { @@ -505,6 +517,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldConvert(inst, IR::Opcode::ConvertF16F32); case IR::Opcode::ConvertF16F32: return FoldConvert(inst, IR::Opcode::ConvertF32F16); + case IR::Opcode::DiscardCond: + return FoldDiscardCond(inst); default: break; } diff --git a/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp index d6586bda0..6a9214f34 100644 --- a/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp +++ b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp @@ -78,10 +78,20 @@ static IR::Value GetRealValue(PhiMap& phi_map, IR::Inst* inst, u32 lane) { it->second = new_phi; // Gather all arguments. + boost::container::static_vector phi_args; for (size_t arg_index = 0; arg_index < inst->NumArgs(); arg_index++) { IR::Inst* arg_prod = inst->Arg(arg_index).InstRecursive(); const IR::Value arg = GetRealValue(phi_map, arg_prod, lane); - new_phi->AddPhiOperand(inst->PhiBlock(arg_index), arg); + phi_args.push_back(arg); + } + const IR::Value arg0 = phi_args[0].Resolve(); + if (std::ranges::all_of(phi_args, + [&](const IR::Value& arg) { return arg.Resolve() == arg0; })) { + new_phi->ReplaceUsesWith(arg0); + } else { + for (size_t arg_index = 0; arg_index < inst->NumArgs(); arg_index++) { + new_phi->AddPhiOperand(inst->PhiBlock(arg_index), phi_args[arg_index]); + } } return IR::Value{new_phi}; } diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 2cf39c98e..56f29a2c4 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/info.h" 
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/breadth_first_search.h" @@ -259,7 +260,9 @@ public: u32 Add(const SamplerResource& desc) { const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) { - return desc.sampler == existing.sampler; + return desc.sharp_idx == existing.sharp_idx && + desc.is_inline_sampler == existing.is_inline_sampler && + desc.inline_sampler == existing.inline_sampler; })}; return index; } @@ -313,11 +316,24 @@ std::pair TryDisableAnisoLod0(const IR::Inst* inst) { return not_found; } + // The bitfield extract might be hidden by phi sometimes + auto* prod0_arg0 = prod0->Arg(0).InstRecursive(); + if (prod0_arg0->GetOpcode() == IR::Opcode::Phi) { + auto arg0 = prod0_arg0->Arg(0); + auto arg1 = prod0_arg0->Arg(1); + if (!arg0.IsImmediate() && + arg0.InstRecursive()->GetOpcode() == IR::Opcode::BitFieldUExtract) { + prod0_arg0 = arg0.InstRecursive(); + } else if (!arg1.IsImmediate() && + arg1.InstRecursive()->GetOpcode() == IR::Opcode::BitFieldUExtract) { + prod0_arg0 = arg1.InstRecursive(); + } + } + // The bits range is for lods (note that constants are changed after constant propagation pass) - const auto* prod0_arg0 = prod0->Arg(0).InstRecursive(); if (prod0_arg0->GetOpcode() != IR::Opcode::BitFieldUExtract || - !(prod0_arg0->Arg(1).IsIdentity() && prod0_arg0->Arg(1).U32() == 12) || - !(prod0_arg0->Arg(2).IsIdentity() && prod0_arg0->Arg(2).U32() == 8)) { + !(prod0_arg0->Arg(1).IsImmediate() && prod0_arg0->Arg(1).U32() == 12) || + !(prod0_arg0->Arg(2).IsImmediate() && prod0_arg0->Arg(2).U32() == 8)) { return not_found; } @@ -330,102 +346,170 @@ std::pair TryDisableAnisoLod0(const IR::Inst* inst) { // We're working on the first dword of s# const auto* prod2 = inst->Arg(2).InstRecursive(); if (prod2->GetOpcode() != IR::Opcode::GetUserData && - prod2->GetOpcode() != IR::Opcode::ReadConst) { + prod2->GetOpcode() != IR::Opcode::ReadConst && prod2->GetOpcode() != IR::Opcode::Phi) { return 
not_found; } return {prod2, true}; } -SharpLocation AttemptTrackSharp(const IR::Inst* inst, auto& visited_insts) { - // Search until we find a potential sharp source. - const auto pred = [&visited_insts](const IR::Inst* inst) -> std::optional { - if (std::ranges::find(visited_insts, inst) != visited_insts.end()) { - return std::nullopt; +using SharpSources = boost::container::small_vector; + +bool IsSharpSource(const IR::Inst* inst) { + return inst->GetOpcode() == IR::Opcode::GetUserData || + inst->GetOpcode() == IR::Opcode::ReadConst; +} + +SharpSources FindSharpSources(const IR::Inst* handle, u32 pc) { + SharpSources sources; + if (IsSharpSource(handle)) { + sources.push_back(handle); + return sources; + } + + bool found_read_const_buffer = false; + + boost::container::small_vector visited; + std::queue queue; + queue.push(handle); + + while (!queue.empty()) { + const IR::Inst* inst{queue.front()}; + queue.pop(); + if (IsSharpSource(inst)) { + sources.push_back(inst); + continue; } - if (inst->GetOpcode() == IR::Opcode::GetUserData || - inst->GetOpcode() == IR::Opcode::ReadConst) { - return inst; + found_read_const_buffer |= inst->GetOpcode() == IR::Opcode::ReadConstBuffer; + if (inst->GetOpcode() != IR::Opcode::Phi) { + continue; } - return std::nullopt; - }; - const auto result = IR::BreadthFirstSearch(inst, pred); - ASSERT_MSG(result, "Unable to track sharp source"); - inst = result.value(); - visited_insts.emplace_back(inst); + for (size_t arg = inst->NumArgs(); arg--;) { + const IR::Value arg_value = inst->Arg(arg); + if (arg_value.IsImmediate()) { + continue; + } + const IR::Inst* arg_inst = arg_value.InstRecursive(); + if (std::ranges::find(visited, arg_inst) == visited.end()) { + visited.push_back(arg_inst); + queue.push(arg_inst); + } + } + } + if (sources.empty()) { + if (found_read_const_buffer) { + UNREACHABLE_MSG("Bindless sharp access detected pc={:#x}", pc); + } else { + UNREACHABLE_MSG("Unable to find sharp sources pc={:#x}", pc); + } + } + return 
sources; +} + +bool IsCfgBlockDominatedBy(const Shader::Gcn::Block* maybe_dominator, + const Shader::Gcn::Block* block, const Shader::Gcn::Block* dest_block) { + if (block == maybe_dominator) { + return true; + } + + boost::container::small_vector visited; + std::queue queue; + queue.push(block); + + while (!queue.empty()) { + const Shader::Gcn::Block* block{queue.front()}; + queue.pop(); + if (block == dest_block) { + return false; + } + if (block == maybe_dominator) { + continue; + } + if (block->branch_false && !std::ranges::contains(visited, block->branch_false)) { + visited.push_back(block->branch_false); + queue.push(block->branch_false); + } + if (block->branch_true && !std::ranges::contains(visited, block->branch_true)) { + visited.push_back(block->branch_true); + queue.push(block->branch_true); + } + } + + return true; +} + +SharpLocation SharpLocationFromSource(const IR::Inst* inst) { if (inst->GetOpcode() == IR::Opcode::GetUserData) { - return static_cast(inst->Arg(0).ScalarReg()); + return static_cast(inst->Arg(0).ScalarReg()); } else { - ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, - "Sharp load not from constant memory"); return inst->Flags(); } } -/// Tracks a sharp with validation of the chosen data type. -template -std::pair TrackSharp(const IR::Inst* inst, const Info& info) { - boost::container::small_vector visited_insts{}; - while (true) { - const auto prev_size = visited_insts.size(); - const auto sharp = AttemptTrackSharp(inst, visited_insts); - if (const auto data = info.ReadUdSharp(sharp); data.Valid()) { - return std::make_pair(sharp, data); +SharpLocation TrackSharp(const IR::Inst* inst, const IR::Block& current_parent, u32 pc = 0) { + auto sources = FindSharpSources(inst, pc); + size_t num_sources = sources.size(); + ASSERT(current_parent.cfg_block); + + // Perform dominance analysis on found sources and eliminate ones that don't pass + // If a sharp source is dominated by another, the former can be eliminated. 
+ for (s32 i = 0; i < num_sources;) { + const IR::Block* block = sources[i]->GetParent(); + ASSERT(block->cfg_block); + bool was_removed = false; + for (s32 j = 0; j < num_sources;) { + const IR::Block* dominator = sources[j]->GetParent(); + ASSERT(dominator->cfg_block); + if (i != j && IsCfgBlockDominatedBy(dominator->cfg_block, block->cfg_block, + current_parent.cfg_block)) { + std::swap(sources[i], sources[num_sources - 1]); + --num_sources; + sources.pop_back(); + was_removed = true; + break; + } else { + ++j; + } } - if (prev_size == visited_insts.size()) { - // No change in visited instructions, we've run out of paths. - UNREACHABLE_MSG("Unable to find valid sharp."); + if (!was_removed) { + ++i; } } -} -/// Tracks a sharp without data validation. -SharpLocation TrackSharp(const IR::Inst* inst, const Info& info) { - boost::container::static_vector visited_insts{}; - return AttemptTrackSharp(inst, visited_insts); -} - -s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, - AmdGpu::Buffer& cbuf) { - - // Assuming V# is in UD s[32:35] - // The next pattern: - // s_getpc_b64 s[32:33] - // s_add_u32 s32, , s32 - // s_addc_u32 s33, 0, s33 - // s_mov_b32 s35, - // s_movk_i32 s34, - // buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ... - // is used to define an inline constant buffer - - IR::Inst* handle = inst.Arg(0).InstRecursive(); - if (!handle->AreAllArgsImmediates()) { - return -1; - } - // We have found this pattern. Build the sharp. - std::array buffer; - buffer[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32); - buffer[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32; - cbuf = std::bit_cast(buffer); - // Assign a binding to this sharp. 
- return descriptors.Add(BufferResource{ - .sharp_idx = std::numeric_limits<u32>::max(), - .used_types = BufferDataType(inst, cbuf.GetNumberFmt()), - .inline_cbuf = cbuf, - .buffer_type = BufferType::Guest, - }); + ASSERT_MSG(sources.size() == 1, "Unable to deduce sharp source"); + return SharpLocationFromSource(sources[0]); } void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { - s32 binding{}; - AmdGpu::Buffer buffer; - if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) { - IR::Inst* handle = inst.Arg(0).InstRecursive(); - IR::Inst* producer = handle->Arg(0).InstRecursive(); - SharpLocation sharp; - std::tie(sharp, buffer) = TrackSharp<AmdGpu::Buffer>(producer, info); - binding = descriptors.Add(BufferResource{ - .sharp_idx = sharp, + IR::Inst* handle = inst.Arg(0).InstRecursive(); + u32 buffer_binding = 0; + if (handle->AreAllArgsImmediates()) { + // Assuming V# is in UD s[32:35] + // The next pattern: + // s_getpc_b64 s[32:33] + // s_add_u32 s32, <const>, s32 + // s_addc_u32 s33, 0, s33 + // s_mov_b32 s35, <const> + // s_movk_i32 s34, <const> + // buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ... + // is used to define an inline buffer resource + std::array<u64, 2> raw; + raw[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32); + raw[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32; + const auto buffer = std::bit_cast<AmdGpu::Buffer>(raw); + buffer_binding = descriptors.Add(BufferResource{ + .sharp_idx = std::numeric_limits<u32>::max(), + .used_types = BufferDataType(inst, buffer.GetNumberFmt()), + .inline_cbuf = buffer, + .buffer_type = BufferType::Guest, + }); + } else { + // Normal buffer resource.
+ IR::Inst* buffer_handle = handle->Arg(0).InstRecursive(); + const auto sharp_idx = TrackSharp(buffer_handle, block); + const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx); + buffer_binding = descriptors.Add(BufferResource{ + .sharp_idx = sharp_idx, .used_types = BufferDataType(inst, buffer.GetNumberFmt()), .buffer_type = BufferType::Guest, .is_written = IsBufferStore(inst), @@ -436,25 +520,14 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& // Replace handle with binding index in buffer resource list. IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - inst.SetArg(0, ir.Imm32(binding)); + inst.SetArg(0, ir.Imm32(buffer_binding)); } void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { - const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> { - const auto opcode = inst->GetOpcode(); - if (opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only) - opcode == IR::Opcode::GetUserData) { - return inst; - } - return std::nullopt; - }; - const auto result = IR::BreadthFirstSearch(&inst, pred); - ASSERT_MSG(result, "Unable to find image sharp source"); - const IR::Inst* tsharp_handle = result.value(); - // Read image sharp. - const auto tsharp = TrackSharp(tsharp_handle, info); const auto inst_info = inst.Flags<IR::TextureInstInfo>(); + const IR::Inst* image_handle = inst.Arg(0).InstRecursive(); + const auto tsharp = TrackSharp(image_handle, block, inst_info.pc); const bool is_atomic = IsImageAtomicInstruction(inst); const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite || is_atomic; const ImageResource image_res = { @@ -506,38 +579,34 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) { - // Read sampler sharp.
- const auto sampler_binding = [&] -> u32 { - const auto sampler = inst.Arg(5).InstRecursive(); - ASSERT(sampler && sampler->GetOpcode() == IR::Opcode::CompositeConstructU32x4); - const auto handle = sampler->Arg(0); - // Inline sampler resource. - if (handle.IsImmediate()) { - LOG_DEBUG(Render_Vulkan, "Inline sampler detected"); - const auto [s1, s2, s3, s4] = - std::tuple{sampler->Arg(0), sampler->Arg(1), sampler->Arg(2), sampler->Arg(3)}; - ASSERT(s1.IsImmediate() && s2.IsImmediate() && s3.IsImmediate() && - s4.IsImmediate()); - const auto inline_sampler = AmdGpu::Sampler{ - .raw0 = u64(s2.U32()) << 32 | u64(s1.U32()), - .raw1 = u64(s4.U32()) << 32 | u64(s3.U32()), - }; - const auto binding = descriptors.Add(SamplerResource{inline_sampler}); - return binding; - } else { - // Normal sampler resource. - const auto ssharp_handle = handle.InstRecursive(); - const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); - const auto ssharp = TrackSharp(ssharp_ud, info); - const auto binding = - descriptors.Add(SamplerResource{ssharp, image_binding, disable_aniso}); - return binding; - } - }(); - // Patch image and sampler handle. + u32 sampler_binding = 0; + const IR::Inst* sampler = inst.Arg(1).InstRecursive(); + ASSERT(sampler && sampler->GetOpcode() == IR::Opcode::CompositeConstructU32x4); + // Inline sampler resource. + if (sampler->AreAllArgsImmediates()) { + const auto inline_sampler = AmdGpu::Sampler{ + .raw0 = u64(sampler->Arg(1).U32()) << 32 | u64(sampler->Arg(0).U32()), + .raw1 = u64(sampler->Arg(3).U32()) << 32 | u64(sampler->Arg(2).U32()), + }; + sampler_binding = descriptors.Add(SamplerResource{ + .sharp_idx = std::numeric_limits<u32>::max(), + .inline_sampler = inline_sampler, + .is_inline_sampler = true, + }); + } else { + // Normal sampler resource.
+ const auto& [sampler_handle, disable_aniso] = + TryDisableAnisoLod0(sampler->Arg(0).InstRecursive()); + const auto ssharp = TrackSharp(sampler_handle, block, inst_info.pc); + sampler_binding = descriptors.Add(SamplerResource{ + .sharp_idx = ssharp, + .is_inline_sampler = false, + .associated_image = image_binding, + .disable_aniso = disable_aniso, + }); + } inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16)); } else { - // Patch image handle. inst.SetArg(0, ir.Imm32(image_binding)); } } @@ -768,10 +837,10 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, const auto inst_info = inst.Flags<IR::TextureInstInfo>(); const auto view_type = image.GetViewType(image_res.is_array); - IR::Inst* body1 = inst.Arg(1).InstRecursive(); - IR::Inst* body2 = inst.Arg(2).InstRecursive(); - IR::Inst* body3 = inst.Arg(3).InstRecursive(); - IR::F32 body4 = IR::F32{inst.Arg(4)}; + IR::Inst* body1 = inst.Arg(2).InstRecursive(); + IR::Inst* body2 = inst.Arg(3).InstRecursive(); + IR::Inst* body3 = inst.Arg(4).InstRecursive(); + IR::F32 body4 = IR::F32{inst.Arg(5)}; const auto get_addr_reg = [&](u32 index) -> IR::F32 { if (index <= 3) { return IR::F32{body1->Arg(index)}; } @@ -942,14 +1011,13 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) { return; } - const auto handle = inst.Arg(0); - const auto image_res = info.images[handle.U32() & 0xFFFF]; + const auto image_handle = inst.Arg(0); + const auto& image_res = info.images[image_handle.U32() & 0xFFFF]; auto image = image_res.GetSharp(info); // Sample instructions must be handled separately using address register data.
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) { - PatchImageSampleArgs(block, inst, info, image_res, image); - return; + return PatchImageSampleArgs(block, inst, info, image_res, image); } IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; @@ -963,17 +1031,13 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) { case AmdGpu::ImageType::Color1D: // x, [lod] return {body->Arg(0), body->Arg(1)}; case AmdGpu::ImageType::Color1DArray: // x, slice, [lod] - [[fallthrough]]; - case AmdGpu::ImageType::Color2D: // x, y, [lod] - [[fallthrough]]; - case AmdGpu::ImageType::Color2DMsaa: // x, y. (sample is passed on different argument) + case AmdGpu::ImageType::Color2D: // x, y, [lod] + case AmdGpu::ImageType::Color2DMsaa: // x, y. (sample is passed on different argument) return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)}; - case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod] - [[fallthrough]]; + case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod] case AmdGpu::ImageType::Color2DMsaaArray: // x, y, slice. (sample is passed on different // argument) - [[fallthrough]]; - case AmdGpu::ImageType::Color3D: // x, y, z, [lod] + case AmdGpu::ImageType::Color3D: // x, y, z, [lod] return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; default: UNREACHABLE_MSG("Unknown image type {}", view_type); @@ -988,7 +1052,7 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) { const auto is_storage = image_res.is_written; if (inst.GetOpcode() == IR::Opcode::ImageRead) { - auto texel = ir.ImageRead(handle, coords, lod, ms, inst_info); + auto texel = ir.ImageRead(image_handle, coords, lod, ms, inst_info); if (is_storage) { // Storage image requires shader swizzle. 
texel = ApplySwizzle(ir, texel, image.DstSelect()); diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index c534eecd8..96c5b2dc7 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -45,6 +45,7 @@ union TextureInstInfo { BitField<10, 1, u32> is_unnormalized; BitField<11, 1, u32> is_gather; BitField<12, 1, u32> is_r128; + BitField<16, 16, u32> pc; }; union BufferInstInfo { diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 5ede90200..ff9cfe2cc 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -486,6 +486,10 @@ struct Sampler { return raw0 != 0 || raw1 != 0; } + bool Valid() const { + return true; + } + bool operator==(const Sampler& other) const noexcept { return std::memcmp(this, &other, sizeof(Sampler)) == 0; }