From 93767ae31bafd03c5ae3a82ec37598f7838502c6 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Mon, 28 Jul 2025 23:32:16 +0300 Subject: [PATCH] shader_recompiler: Rework sharp tracking for robustness (#3327) * shader_recompiler: Remove remnants of old discard Also constant propagate conditional discard if condition is constant * resource_tracking_pass: Rework sharp tracking for robustness * resource_tracking_pass: Add source dominance analysis When reachability is not enough to prune source list, check if a source dominates all other sources * resource_tracking_pass: Fix immediate check How did this work before? * resource_tracking_pass: Remove unused template type * readlane_elimination_pass: Don't add phi when all args are the same New sharp tracking exposed some bad sources coming on sampler sharps with aniso disable pattern that also were part of readlane pattern, fix tracking by removing the unnecessary phis in between * resource_tracking_pass: Allow phi in disable aniso pattern * resource_tracking_pass: Handle invalid buffer sharp and more phi in aniso pattern --- CMakeLists.txt | 1 + .../backend/spirv/spirv_emit_context.cpp | 10 +- .../frontend/control_flow_graph.cpp | 21 +- .../frontend/control_flow_graph.h | 1 - .../frontend/structured_control_flow.cpp | 29 +- .../frontend/translate/scalar_flow.cpp | 8 +- .../frontend/translate/translate.cpp | 12 +- .../frontend/translate/translate.h | 7 +- .../frontend/translate/vector_memory.cpp | 21 +- src/shader_recompiler/info.h | 30 +- src/shader_recompiler/ir/basic_block.cpp | 4 +- src/shader_recompiler/ir/basic_block.h | 8 + src/shader_recompiler/ir/ir_emitter.cpp | 10 +- src/shader_recompiler/ir/ir_emitter.h | 6 +- src/shader_recompiler/ir/opcodes.inc | 4 +- .../ir/passes/constant_propagation_pass.cpp | 14 + .../ir/passes/readlane_elimination_pass.cpp | 12 +- .../ir/passes/resource_tracking_pass.cpp | 344 +++++++++++------- src/shader_recompiler/ir/reg.h | 1 + src/video_core/amdgpu/resource.h | 4 + 20 files changed, 
304 insertions(+), 243 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cdb2319d..fd4cde787 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -893,6 +893,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/attribute.h src/shader_recompiler/ir/basic_block.cpp src/shader_recompiler/ir/basic_block.h + src/shader_recompiler/ir/breadth_first_search.h src/shader_recompiler/ir/condition.h src/shader_recompiler/ir/ir_emitter.cpp src/shader_recompiler/ir/ir_emitter.h diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index def1ff8ce..95d269eb4 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -945,11 +945,11 @@ void EmitContext::DefineImagesAndSamplers() { const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding.unified++); Decorate(id, spv::Decoration::DescriptorSet, 0U); - auto sharp_desc = std::holds_alternative(samp_desc.sampler) - ? fmt::format("sgpr:{}", std::get(samp_desc.sampler)) - : fmt::format("inline:{:#x}:{:#x}", - std::get(samp_desc.sampler).raw0, - std::get(samp_desc.sampler).raw1); + const auto sharp_desc = + samp_desc.is_inline_sampler + ? 
fmt::format("inline:{:#x}:{:#x}", samp_desc.inline_sampler.raw0, + samp_desc.inline_sampler.raw1) + : fmt::format("sgpr:{}", samp_desc.sharp_idx); Name(id, fmt::format("{}_{}{}", stage, "samp", sharp_desc)); samplers.push_back(id); interfaces.push_back(id); diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index a7d2d1b13..fe082cb8c 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include #include "common/assert.h" #include "common/logging/log.h" #include "shader_recompiler/frontend/control_flow_graph.h" @@ -350,19 +349,7 @@ void CFG::LinkBlocks() { block.branch_false = end_block; block.end_class = EndClass::Branch; } else if (end_inst.opcode == Opcode::S_ENDPGM) { - const auto& prev_inst = inst_list[block.end_index - 1]; - if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) { - if (prev_inst.control.exp.target != 9) { - block.end_class = EndClass::Kill; - } else if (const auto& exec_mask = inst_list[block.end_index - 2]; - exec_mask.src[0].field == OperandField::ConstZero) { - block.end_class = EndClass::Kill; - } else { - block.end_class = EndClass::Exit; - } - } else { - block.end_class = EndClass::Exit; - } + block.end_class = EndClass::Exit; } else { UNREACHABLE(); } @@ -403,12 +390,6 @@ std::string CFG::Dot() const { fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", node_uid); ++node_uid; break; - case EndClass::Kill: - dot += fmt::format("\t\t{}->N{};\n", name, node_uid); - dot += - fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", node_uid); - ++node_uid; - break; } } dot += "\t\tlabel = \"main\";\n\t}\n"; diff --git a/src/shader_recompiler/frontend/control_flow_graph.h b/src/shader_recompiler/frontend/control_flow_graph.h index 88ea718cc..909bea6e4 100644 --- 
a/src/shader_recompiler/frontend/control_flow_graph.h +++ b/src/shader_recompiler/frontend/control_flow_graph.h @@ -23,7 +23,6 @@ using Hook = enum class EndClass { Branch, ///< Block ends with a (un)conditional branch. Exit, ///< Block ends with an exit instruction. - Kill, ///< Block ends with a discard instruction. }; /// A block represents a linear range of instructions. diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index bfff32087..41ae3c045 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -39,7 +39,6 @@ enum class StatementType { Loop, Break, Return, - Kill, Unreachable, Function, Identity, @@ -88,7 +87,6 @@ struct Statement : ListBaseHook { Statement(Break, Statement* cond_, Statement* up_) : cond{cond_}, up{up_}, type{StatementType::Break} {} Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {} - Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {} Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {} Statement(Identity, IR::Condition cond_, Statement* up_) @@ -174,9 +172,6 @@ std::string DumpExpr(const Statement* stmt) { case StatementType::Return: ret += fmt::format("{} return;\n", indent); break; - case StatementType::Kill: - ret += fmt::format("{} kill;\n", indent); - break; case StatementType::Unreachable: ret += fmt::format("{} unreachable;\n", indent); break; @@ -335,9 +330,9 @@ private: } } // Expensive operation: - if (!AreSiblings(goto_stmt, label_stmt)) { - UNREACHABLE_MSG("Goto is not a sibling with the label"); - } + // if (!AreSiblings(goto_stmt, label_stmt)) { + // UNREACHABLE_MSG("Goto is not a sibling with the label"); + //} // goto_stmt and label_stmt are guaranteed to be siblings, eliminate if (std::next(goto_stmt) == 
label_stmt) { // Simply eliminate the goto if the label is next to it @@ -410,9 +405,6 @@ private: case EndClass::Exit: root.insert(ip, *pool.Create(Return{}, &root_stmt)); break; - case EndClass::Kill: - root.insert(ip, *pool.Create(Kill{}, &root_stmt)); - break; } } } @@ -637,6 +629,7 @@ private: if (!stmt.block->is_dummy) { const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; + current_block->cfg_block = stmt.block; translator.Translate(current_block, stmt.block->begin, inst_list.subspan(start, size)); } @@ -770,18 +763,6 @@ private: syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; break; } - case StatementType::Kill: { - ensure_block(); - IR::Block* demote_block{MergeBlock(parent, stmt)}; - IR::IREmitter{*current_block}.Discard(); - current_block->AddBranch(demote_block); - current_block = demote_block; - - auto& merge{syntax_list.emplace_back()}; - merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.data.block = demote_block; - break; - } case StatementType::Unreachable: { ensure_block(); current_block = nullptr; @@ -789,7 +770,7 @@ private: break; } default: - throw NotImplementedException("Statement type {}", u32(stmt.type)); + UNREACHABLE_MSG("Statement type {}", u32(stmt.type)); } } if (current_block) { diff --git a/src/shader_recompiler/frontend/translate/scalar_flow.cpp b/src/shader_recompiler/frontend/translate/scalar_flow.cpp index 7b57d89ca..8e12adf6e 100644 --- a/src/shader_recompiler/frontend/translate/scalar_flow.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_flow.cpp @@ -6,7 +6,7 @@ namespace Shader::Gcn { -void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { +void Translator::EmitFlowControl(const GcnInst& inst) { switch (inst.opcode) { case Opcode::S_BARRIER: return S_BARRIER(); @@ -20,7 +20,7 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { LOG_WARNING(Render_Vulkan, "S_TRAP instruction!"); return; case Opcode::S_GETPC_B64: - return 
S_GETPC_B64(pc, inst); + return S_GETPC_B64(inst); case Opcode::S_SETPC_B64: case Opcode::S_WAITCNT: case Opcode::S_NOP: @@ -45,9 +45,7 @@ void Translator::S_BARRIER() { ir.Barrier(); } -void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) { - // This only really exists to let resource tracking pass know - // there is an inline cbuf. +void Translator::S_GETPC_B64(const GcnInst& inst) { const IR::ScalarReg dst{inst.dst[0].code}; ir.SetScalarReg(dst, ir.Imm32(pc)); ir.SetScalarReg(dst + 1, ir.Imm32(0)); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 39ca3eaa7..ad6cf5f12 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -520,14 +520,13 @@ void Translator::EmitFetch(const GcnInst& inst) { GcnDecodeContext decoder; // Decode and save instructions - u32 sub_pc = 0; while (!slice.atEnd()) { const auto sub_inst = decoder.decodeInstruction(slice); if (sub_inst.opcode == Opcode::S_SETPC_B64) { // Assume we're swapping back to the main shader. break; } - TranslateInstruction(sub_inst, sub_pc++); + TranslateInstruction(sub_inst); } return; } @@ -574,11 +573,12 @@ void Translator::LogMissingOpcode(const GcnInst& inst) { info.translation_failed = true; } -void Translator::Translate(IR::Block* block, u32 pc, std::span inst_list) { +void Translator::Translate(IR::Block* block, u32 start_pc, std::span inst_list) { if (inst_list.empty()) { return; } ir = IR::IREmitter{*block, block->begin()}; + pc = start_pc; for (const auto& inst : inst_list) { pc += inst.length; @@ -590,11 +590,11 @@ void Translator::Translate(IR::Block* block, u32 pc, std::span in continue; } - TranslateInstruction(inst, pc); + TranslateInstruction(inst); } } -void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) { +void Translator::TranslateInstruction(const GcnInst& inst) { // Emit instructions for each category. 
switch (inst.category) { case InstCategory::DataShare: @@ -613,7 +613,7 @@ void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) { EmitExport(inst); break; case InstCategory::FlowControl: - EmitFlowControl(pc, inst); + EmitFlowControl(inst); break; case InstCategory::ScalarALU: EmitScalarAlu(inst); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index d90806728..585c2f1b4 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -61,13 +61,13 @@ public: explicit Translator(Info& info, const RuntimeInfo& runtime_info, const Profile& profile); void Translate(IR::Block* block, u32 pc, std::span inst_list); - void TranslateInstruction(const GcnInst& inst, u32 pc); + void TranslateInstruction(const GcnInst& inst); // Instruction categories void EmitPrologue(IR::Block* first_block); void EmitFetch(const GcnInst& inst); void EmitExport(const GcnInst& inst); - void EmitFlowControl(u32 pc, const GcnInst& inst); + void EmitFlowControl(const GcnInst& inst); void EmitScalarAlu(const GcnInst& inst); void EmitScalarMemory(const GcnInst& inst); void EmitVectorAlu(const GcnInst& inst); @@ -126,7 +126,7 @@ public: void S_FLBIT_I32_B32(const GcnInst& inst); void S_FLBIT_I32_B64(const GcnInst& inst); void S_BITSET_B32(const GcnInst& inst, u32 bit_value); - void S_GETPC_B64(u32 pc, const GcnInst& inst); + void S_GETPC_B64(const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); void S_ABS_I32(const GcnInst& inst); @@ -337,6 +337,7 @@ private: std::unordered_map vgpr_map; std::array vgpr_to_interp{}; bool opcode_missing = false; + u32 pc{}; }; } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index ec9bc200d..872c89d7c 100644 --- 
a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -588,7 +588,7 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) { IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::ScalarReg tsharp_reg, const IR::ScalarReg sampler_reg, const IR::VectorReg addr_reg, - bool gather) { + bool gather, u32 pc) { const auto& mimg = inst.control.mimg; const auto flags = MimgModifierFlags(mimg.mod); @@ -602,6 +602,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal info.is_array.Assign(mimg.da); info.is_unnormalized.Assign(mimg.unrm); info.is_r128.Assign(mimg.r128); + info.pc.Assign(pc); if (gather) { info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); @@ -610,11 +611,11 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal info.has_derivatives.Assign(flags.test(MimgModifier::Derivative)); } - // Load first dword of T# and S#. We will use them as the handle that will guide resource - // tracking pass where to read the sharps. This will later also get patched to the SPIRV texture - // binding index. - const IR::Value handle = ir.GetScalarReg(tsharp_reg); - const IR::Value inline_sampler = + // Load first dword of T# and the full S#. We will use them as the handle that will guide + // resource tracking pass where to read the sharps. This will later also get patched to the + // backend texture binding index. + const IR::Value image_handle = ir.GetScalarReg(tsharp_reg); + const IR::Value sampler_handle = ir.CompositeConstruct(ir.GetScalarReg(sampler_reg), ir.GetScalarReg(sampler_reg + 1), ir.GetScalarReg(sampler_reg + 2), ir.GetScalarReg(sampler_reg + 3)); @@ -652,8 +653,8 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal const IR::Value address4 = get_addr_reg(12); // Issue the placeholder IR instruction. 
- IR::Value texel = - ir.ImageSampleRaw(handle, address1, address2, address3, address4, inline_sampler, info); + IR::Value texel = ir.ImageSampleRaw(image_handle, sampler_handle, address1, address2, address3, + address4, info); if (info.is_depth && !gather) { // For non-gather depth sampling, only return a single value. texel = ir.CompositeExtract(texel, 0); @@ -669,7 +670,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { const IR::ScalarReg sampler_reg{inst.src[3].code * 4}; const auto flags = MimgModifierFlags(mimg.mod); - const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, false); + const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, false, pc); for (u32 i = 0; i < 4; i++) { if (((mimg.dmask >> i) & 1) == 0) { continue; @@ -698,7 +699,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) { // should be always 1st (R) component for depth ASSERT(!flags.test(MimgModifier::Pcf) || mimg.dmask & 1); - const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, true); + const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, true, pc); for (u32 i = 0; i < 4; i++) { const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)}; ir.SetVectorReg(dest_reg++, value); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 11dd9c05e..d80f2956b 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -4,7 +4,6 @@ #pragma once #include -#include #include #include #include @@ -93,15 +92,12 @@ struct ImageResource { using ImageResourceList = boost::container::small_vector; struct SamplerResource { - std::variant sampler; + u32 sharp_idx; + AmdGpu::Sampler inline_sampler; + u32 is_inline_sampler : 1; u32 associated_image : 4; u32 disable_aniso : 1; - SamplerResource(u32 sharp_idx, u32 associated_image_, bool disable_aniso_) - : sampler{sharp_idx}, associated_image{associated_image_}, 
disable_aniso{disable_aniso_} {} - SamplerResource(AmdGpu::Sampler sampler_) - : sampler{sampler_}, associated_image{0}, disable_aniso(0) {} - constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; }; using SamplerResourceList = boost::container::small_vector; @@ -312,20 +308,24 @@ struct Info { DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType); constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { - return inline_cbuf ? inline_cbuf : info.ReadUdSharp(sharp_idx); + const auto buffer = inline_cbuf ? inline_cbuf : info.ReadUdSharp(sharp_idx); + if (!buffer.Valid()) { + LOG_DEBUG(Render, "Encountered invalid buffer sharp"); + return AmdGpu::Buffer::Null(); + } + return buffer; } constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept { - AmdGpu::Image image{0}; + AmdGpu::Image image{}; if (!is_r128) { image = info.ReadUdSharp(sharp_idx); } else { - const auto buf = info.ReadUdSharp(sharp_idx); - memcpy(&image, &buf, sizeof(buf)); + const auto raw = info.ReadUdSharp(sharp_idx); + std::memcpy(&image, &raw, sizeof(raw)); } if (!image.Valid()) { - // Fall back to null image if unbound. - LOG_DEBUG(Render_Vulkan, "Encountered unbound image!"); + LOG_DEBUG(Render_Vulkan, "Encountered invalid image sharp"); image = is_depth ? AmdGpu::Image::NullDepth() : AmdGpu::Image::Null(); } else if (is_depth) { const auto data_fmt = image.GetDataFmt(); @@ -338,9 +338,7 @@ constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept } constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept { - return std::holds_alternative(sampler) - ? std::get(sampler) - : info.ReadUdSharp(std::get(sampler)); + return is_inline_sampler ? 
inline_sampler : info.ReadUdSharp(sharp_idx); } constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept { diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index a312eabde..22af927d7 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -123,8 +123,8 @@ std::string DumpBlock(const Block& block, const std::map& ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces } - if (op == Opcode::ReadConst) { - ret += fmt::format(" (flags={}) ", inst.Flags()); + if (op == Opcode::ReadConst || op == Opcode::ImageSampleRaw) { + ret += fmt::format(" (flags={:#x}) ", inst.Flags()); } const size_t arg_count{inst.NumArgs()}; for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h index e3595338d..ad76ae17a 100644 --- a/src/shader_recompiler/ir/basic_block.h +++ b/src/shader_recompiler/ir/basic_block.h @@ -14,6 +14,10 @@ #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/value.h" +namespace Shader::Gcn { +struct Block; +} + namespace Shader::IR { class Block { @@ -150,6 +154,10 @@ public: std::array ssa_sbit_values; std::array ssa_vreg_values; + /// Block of the CFG that corresponds to this IR block. + /// It can be null as IR has additional control flow blocks. 
+ const Shader::Gcn::Block* cfg_block{}; + private: /// Memory pool for instruction list Common::ObjectPool* inst_pool; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index a6d43d102..498615b67 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -2105,11 +2105,11 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value); } -Value IREmitter::ImageSampleRaw(const Value& handle, const Value& address1, const Value& address2, - const Value& address3, const Value& address4, - const Value& inline_sampler, TextureInstInfo info) { - return Inst(Opcode::ImageSampleRaw, Flags{info}, handle, address1, address2, address3, address4, - inline_sampler); +Value IREmitter::ImageSampleRaw(const Value& image_handle, const Value& sampler_handle, + const Value& address1, const Value& address2, const Value& address3, + const Value& address4, TextureInstInfo info) { + return Inst(Opcode::ImageSampleRaw, Flags{info}, image_handle, sampler_handle, address1, + address2, address3, address4); } Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index e4afb8739..2cde957f3 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -359,9 +359,9 @@ public: [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, TextureInstInfo info); - [[nodiscard]] Value ImageSampleRaw(const Value& handle, const Value& address1, - const Value& address2, const Value& address3, - const Value& address4, const Value& inline_sampler, + [[nodiscard]] Value ImageSampleRaw(const Value& image_handle, const Value& sampler_handle, + const Value& address1, const Value& address2, + const Value& address3, 
const Value& address4, TextureInstInfo info); [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body, diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 280cd47ec..fecfa472c 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -436,7 +436,7 @@ OPCODE(ConvertS32S8, U32, U8, OPCODE(ConvertS32S16, U32, U16, ) // Image operations -OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, Opaque, ) +OPCODE(ImageSampleRaw, F32x4, Opaque, Opaque, F32x4, F32x4, F32x4, F32, ) OPCODE(ImageSampleImplicitLod, F32x4, Opaque, F32x4, F32, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, F32, Opaque, ) OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, ) @@ -445,7 +445,7 @@ OPCODE(ImageGather, F32x4, Opaq OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) -OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageRead, F32x4, Opaque, Opaque, U32, U32, ) OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32, F32x4, ) diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index 2a39d3a2e..dc1762fab 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -204,6 +204,18 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { } } +void FoldDiscardCond(IR::Inst& inst) { + const IR::U1 cond{inst.Arg(0)}; + if (!cond.IsImmediate()) { + return; + } + if (cond.U1()) { + inst.ReplaceOpcode(IR::Opcode::Discard); + } else { + inst.Invalidate(); + } +} + template void FoldAdd(IR::Block& block, IR::Inst& inst) { if (!FoldCommutative(inst, [](T a, T 
b) { return a + b; })) { @@ -505,6 +517,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldConvert(inst, IR::Opcode::ConvertF16F32); case IR::Opcode::ConvertF16F32: return FoldConvert(inst, IR::Opcode::ConvertF32F16); + case IR::Opcode::DiscardCond: + return FoldDiscardCond(inst); default: break; } diff --git a/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp index d6586bda0..6a9214f34 100644 --- a/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp +++ b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp @@ -78,10 +78,20 @@ static IR::Value GetRealValue(PhiMap& phi_map, IR::Inst* inst, u32 lane) { it->second = new_phi; // Gather all arguments. + boost::container::static_vector phi_args; for (size_t arg_index = 0; arg_index < inst->NumArgs(); arg_index++) { IR::Inst* arg_prod = inst->Arg(arg_index).InstRecursive(); const IR::Value arg = GetRealValue(phi_map, arg_prod, lane); - new_phi->AddPhiOperand(inst->PhiBlock(arg_index), arg); + phi_args.push_back(arg); + } + const IR::Value arg0 = phi_args[0].Resolve(); + if (std::ranges::all_of(phi_args, + [&](const IR::Value& arg) { return arg.Resolve() == arg0; })) { + new_phi->ReplaceUsesWith(arg0); + } else { + for (size_t arg_index = 0; arg_index < inst->NumArgs(); arg_index++) { + new_phi->AddPhiOperand(inst->PhiBlock(arg_index), phi_args[arg_index]); + } } return IR::Value{new_phi}; } diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 2cf39c98e..56f29a2c4 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/info.h" 
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/breadth_first_search.h" @@ -259,7 +260,9 @@ public: u32 Add(const SamplerResource& desc) { const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) { - return desc.sampler == existing.sampler; + return desc.sharp_idx == existing.sharp_idx && + desc.is_inline_sampler == existing.is_inline_sampler && + desc.inline_sampler == existing.inline_sampler; })}; return index; } @@ -313,11 +316,24 @@ std::pair TryDisableAnisoLod0(const IR::Inst* inst) { return not_found; } + // The bitfield extract might be hidden by phi sometimes + auto* prod0_arg0 = prod0->Arg(0).InstRecursive(); + if (prod0_arg0->GetOpcode() == IR::Opcode::Phi) { + auto arg0 = prod0_arg0->Arg(0); + auto arg1 = prod0_arg0->Arg(1); + if (!arg0.IsImmediate() && + arg0.InstRecursive()->GetOpcode() == IR::Opcode::BitFieldUExtract) { + prod0_arg0 = arg0.InstRecursive(); + } else if (!arg1.IsImmediate() && + arg1.InstRecursive()->GetOpcode() == IR::Opcode::BitFieldUExtract) { + prod0_arg0 = arg1.InstRecursive(); + } + } + // The bits range is for lods (note that constants are changed after constant propagation pass) - const auto* prod0_arg0 = prod0->Arg(0).InstRecursive(); if (prod0_arg0->GetOpcode() != IR::Opcode::BitFieldUExtract || - !(prod0_arg0->Arg(1).IsIdentity() && prod0_arg0->Arg(1).U32() == 12) || - !(prod0_arg0->Arg(2).IsIdentity() && prod0_arg0->Arg(2).U32() == 8)) { + !(prod0_arg0->Arg(1).IsImmediate() && prod0_arg0->Arg(1).U32() == 12) || + !(prod0_arg0->Arg(2).IsImmediate() && prod0_arg0->Arg(2).U32() == 8)) { return not_found; } @@ -330,102 +346,170 @@ std::pair TryDisableAnisoLod0(const IR::Inst* inst) { // We're working on the first dword of s# const auto* prod2 = inst->Arg(2).InstRecursive(); if (prod2->GetOpcode() != IR::Opcode::GetUserData && - prod2->GetOpcode() != IR::Opcode::ReadConst) { + prod2->GetOpcode() != IR::Opcode::ReadConst && prod2->GetOpcode() != IR::Opcode::Phi) { return 
not_found; } return {prod2, true}; } -SharpLocation AttemptTrackSharp(const IR::Inst* inst, auto& visited_insts) { - // Search until we find a potential sharp source. - const auto pred = [&visited_insts](const IR::Inst* inst) -> std::optional { - if (std::ranges::find(visited_insts, inst) != visited_insts.end()) { - return std::nullopt; +using SharpSources = boost::container::small_vector; + +bool IsSharpSource(const IR::Inst* inst) { + return inst->GetOpcode() == IR::Opcode::GetUserData || + inst->GetOpcode() == IR::Opcode::ReadConst; +} + +SharpSources FindSharpSources(const IR::Inst* handle, u32 pc) { + SharpSources sources; + if (IsSharpSource(handle)) { + sources.push_back(handle); + return sources; + } + + bool found_read_const_buffer = false; + + boost::container::small_vector visited; + std::queue queue; + queue.push(handle); + + while (!queue.empty()) { + const IR::Inst* inst{queue.front()}; + queue.pop(); + if (IsSharpSource(inst)) { + sources.push_back(inst); + continue; } - if (inst->GetOpcode() == IR::Opcode::GetUserData || - inst->GetOpcode() == IR::Opcode::ReadConst) { - return inst; + found_read_const_buffer |= inst->GetOpcode() == IR::Opcode::ReadConstBuffer; + if (inst->GetOpcode() != IR::Opcode::Phi) { + continue; } - return std::nullopt; - }; - const auto result = IR::BreadthFirstSearch(inst, pred); - ASSERT_MSG(result, "Unable to track sharp source"); - inst = result.value(); - visited_insts.emplace_back(inst); + for (size_t arg = inst->NumArgs(); arg--;) { + const IR::Value arg_value = inst->Arg(arg); + if (arg_value.IsImmediate()) { + continue; + } + const IR::Inst* arg_inst = arg_value.InstRecursive(); + if (std::ranges::find(visited, arg_inst) == visited.end()) { + visited.push_back(arg_inst); + queue.push(arg_inst); + } + } + } + if (sources.empty()) { + if (found_read_const_buffer) { + UNREACHABLE_MSG("Bindless sharp access detected pc={:#x}", pc); + } else { + UNREACHABLE_MSG("Unable to find sharp sources pc={:#x}", pc); + } + } + return 
sources; +} + +bool IsCfgBlockDominatedBy(const Shader::Gcn::Block* maybe_dominator, + const Shader::Gcn::Block* block, const Shader::Gcn::Block* dest_block) { + if (block == maybe_dominator) { + return true; + } + + boost::container::small_vector visited; + std::queue queue; + queue.push(block); + + while (!queue.empty()) { + const Shader::Gcn::Block* block{queue.front()}; + queue.pop(); + if (block == dest_block) { + return false; + } + if (block == maybe_dominator) { + continue; + } + if (block->branch_false && !std::ranges::contains(visited, block->branch_false)) { + visited.push_back(block->branch_false); + queue.push(block->branch_false); + } + if (block->branch_true && !std::ranges::contains(visited, block->branch_true)) { + visited.push_back(block->branch_true); + queue.push(block->branch_true); + } + } + + return true; +} + +SharpLocation SharpLocationFromSource(const IR::Inst* inst) { if (inst->GetOpcode() == IR::Opcode::GetUserData) { - return static_cast(inst->Arg(0).ScalarReg()); + return static_cast(inst->Arg(0).ScalarReg()); } else { - ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, - "Sharp load not from constant memory"); return inst->Flags(); } } -/// Tracks a sharp with validation of the chosen data type. -template -std::pair TrackSharp(const IR::Inst* inst, const Info& info) { - boost::container::small_vector visited_insts{}; - while (true) { - const auto prev_size = visited_insts.size(); - const auto sharp = AttemptTrackSharp(inst, visited_insts); - if (const auto data = info.ReadUdSharp(sharp); data.Valid()) { - return std::make_pair(sharp, data); +SharpLocation TrackSharp(const IR::Inst* inst, const IR::Block& current_parent, u32 pc = 0) { + auto sources = FindSharpSources(inst, pc); + size_t num_sources = sources.size(); + ASSERT(current_parent.cfg_block); + + // Perform dominance analysis on found sources and eliminate ones that don't pass + // If a sharp source is dominated by another, the former can be eliminated. 
+ for (s32 i = 0; i < num_sources;) { + const IR::Block* block = sources[i]->GetParent(); + ASSERT(block->cfg_block); + bool was_removed = false; + for (s32 j = 0; j < num_sources;) { + const IR::Block* dominator = sources[j]->GetParent(); + ASSERT(dominator->cfg_block); + if (i != j && IsCfgBlockDominatedBy(dominator->cfg_block, block->cfg_block, + current_parent.cfg_block)) { + std::swap(sources[i], sources[num_sources - 1]); + --num_sources; + sources.pop_back(); + was_removed = true; + break; + } else { + ++j; + } } - if (prev_size == visited_insts.size()) { - // No change in visited instructions, we've run out of paths. - UNREACHABLE_MSG("Unable to find valid sharp."); + if (!was_removed) { + ++i; } } -} -/// Tracks a sharp without data validation. -SharpLocation TrackSharp(const IR::Inst* inst, const Info& info) { - boost::container::static_vector visited_insts{}; - return AttemptTrackSharp(inst, visited_insts); -} - -s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, - AmdGpu::Buffer& cbuf) { - - // Assuming V# is in UD s[32:35] - // The next pattern: - // s_getpc_b64 s[32:33] - // s_add_u32 s32, , s32 - // s_addc_u32 s33, 0, s33 - // s_mov_b32 s35, - // s_movk_i32 s34, - // buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ... - // is used to define an inline constant buffer - - IR::Inst* handle = inst.Arg(0).InstRecursive(); - if (!handle->AreAllArgsImmediates()) { - return -1; - } - // We have found this pattern. Build the sharp. - std::array buffer; - buffer[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32); - buffer[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32; - cbuf = std::bit_cast(buffer); - // Assign a binding to this sharp. 
- return descriptors.Add(BufferResource{ - .sharp_idx = std::numeric_limits::max(), - .used_types = BufferDataType(inst, cbuf.GetNumberFmt()), - .inline_cbuf = cbuf, - .buffer_type = BufferType::Guest, - }); + ASSERT_MSG(sources.size() == 1, "Unable to deduce sharp source"); + return SharpLocationFromSource(sources[0]); } void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { - s32 binding{}; - AmdGpu::Buffer buffer; - if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) { - IR::Inst* handle = inst.Arg(0).InstRecursive(); - IR::Inst* producer = handle->Arg(0).InstRecursive(); - SharpLocation sharp; - std::tie(sharp, buffer) = TrackSharp(producer, info); - binding = descriptors.Add(BufferResource{ - .sharp_idx = sharp, + IR::Inst* handle = inst.Arg(0).InstRecursive(); + u32 buffer_binding = 0; + if (handle->AreAllArgsImmediates()) { + // Assuming V# is in UD s[32:35] + // The next pattern: + // s_getpc_b64 s[32:33] + // s_add_u32 s32, , s32 + // s_addc_u32 s33, 0, s33 + // s_mov_b32 s35, + // s_movk_i32 s34, + // buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ... + // is used to define an inline buffer resource + std::array raw; + raw[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32); + raw[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32; + const auto buffer = std::bit_cast(raw); + buffer_binding = descriptors.Add(BufferResource{ + .sharp_idx = std::numeric_limits::max(), + .used_types = BufferDataType(inst, buffer.GetNumberFmt()), + .inline_cbuf = buffer, + .buffer_type = BufferType::Guest, + }); + } else { + // Normal buffer resource. 
+ IR::Inst* buffer_handle = handle->Arg(0).InstRecursive(); + const auto sharp_idx = TrackSharp(buffer_handle, block); + const auto buffer = info.ReadUdSharp(sharp_idx); + buffer_binding = descriptors.Add(BufferResource{ + .sharp_idx = sharp_idx, .used_types = BufferDataType(inst, buffer.GetNumberFmt()), .buffer_type = BufferType::Guest, .is_written = IsBufferStore(inst), @@ -436,25 +520,14 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& // Replace handle with binding index in buffer resource list. IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - inst.SetArg(0, ir.Imm32(binding)); + inst.SetArg(0, ir.Imm32(buffer_binding)); } void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { - const auto pred = [](const IR::Inst* inst) -> std::optional { - const auto opcode = inst->GetOpcode(); - if (opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only) - opcode == IR::Opcode::GetUserData) { - return inst; - } - return std::nullopt; - }; - const auto result = IR::BreadthFirstSearch(&inst, pred); - ASSERT_MSG(result, "Unable to find image sharp source"); - const IR::Inst* tsharp_handle = result.value(); - // Read image sharp. - const auto tsharp = TrackSharp(tsharp_handle, info); const auto inst_info = inst.Flags(); + const IR::Inst* image_handle = inst.Arg(0).InstRecursive(); + const auto tsharp = TrackSharp(image_handle, block, inst_info.pc); const bool is_atomic = IsImageAtomicInstruction(inst); const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite || is_atomic; const ImageResource image_res = { @@ -506,38 +579,34 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) { - // Read sampler sharp. 
- const auto sampler_binding = [&] -> u32 { - const auto sampler = inst.Arg(5).InstRecursive(); - ASSERT(sampler && sampler->GetOpcode() == IR::Opcode::CompositeConstructU32x4); - const auto handle = sampler->Arg(0); - // Inline sampler resource. - if (handle.IsImmediate()) { - LOG_DEBUG(Render_Vulkan, "Inline sampler detected"); - const auto [s1, s2, s3, s4] = - std::tuple{sampler->Arg(0), sampler->Arg(1), sampler->Arg(2), sampler->Arg(3)}; - ASSERT(s1.IsImmediate() && s2.IsImmediate() && s3.IsImmediate() && - s4.IsImmediate()); - const auto inline_sampler = AmdGpu::Sampler{ - .raw0 = u64(s2.U32()) << 32 | u64(s1.U32()), - .raw1 = u64(s4.U32()) << 32 | u64(s3.U32()), - }; - const auto binding = descriptors.Add(SamplerResource{inline_sampler}); - return binding; - } else { - // Normal sampler resource. - const auto ssharp_handle = handle.InstRecursive(); - const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); - const auto ssharp = TrackSharp(ssharp_ud, info); - const auto binding = - descriptors.Add(SamplerResource{ssharp, image_binding, disable_aniso}); - return binding; - } - }(); - // Patch image and sampler handle. + u32 sampler_binding = 0; + const IR::Inst* sampler = inst.Arg(1).InstRecursive(); + ASSERT(sampler && sampler->GetOpcode() == IR::Opcode::CompositeConstructU32x4); + // Inline sampler resource. + if (sampler->AreAllArgsImmediates()) { + const auto inline_sampler = AmdGpu::Sampler{ + .raw0 = u64(sampler->Arg(1).U32()) << 32 | u64(sampler->Arg(0).U32()), + .raw1 = u64(sampler->Arg(3).U32()) << 32 | u64(sampler->Arg(2).U32()), + }; + sampler_binding = descriptors.Add(SamplerResource{ + .sharp_idx = std::numeric_limits::max(), + .inline_sampler = inline_sampler, + .is_inline_sampler = true, + }); + } else { + // Normal sampler resource. 
+ const auto& [sampler_handle, disable_aniso] = + TryDisableAnisoLod0(sampler->Arg(0).InstRecursive()); + const auto ssharp = TrackSharp(sampler_handle, block, inst_info.pc); + sampler_binding = descriptors.Add(SamplerResource{ + .sharp_idx = ssharp, + .is_inline_sampler = false, + .associated_image = image_binding, + .disable_aniso = disable_aniso, + }); + } inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16)); } else { - // Patch image handle. inst.SetArg(0, ir.Imm32(image_binding)); } } @@ -768,10 +837,10 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, const auto inst_info = inst.Flags(); const auto view_type = image.GetViewType(image_res.is_array); - IR::Inst* body1 = inst.Arg(1).InstRecursive(); - IR::Inst* body2 = inst.Arg(2).InstRecursive(); - IR::Inst* body3 = inst.Arg(3).InstRecursive(); - IR::F32 body4 = IR::F32{inst.Arg(4)}; + IR::Inst* body1 = inst.Arg(2).InstRecursive(); + IR::Inst* body2 = inst.Arg(3).InstRecursive(); + IR::Inst* body3 = inst.Arg(4).InstRecursive(); + IR::F32 body4 = IR::F32{inst.Arg(5)}; const auto get_addr_reg = [&](u32 index) -> IR::F32 { if (index <= 3) { return IR::F32{body1->Arg(index)}; @@ -942,14 +1011,13 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) { return; } - const auto handle = inst.Arg(0); - const auto image_res = info.images[handle.U32() & 0xFFFF]; + const auto image_handle = inst.Arg(0); + const auto& image_res = info.images[image_handle.U32() & 0xFFFF]; auto image = image_res.GetSharp(info); // Sample instructions must be handled separately using address register data. 
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) { - PatchImageSampleArgs(block, inst, info, image_res, image); - return; + return PatchImageSampleArgs(block, inst, info, image_res, image); } IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; @@ -963,17 +1031,13 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) { case AmdGpu::ImageType::Color1D: // x, [lod] return {body->Arg(0), body->Arg(1)}; case AmdGpu::ImageType::Color1DArray: // x, slice, [lod] - [[fallthrough]]; - case AmdGpu::ImageType::Color2D: // x, y, [lod] - [[fallthrough]]; - case AmdGpu::ImageType::Color2DMsaa: // x, y. (sample is passed on different argument) + case AmdGpu::ImageType::Color2D: // x, y, [lod] + case AmdGpu::ImageType::Color2DMsaa: // x, y. (sample is passed on different argument) return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)}; - case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod] - [[fallthrough]]; + case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod] case AmdGpu::ImageType::Color2DMsaaArray: // x, y, slice. (sample is passed on different // argument) - [[fallthrough]]; - case AmdGpu::ImageType::Color3D: // x, y, z, [lod] + case AmdGpu::ImageType::Color3D: // x, y, z, [lod] return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; default: UNREACHABLE_MSG("Unknown image type {}", view_type); @@ -988,7 +1052,7 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) { const auto is_storage = image_res.is_written; if (inst.GetOpcode() == IR::Opcode::ImageRead) { - auto texel = ir.ImageRead(handle, coords, lod, ms, inst_info); + auto texel = ir.ImageRead(image_handle, coords, lod, ms, inst_info); if (is_storage) { // Storage image requires shader swizzle. 
texel = ApplySwizzle(ir, texel, image.DstSelect()); diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index c534eecd8..96c5b2dc7 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -45,6 +45,7 @@ union TextureInstInfo { BitField<10, 1, u32> is_unnormalized; BitField<11, 1, u32> is_gather; BitField<12, 1, u32> is_r128; + BitField<16, 16, u32> pc; }; union BufferInstInfo { diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 5ede90200..ff9cfe2cc 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -486,6 +486,10 @@ struct Sampler { return raw0 != 0 || raw1 != 0; } + bool Valid() const { + return true; + } + bool operator==(const Sampler& other) const noexcept { return std::memcmp(this, &other, sizeof(Sampler)) == 0; }