shader_recompiler: Rework sharp tracking for robustness (#3327)

* shader_recompiler: Remove remnants of old discard

Also constant propagate conditional discard if condition is constant

* resource_tracking_pass: Rework sharp tracking for robustness

* resource_tracking_pass: Add source dominance analysis

When reachability is not enough to prune source list, check if a source dominates all other sources

* resource_tracking_pass: Fix immediate check

How did this work before?

* resource_tracking_pass: Remove unused template type

* readlane_elimination_pass: Don't add phi when all args are the same

New sharp tracking exposed some bad sources coming in on sampler sharps with the aniso-disable pattern that were also part of the readlane pattern; fix tracking by removing the unnecessary phis in between

* resource_tracking_pass: Allow phi in disable aniso pattern

* resource_tracking_pass: Handle not valid buffer sharp and more phi in aniso pattern
This commit is contained in:
TheTurtle 2025-07-28 23:32:16 +03:00 committed by GitHub
parent d286631798
commit 93767ae31b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 304 additions and 243 deletions

View File

@ -893,6 +893,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/attribute.h src/shader_recompiler/ir/attribute.h
src/shader_recompiler/ir/basic_block.cpp src/shader_recompiler/ir/basic_block.cpp
src/shader_recompiler/ir/basic_block.h src/shader_recompiler/ir/basic_block.h
src/shader_recompiler/ir/breadth_first_search.h
src/shader_recompiler/ir/condition.h src/shader_recompiler/ir/condition.h
src/shader_recompiler/ir/ir_emitter.cpp src/shader_recompiler/ir/ir_emitter.cpp
src/shader_recompiler/ir/ir_emitter.h src/shader_recompiler/ir/ir_emitter.h

View File

@ -945,11 +945,11 @@ void EmitContext::DefineImagesAndSamplers() {
const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)}; const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)};
Decorate(id, spv::Decoration::Binding, binding.unified++); Decorate(id, spv::Decoration::Binding, binding.unified++);
Decorate(id, spv::Decoration::DescriptorSet, 0U); Decorate(id, spv::Decoration::DescriptorSet, 0U);
auto sharp_desc = std::holds_alternative<u32>(samp_desc.sampler) const auto sharp_desc =
? fmt::format("sgpr:{}", std::get<u32>(samp_desc.sampler)) samp_desc.is_inline_sampler
: fmt::format("inline:{:#x}:{:#x}", ? fmt::format("inline:{:#x}:{:#x}", samp_desc.inline_sampler.raw0,
std::get<AmdGpu::Sampler>(samp_desc.sampler).raw0, samp_desc.inline_sampler.raw1)
std::get<AmdGpu::Sampler>(samp_desc.sampler).raw1); : fmt::format("sgpr:{}", samp_desc.sharp_idx);
Name(id, fmt::format("{}_{}{}", stage, "samp", sharp_desc)); Name(id, fmt::format("{}_{}{}", stage, "samp", sharp_desc));
samplers.push_back(id); samplers.push_back(id);
interfaces.push_back(id); interfaces.push_back(id);

View File

@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm> #include <algorithm>
#include <unordered_map>
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/frontend/control_flow_graph.h"
@ -350,19 +349,7 @@ void CFG::LinkBlocks() {
block.branch_false = end_block; block.branch_false = end_block;
block.end_class = EndClass::Branch; block.end_class = EndClass::Branch;
} else if (end_inst.opcode == Opcode::S_ENDPGM) { } else if (end_inst.opcode == Opcode::S_ENDPGM) {
const auto& prev_inst = inst_list[block.end_index - 1]; block.end_class = EndClass::Exit;
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) {
if (prev_inst.control.exp.target != 9) {
block.end_class = EndClass::Kill;
} else if (const auto& exec_mask = inst_list[block.end_index - 2];
exec_mask.src[0].field == OperandField::ConstZero) {
block.end_class = EndClass::Kill;
} else {
block.end_class = EndClass::Exit;
}
} else {
block.end_class = EndClass::Exit;
}
} else { } else {
UNREACHABLE(); UNREACHABLE();
} }
@ -403,12 +390,6 @@ std::string CFG::Dot() const {
fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", node_uid); fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", node_uid);
++node_uid; ++node_uid;
break; break;
case EndClass::Kill:
dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
dot +=
fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", node_uid);
++node_uid;
break;
} }
} }
dot += "\t\tlabel = \"main\";\n\t}\n"; dot += "\t\tlabel = \"main\";\n\t}\n";

View File

@ -23,7 +23,6 @@ using Hook =
enum class EndClass { enum class EndClass {
Branch, ///< Block ends with a (un)conditional branch. Branch, ///< Block ends with a (un)conditional branch.
Exit, ///< Block ends with an exit instruction. Exit, ///< Block ends with an exit instruction.
Kill, ///< Block ends with a discard instruction.
}; };
/// A block represents a linear range of instructions. /// A block represents a linear range of instructions.

View File

@ -39,7 +39,6 @@ enum class StatementType {
Loop, Loop,
Break, Break,
Return, Return,
Kill,
Unreachable, Unreachable,
Function, Function,
Identity, Identity,
@ -88,7 +87,6 @@ struct Statement : ListBaseHook {
Statement(Break, Statement* cond_, Statement* up_) Statement(Break, Statement* cond_, Statement* up_)
: cond{cond_}, up{up_}, type{StatementType::Break} {} : cond{cond_}, up{up_}, type{StatementType::Break} {}
Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {} Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {}
Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {}
Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {} Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {}
Statement(FunctionTag) : children{}, type{StatementType::Function} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {}
Statement(Identity, IR::Condition cond_, Statement* up_) Statement(Identity, IR::Condition cond_, Statement* up_)
@ -174,9 +172,6 @@ std::string DumpExpr(const Statement* stmt) {
case StatementType::Return: case StatementType::Return:
ret += fmt::format("{} return;\n", indent); ret += fmt::format("{} return;\n", indent);
break; break;
case StatementType::Kill:
ret += fmt::format("{} kill;\n", indent);
break;
case StatementType::Unreachable: case StatementType::Unreachable:
ret += fmt::format("{} unreachable;\n", indent); ret += fmt::format("{} unreachable;\n", indent);
break; break;
@ -335,9 +330,9 @@ private:
} }
} }
// Expensive operation: // Expensive operation:
if (!AreSiblings(goto_stmt, label_stmt)) { // if (!AreSiblings(goto_stmt, label_stmt)) {
UNREACHABLE_MSG("Goto is not a sibling with the label"); // UNREACHABLE_MSG("Goto is not a sibling with the label");
} //}
// goto_stmt and label_stmt are guaranteed to be siblings, eliminate // goto_stmt and label_stmt are guaranteed to be siblings, eliminate
if (std::next(goto_stmt) == label_stmt) { if (std::next(goto_stmt) == label_stmt) {
// Simply eliminate the goto if the label is next to it // Simply eliminate the goto if the label is next to it
@ -410,9 +405,6 @@ private:
case EndClass::Exit: case EndClass::Exit:
root.insert(ip, *pool.Create(Return{}, &root_stmt)); root.insert(ip, *pool.Create(Return{}, &root_stmt));
break; break;
case EndClass::Kill:
root.insert(ip, *pool.Create(Kill{}, &root_stmt));
break;
} }
} }
} }
@ -637,6 +629,7 @@ private:
if (!stmt.block->is_dummy) { if (!stmt.block->is_dummy) {
const u32 start = stmt.block->begin_index; const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1; const u32 size = stmt.block->end_index - start + 1;
current_block->cfg_block = stmt.block;
translator.Translate(current_block, stmt.block->begin, translator.Translate(current_block, stmt.block->begin,
inst_list.subspan(start, size)); inst_list.subspan(start, size));
} }
@ -770,18 +763,6 @@ private:
syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
break; break;
} }
case StatementType::Kill: {
ensure_block();
IR::Block* demote_block{MergeBlock(parent, stmt)};
IR::IREmitter{*current_block}.Discard();
current_block->AddBranch(demote_block);
current_block = demote_block;
auto& merge{syntax_list.emplace_back()};
merge.type = IR::AbstractSyntaxNode::Type::Block;
merge.data.block = demote_block;
break;
}
case StatementType::Unreachable: { case StatementType::Unreachable: {
ensure_block(); ensure_block();
current_block = nullptr; current_block = nullptr;
@ -789,7 +770,7 @@ private:
break; break;
} }
default: default:
throw NotImplementedException("Statement type {}", u32(stmt.type)); UNREACHABLE_MSG("Statement type {}", u32(stmt.type));
} }
} }
if (current_block) { if (current_block) {

View File

@ -6,7 +6,7 @@
namespace Shader::Gcn { namespace Shader::Gcn {
void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { void Translator::EmitFlowControl(const GcnInst& inst) {
switch (inst.opcode) { switch (inst.opcode) {
case Opcode::S_BARRIER: case Opcode::S_BARRIER:
return S_BARRIER(); return S_BARRIER();
@ -20,7 +20,7 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
LOG_WARNING(Render_Vulkan, "S_TRAP instruction!"); LOG_WARNING(Render_Vulkan, "S_TRAP instruction!");
return; return;
case Opcode::S_GETPC_B64: case Opcode::S_GETPC_B64:
return S_GETPC_B64(pc, inst); return S_GETPC_B64(inst);
case Opcode::S_SETPC_B64: case Opcode::S_SETPC_B64:
case Opcode::S_WAITCNT: case Opcode::S_WAITCNT:
case Opcode::S_NOP: case Opcode::S_NOP:
@ -45,9 +45,7 @@ void Translator::S_BARRIER() {
ir.Barrier(); ir.Barrier();
} }
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) { void Translator::S_GETPC_B64(const GcnInst& inst) {
// This only really exists to let resource tracking pass know
// there is an inline cbuf.
const IR::ScalarReg dst{inst.dst[0].code}; const IR::ScalarReg dst{inst.dst[0].code};
ir.SetScalarReg(dst, ir.Imm32(pc)); ir.SetScalarReg(dst, ir.Imm32(pc));
ir.SetScalarReg(dst + 1, ir.Imm32(0)); ir.SetScalarReg(dst + 1, ir.Imm32(0));

View File

@ -520,14 +520,13 @@ void Translator::EmitFetch(const GcnInst& inst) {
GcnDecodeContext decoder; GcnDecodeContext decoder;
// Decode and save instructions // Decode and save instructions
u32 sub_pc = 0;
while (!slice.atEnd()) { while (!slice.atEnd()) {
const auto sub_inst = decoder.decodeInstruction(slice); const auto sub_inst = decoder.decodeInstruction(slice);
if (sub_inst.opcode == Opcode::S_SETPC_B64) { if (sub_inst.opcode == Opcode::S_SETPC_B64) {
// Assume we're swapping back to the main shader. // Assume we're swapping back to the main shader.
break; break;
} }
TranslateInstruction(sub_inst, sub_pc++); TranslateInstruction(sub_inst);
} }
return; return;
} }
@ -574,11 +573,12 @@ void Translator::LogMissingOpcode(const GcnInst& inst) {
info.translation_failed = true; info.translation_failed = true;
} }
void Translator::Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list) { void Translator::Translate(IR::Block* block, u32 start_pc, std::span<const GcnInst> inst_list) {
if (inst_list.empty()) { if (inst_list.empty()) {
return; return;
} }
ir = IR::IREmitter{*block, block->begin()}; ir = IR::IREmitter{*block, block->begin()};
pc = start_pc;
for (const auto& inst : inst_list) { for (const auto& inst : inst_list) {
pc += inst.length; pc += inst.length;
@ -590,11 +590,11 @@ void Translator::Translate(IR::Block* block, u32 pc, std::span<const GcnInst> in
continue; continue;
} }
TranslateInstruction(inst, pc); TranslateInstruction(inst);
} }
} }
void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) { void Translator::TranslateInstruction(const GcnInst& inst) {
// Emit instructions for each category. // Emit instructions for each category.
switch (inst.category) { switch (inst.category) {
case InstCategory::DataShare: case InstCategory::DataShare:
@ -613,7 +613,7 @@ void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
EmitExport(inst); EmitExport(inst);
break; break;
case InstCategory::FlowControl: case InstCategory::FlowControl:
EmitFlowControl(pc, inst); EmitFlowControl(inst);
break; break;
case InstCategory::ScalarALU: case InstCategory::ScalarALU:
EmitScalarAlu(inst); EmitScalarAlu(inst);

View File

@ -61,13 +61,13 @@ public:
explicit Translator(Info& info, const RuntimeInfo& runtime_info, const Profile& profile); explicit Translator(Info& info, const RuntimeInfo& runtime_info, const Profile& profile);
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list); void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list);
void TranslateInstruction(const GcnInst& inst, u32 pc); void TranslateInstruction(const GcnInst& inst);
// Instruction categories // Instruction categories
void EmitPrologue(IR::Block* first_block); void EmitPrologue(IR::Block* first_block);
void EmitFetch(const GcnInst& inst); void EmitFetch(const GcnInst& inst);
void EmitExport(const GcnInst& inst); void EmitExport(const GcnInst& inst);
void EmitFlowControl(u32 pc, const GcnInst& inst); void EmitFlowControl(const GcnInst& inst);
void EmitScalarAlu(const GcnInst& inst); void EmitScalarAlu(const GcnInst& inst);
void EmitScalarMemory(const GcnInst& inst); void EmitScalarMemory(const GcnInst& inst);
void EmitVectorAlu(const GcnInst& inst); void EmitVectorAlu(const GcnInst& inst);
@ -126,7 +126,7 @@ public:
void S_FLBIT_I32_B32(const GcnInst& inst); void S_FLBIT_I32_B32(const GcnInst& inst);
void S_FLBIT_I32_B64(const GcnInst& inst); void S_FLBIT_I32_B64(const GcnInst& inst);
void S_BITSET_B32(const GcnInst& inst, u32 bit_value); void S_BITSET_B32(const GcnInst& inst, u32 bit_value);
void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_GETPC_B64(const GcnInst& inst);
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
void S_ABS_I32(const GcnInst& inst); void S_ABS_I32(const GcnInst& inst);
@ -337,6 +337,7 @@ private:
std::unordered_map<u32, IR::VectorReg> vgpr_map; std::unordered_map<u32, IR::VectorReg> vgpr_map;
std::array<IR::Attribute, MaxInterpVgpr> vgpr_to_interp{}; std::array<IR::Attribute, MaxInterpVgpr> vgpr_to_interp{};
bool opcode_missing = false; bool opcode_missing = false;
u32 pc{};
}; };
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -588,7 +588,7 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::ScalarReg tsharp_reg, IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::ScalarReg tsharp_reg,
const IR::ScalarReg sampler_reg, const IR::VectorReg addr_reg, const IR::ScalarReg sampler_reg, const IR::VectorReg addr_reg,
bool gather) { bool gather, u32 pc) {
const auto& mimg = inst.control.mimg; const auto& mimg = inst.control.mimg;
const auto flags = MimgModifierFlags(mimg.mod); const auto flags = MimgModifierFlags(mimg.mod);
@ -602,6 +602,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
info.is_array.Assign(mimg.da); info.is_array.Assign(mimg.da);
info.is_unnormalized.Assign(mimg.unrm); info.is_unnormalized.Assign(mimg.unrm);
info.is_r128.Assign(mimg.r128); info.is_r128.Assign(mimg.r128);
info.pc.Assign(pc);
if (gather) { if (gather) {
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
@ -610,11 +611,11 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
info.has_derivatives.Assign(flags.test(MimgModifier::Derivative)); info.has_derivatives.Assign(flags.test(MimgModifier::Derivative));
} }
// Load first dword of T# and S#. We will use them as the handle that will guide resource // Load first dword of T# and the full S#. We will use them as the handle that will guide
// tracking pass where to read the sharps. This will later also get patched to the SPIRV texture // resource tracking pass where to read the sharps. This will later also get patched to the
// binding index. // backend texture binding index.
const IR::Value handle = ir.GetScalarReg(tsharp_reg); const IR::Value image_handle = ir.GetScalarReg(tsharp_reg);
const IR::Value inline_sampler = const IR::Value sampler_handle =
ir.CompositeConstruct(ir.GetScalarReg(sampler_reg), ir.GetScalarReg(sampler_reg + 1), ir.CompositeConstruct(ir.GetScalarReg(sampler_reg), ir.GetScalarReg(sampler_reg + 1),
ir.GetScalarReg(sampler_reg + 2), ir.GetScalarReg(sampler_reg + 3)); ir.GetScalarReg(sampler_reg + 2), ir.GetScalarReg(sampler_reg + 3));
@ -652,8 +653,8 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
const IR::Value address4 = get_addr_reg(12); const IR::Value address4 = get_addr_reg(12);
// Issue the placeholder IR instruction. // Issue the placeholder IR instruction.
IR::Value texel = IR::Value texel = ir.ImageSampleRaw(image_handle, sampler_handle, address1, address2, address3,
ir.ImageSampleRaw(handle, address1, address2, address3, address4, inline_sampler, info); address4, info);
if (info.is_depth && !gather) { if (info.is_depth && !gather) {
// For non-gather depth sampling, only return a single value. // For non-gather depth sampling, only return a single value.
texel = ir.CompositeExtract(texel, 0); texel = ir.CompositeExtract(texel, 0);
@ -669,7 +670,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
const IR::ScalarReg sampler_reg{inst.src[3].code * 4}; const IR::ScalarReg sampler_reg{inst.src[3].code * 4};
const auto flags = MimgModifierFlags(mimg.mod); const auto flags = MimgModifierFlags(mimg.mod);
const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, false); const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, false, pc);
for (u32 i = 0; i < 4; i++) { for (u32 i = 0; i < 4; i++) {
if (((mimg.dmask >> i) & 1) == 0) { if (((mimg.dmask >> i) & 1) == 0) {
continue; continue;
@ -698,7 +699,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
// should be always 1st (R) component for depth // should be always 1st (R) component for depth
ASSERT(!flags.test(MimgModifier::Pcf) || mimg.dmask & 1); ASSERT(!flags.test(MimgModifier::Pcf) || mimg.dmask & 1);
const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, true); const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, true, pc);
for (u32 i = 0; i < 4; i++) { for (u32 i = 0; i < 4; i++) {
const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)}; const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
ir.SetVectorReg(dest_reg++, value); ir.SetVectorReg(dest_reg++, value);

View File

@ -4,7 +4,6 @@
#pragma once #pragma once
#include <span> #include <span>
#include <variant>
#include <vector> #include <vector>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
@ -93,15 +92,12 @@ struct ImageResource {
using ImageResourceList = boost::container::small_vector<ImageResource, NumImages>; using ImageResourceList = boost::container::small_vector<ImageResource, NumImages>;
struct SamplerResource { struct SamplerResource {
std::variant<u32, AmdGpu::Sampler> sampler; u32 sharp_idx;
AmdGpu::Sampler inline_sampler;
u32 is_inline_sampler : 1;
u32 associated_image : 4; u32 associated_image : 4;
u32 disable_aniso : 1; u32 disable_aniso : 1;
SamplerResource(u32 sharp_idx, u32 associated_image_, bool disable_aniso_)
: sampler{sharp_idx}, associated_image{associated_image_}, disable_aniso{disable_aniso_} {}
SamplerResource(AmdGpu::Sampler sampler_)
: sampler{sampler_}, associated_image{0}, disable_aniso(0) {}
constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept;
}; };
using SamplerResourceList = boost::container::small_vector<SamplerResource, NumSamplers>; using SamplerResourceList = boost::container::small_vector<SamplerResource, NumSamplers>;
@ -312,20 +308,24 @@ struct Info {
DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType); DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType);
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
return inline_cbuf ? inline_cbuf : info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx); const auto buffer = inline_cbuf ? inline_cbuf : info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
if (!buffer.Valid()) {
LOG_DEBUG(Render, "Encountered invalid buffer sharp");
return AmdGpu::Buffer::Null();
}
return buffer;
} }
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept { constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
AmdGpu::Image image{0}; AmdGpu::Image image{};
if (!is_r128) { if (!is_r128) {
image = info.ReadUdSharp<AmdGpu::Image>(sharp_idx); image = info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
} else { } else {
const auto buf = info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx); const auto raw = info.ReadUdSharp<u128>(sharp_idx);
memcpy(&image, &buf, sizeof(buf)); std::memcpy(&image, &raw, sizeof(raw));
} }
if (!image.Valid()) { if (!image.Valid()) {
// Fall back to null image if unbound. LOG_DEBUG(Render_Vulkan, "Encountered invalid image sharp");
LOG_DEBUG(Render_Vulkan, "Encountered unbound image!");
image = is_depth ? AmdGpu::Image::NullDepth() : AmdGpu::Image::Null(); image = is_depth ? AmdGpu::Image::NullDepth() : AmdGpu::Image::Null();
} else if (is_depth) { } else if (is_depth) {
const auto data_fmt = image.GetDataFmt(); const auto data_fmt = image.GetDataFmt();
@ -338,9 +338,7 @@ constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept
} }
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept { constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
return std::holds_alternative<AmdGpu::Sampler>(sampler) return is_inline_sampler ? inline_sampler : info.ReadUdSharp<AmdGpu::Sampler>(sharp_idx);
? std::get<AmdGpu::Sampler>(sampler)
: info.ReadUdSharp<AmdGpu::Sampler>(std::get<u32>(sampler));
} }
constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept { constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept {

View File

@ -123,8 +123,8 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
} }
if (op == Opcode::ReadConst) { if (op == Opcode::ReadConst || op == Opcode::ImageSampleRaw) {
ret += fmt::format(" (flags={}) ", inst.Flags<u32>()); ret += fmt::format(" (flags={:#x}) ", inst.Flags<u32>());
} }
const size_t arg_count{inst.NumArgs()}; const size_t arg_count{inst.NumArgs()};
for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {

View File

@ -14,6 +14,10 @@
#include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/value.h" #include "shader_recompiler/ir/value.h"
namespace Shader::Gcn {
struct Block;
}
namespace Shader::IR { namespace Shader::IR {
class Block { class Block {
@ -150,6 +154,10 @@ public:
std::array<Value, NumScalarRegs> ssa_sbit_values; std::array<Value, NumScalarRegs> ssa_sbit_values;
std::array<Value, NumVectorRegs> ssa_vreg_values; std::array<Value, NumVectorRegs> ssa_vreg_values;
/// Block of the CFG that corresponds to this IR block.
/// It can be null as IR has additional control flow blocks.
const Shader::Gcn::Block* cfg_block{};
private: private:
/// Memory pool for instruction list /// Memory pool for instruction list
Common::ObjectPool<Inst>* inst_pool; Common::ObjectPool<Inst>* inst_pool;

View File

@ -2105,11 +2105,11 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c
return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value); return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value);
} }
Value IREmitter::ImageSampleRaw(const Value& handle, const Value& address1, const Value& address2, Value IREmitter::ImageSampleRaw(const Value& image_handle, const Value& sampler_handle,
const Value& address3, const Value& address4, const Value& address1, const Value& address2, const Value& address3,
const Value& inline_sampler, TextureInstInfo info) { const Value& address4, TextureInstInfo info) {
return Inst(Opcode::ImageSampleRaw, Flags{info}, handle, address1, address2, address3, address4, return Inst(Opcode::ImageSampleRaw, Flags{info}, image_handle, sampler_handle, address1,
inline_sampler); address2, address3, address4);
} }
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,

View File

@ -359,9 +359,9 @@ public:
[[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info); const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageSampleRaw(const Value& handle, const Value& address1, [[nodiscard]] Value ImageSampleRaw(const Value& image_handle, const Value& sampler_handle,
const Value& address2, const Value& address3, const Value& address1, const Value& address2,
const Value& address4, const Value& inline_sampler, const Value& address3, const Value& address4,
TextureInstInfo info); TextureInstInfo info);
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body, [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body,

View File

@ -436,7 +436,7 @@ OPCODE(ConvertS32S8, U32, U8,
OPCODE(ConvertS32S16, U32, U16, ) OPCODE(ConvertS32S16, U32, U16, )
// Image operations // Image operations
OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, Opaque, ) OPCODE(ImageSampleRaw, F32x4, Opaque, Opaque, F32x4, F32x4, F32x4, F32, )
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, F32x4, F32, Opaque, ) OPCODE(ImageSampleImplicitLod, F32x4, Opaque, F32x4, F32, Opaque, )
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, F32, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, F32, Opaque, )
OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, ) OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )
@ -445,7 +445,7 @@ OPCODE(ImageGather, F32x4, Opaq
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, )
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, )
OPCODE(ImageRead, F32x4, Opaque, Opaque, U32, U32, ) OPCODE(ImageRead, F32x4, Opaque, Opaque, U32, U32, )
OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32, F32x4, ) OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32, F32x4, )

View File

@ -204,6 +204,18 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
} }
} }
/// Constant-propagates a DiscardCond instruction whose condition is a
/// compile-time immediate: a statically-true condition becomes an
/// unconditional Discard, a statically-false one is removed entirely.
/// Non-immediate conditions are left for runtime evaluation.
void FoldDiscardCond(IR::Inst& inst) {
    const IR::U1 condition{inst.Arg(0)};
    if (!condition.IsImmediate()) {
        // Condition only known at runtime; nothing to fold.
        return;
    }
    if (!condition.U1()) {
        // Condition is always false: the discard can never fire.
        inst.Invalidate();
        return;
    }
    // Condition is always true: discard unconditionally.
    inst.ReplaceOpcode(IR::Opcode::Discard);
}
template <typename T> template <typename T>
void FoldAdd(IR::Block& block, IR::Inst& inst) { void FoldAdd(IR::Block& block, IR::Inst& inst) {
if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) { if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) {
@ -505,6 +517,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
return FoldConvert(inst, IR::Opcode::ConvertF16F32); return FoldConvert(inst, IR::Opcode::ConvertF16F32);
case IR::Opcode::ConvertF16F32: case IR::Opcode::ConvertF16F32:
return FoldConvert(inst, IR::Opcode::ConvertF32F16); return FoldConvert(inst, IR::Opcode::ConvertF32F16);
case IR::Opcode::DiscardCond:
return FoldDiscardCond(inst);
default: default:
break; break;
} }

View File

@ -78,10 +78,20 @@ static IR::Value GetRealValue(PhiMap& phi_map, IR::Inst* inst, u32 lane) {
it->second = new_phi; it->second = new_phi;
// Gather all arguments. // Gather all arguments.
boost::container::static_vector<IR::Value, 5> phi_args;
for (size_t arg_index = 0; arg_index < inst->NumArgs(); arg_index++) { for (size_t arg_index = 0; arg_index < inst->NumArgs(); arg_index++) {
IR::Inst* arg_prod = inst->Arg(arg_index).InstRecursive(); IR::Inst* arg_prod = inst->Arg(arg_index).InstRecursive();
const IR::Value arg = GetRealValue(phi_map, arg_prod, lane); const IR::Value arg = GetRealValue(phi_map, arg_prod, lane);
new_phi->AddPhiOperand(inst->PhiBlock(arg_index), arg); phi_args.push_back(arg);
}
const IR::Value arg0 = phi_args[0].Resolve();
if (std::ranges::all_of(phi_args,
[&](const IR::Value& arg) { return arg.Resolve() == arg0; })) {
new_phi->ReplaceUsesWith(arg0);
} else {
for (size_t arg_index = 0; arg_index < inst->NumArgs(); arg_index++) {
new_phi->AddPhiOperand(inst->PhiBlock(arg_index), phi_args[arg_index]);
}
} }
return IR::Value{new_phi}; return IR::Value{new_phi};
} }

View File

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/breadth_first_search.h"
@ -259,7 +260,9 @@ public:
u32 Add(const SamplerResource& desc) { u32 Add(const SamplerResource& desc) {
const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) { const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) {
return desc.sampler == existing.sampler; return desc.sharp_idx == existing.sharp_idx &&
desc.is_inline_sampler == existing.is_inline_sampler &&
desc.inline_sampler == existing.inline_sampler;
})}; })};
return index; return index;
} }
@ -313,11 +316,24 @@ std::pair<const IR::Inst*, bool> TryDisableAnisoLod0(const IR::Inst* inst) {
return not_found; return not_found;
} }
// The bitfield extract might be hidden by phi sometimes
auto* prod0_arg0 = prod0->Arg(0).InstRecursive();
if (prod0_arg0->GetOpcode() == IR::Opcode::Phi) {
auto arg0 = prod0_arg0->Arg(0);
auto arg1 = prod0_arg0->Arg(1);
if (!arg0.IsImmediate() &&
arg0.InstRecursive()->GetOpcode() == IR::Opcode::BitFieldUExtract) {
prod0_arg0 = arg0.InstRecursive();
} else if (!arg1.IsImmediate() &&
arg1.InstRecursive()->GetOpcode() == IR::Opcode::BitFieldUExtract) {
prod0_arg0 = arg1.InstRecursive();
}
}
// The bits range is for lods (note that constants are changed after constant propagation pass) // The bits range is for lods (note that constants are changed after constant propagation pass)
const auto* prod0_arg0 = prod0->Arg(0).InstRecursive();
if (prod0_arg0->GetOpcode() != IR::Opcode::BitFieldUExtract || if (prod0_arg0->GetOpcode() != IR::Opcode::BitFieldUExtract ||
!(prod0_arg0->Arg(1).IsIdentity() && prod0_arg0->Arg(1).U32() == 12) || !(prod0_arg0->Arg(1).IsImmediate() && prod0_arg0->Arg(1).U32() == 12) ||
!(prod0_arg0->Arg(2).IsIdentity() && prod0_arg0->Arg(2).U32() == 8)) { !(prod0_arg0->Arg(2).IsImmediate() && prod0_arg0->Arg(2).U32() == 8)) {
return not_found; return not_found;
} }
@ -330,102 +346,170 @@ std::pair<const IR::Inst*, bool> TryDisableAnisoLod0(const IR::Inst* inst) {
// We're working on the first dword of s# // We're working on the first dword of s#
const auto* prod2 = inst->Arg(2).InstRecursive(); const auto* prod2 = inst->Arg(2).InstRecursive();
if (prod2->GetOpcode() != IR::Opcode::GetUserData && if (prod2->GetOpcode() != IR::Opcode::GetUserData &&
prod2->GetOpcode() != IR::Opcode::ReadConst) { prod2->GetOpcode() != IR::Opcode::ReadConst && prod2->GetOpcode() != IR::Opcode::Phi) {
return not_found; return not_found;
} }
return {prod2, true}; return {prod2, true};
} }
SharpLocation AttemptTrackSharp(const IR::Inst* inst, auto& visited_insts) { using SharpSources = boost::container::small_vector<const IR::Inst*, 4>;
// Search until we find a potential sharp source.
const auto pred = [&visited_insts](const IR::Inst* inst) -> std::optional<const IR::Inst*> { bool IsSharpSource(const IR::Inst* inst) {
if (std::ranges::find(visited_insts, inst) != visited_insts.end()) { return inst->GetOpcode() == IR::Opcode::GetUserData ||
return std::nullopt; inst->GetOpcode() == IR::Opcode::ReadConst;
}
SharpSources FindSharpSources(const IR::Inst* handle, u32 pc) {
SharpSources sources;
if (IsSharpSource(handle)) {
sources.push_back(handle);
return sources;
}
bool found_read_const_buffer = false;
boost::container::small_vector<const IR::Inst*, 8> visited;
std::queue<const IR::Inst*> queue;
queue.push(handle);
while (!queue.empty()) {
const IR::Inst* inst{queue.front()};
queue.pop();
if (IsSharpSource(inst)) {
sources.push_back(inst);
continue;
} }
if (inst->GetOpcode() == IR::Opcode::GetUserData || found_read_const_buffer |= inst->GetOpcode() == IR::Opcode::ReadConstBuffer;
inst->GetOpcode() == IR::Opcode::ReadConst) { if (inst->GetOpcode() != IR::Opcode::Phi) {
return inst; continue;
} }
return std::nullopt; for (size_t arg = inst->NumArgs(); arg--;) {
}; const IR::Value arg_value = inst->Arg(arg);
const auto result = IR::BreadthFirstSearch(inst, pred); if (arg_value.IsImmediate()) {
ASSERT_MSG(result, "Unable to track sharp source"); continue;
inst = result.value(); }
visited_insts.emplace_back(inst); const IR::Inst* arg_inst = arg_value.InstRecursive();
if (std::ranges::find(visited, arg_inst) == visited.end()) {
visited.push_back(arg_inst);
queue.push(arg_inst);
}
}
}
if (sources.empty()) {
if (found_read_const_buffer) {
UNREACHABLE_MSG("Bindless sharp access detected pc={:#x}", pc);
} else {
UNREACHABLE_MSG("Unable to find sharp sources pc={:#x}", pc);
}
}
return sources;
}
bool IsCfgBlockDominatedBy(const Shader::Gcn::Block* maybe_dominator,
const Shader::Gcn::Block* block, const Shader::Gcn::Block* dest_block) {
if (block == maybe_dominator) {
return true;
}
boost::container::small_vector<const Shader::Gcn::Block*, 8> visited;
std::queue<const Shader::Gcn::Block*> queue;
queue.push(block);
while (!queue.empty()) {
const Shader::Gcn::Block* block{queue.front()};
queue.pop();
if (block == dest_block) {
return false;
}
if (block == maybe_dominator) {
continue;
}
if (block->branch_false && !std::ranges::contains(visited, block->branch_false)) {
visited.push_back(block->branch_false);
queue.push(block->branch_false);
}
if (block->branch_true && !std::ranges::contains(visited, block->branch_true)) {
visited.push_back(block->branch_true);
queue.push(block->branch_true);
}
}
return true;
}
SharpLocation SharpLocationFromSource(const IR::Inst* inst) {
if (inst->GetOpcode() == IR::Opcode::GetUserData) { if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return static_cast<u32>(inst->Arg(0).ScalarReg()); return static_cast<SharpLocation>(inst->Arg(0).ScalarReg());
} else { } else {
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst,
"Sharp load not from constant memory");
return inst->Flags<u32>(); return inst->Flags<u32>();
} }
} }
/// Tracks a sharp with validation of the chosen data type. SharpLocation TrackSharp(const IR::Inst* inst, const IR::Block& current_parent, u32 pc = 0) {
template <typename DataType> auto sources = FindSharpSources(inst, pc);
std::pair<SharpLocation, DataType> TrackSharp(const IR::Inst* inst, const Info& info) { size_t num_sources = sources.size();
boost::container::small_vector<const IR::Inst*, 4> visited_insts{}; ASSERT(current_parent.cfg_block);
while (true) {
const auto prev_size = visited_insts.size(); // Perform dominance analysis on found sources and eliminate ones that don't pass
const auto sharp = AttemptTrackSharp(inst, visited_insts); // If a sharp source is dominated by another, the former can be eliminated.
if (const auto data = info.ReadUdSharp<DataType>(sharp); data.Valid()) { for (s32 i = 0; i < num_sources;) {
return std::make_pair(sharp, data); const IR::Block* block = sources[i]->GetParent();
ASSERT(block->cfg_block);
bool was_removed = false;
for (s32 j = 0; j < num_sources;) {
const IR::Block* dominator = sources[j]->GetParent();
ASSERT(dominator->cfg_block);
if (i != j && IsCfgBlockDominatedBy(dominator->cfg_block, block->cfg_block,
current_parent.cfg_block)) {
std::swap(sources[i], sources[num_sources - 1]);
--num_sources;
sources.pop_back();
was_removed = true;
break;
} else {
++j;
}
} }
if (prev_size == visited_insts.size()) { if (!was_removed) {
// No change in visited instructions, we've run out of paths. ++i;
UNREACHABLE_MSG("Unable to find valid sharp.");
} }
} }
}
/// Tracks a sharp without data validation. ASSERT_MSG(sources.size() == 1, "Unable to deduce sharp source");
SharpLocation TrackSharp(const IR::Inst* inst, const Info& info) { return SharpLocationFromSource(sources[0]);
boost::container::static_vector<const IR::Inst*, 1> visited_insts{};
return AttemptTrackSharp(inst, visited_insts);
}
s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
AmdGpu::Buffer& cbuf) {
// Assuming V# is in UD s[32:35]
// The next pattern:
// s_getpc_b64 s[32:33]
// s_add_u32 s32, <const>, s32
// s_addc_u32 s33, 0, s33
// s_mov_b32 s35, <const>
// s_movk_i32 s34, <const>
// buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ...
// is used to define an inline constant buffer
IR::Inst* handle = inst.Arg(0).InstRecursive();
if (!handle->AreAllArgsImmediates()) {
return -1;
}
// We have found this pattern. Build the sharp.
std::array<u64, 2> buffer;
buffer[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32);
buffer[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32;
cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
// Assign a binding to this sharp.
return descriptors.Add(BufferResource{
.sharp_idx = std::numeric_limits<u32>::max(),
.used_types = BufferDataType(inst, cbuf.GetNumberFmt()),
.inline_cbuf = cbuf,
.buffer_type = BufferType::Guest,
});
} }
void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
s32 binding{}; IR::Inst* handle = inst.Arg(0).InstRecursive();
AmdGpu::Buffer buffer; u32 buffer_binding = 0;
if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) { if (handle->AreAllArgsImmediates()) {
IR::Inst* handle = inst.Arg(0).InstRecursive(); // Assuming V# is in UD s[32:35]
IR::Inst* producer = handle->Arg(0).InstRecursive(); // The next pattern:
SharpLocation sharp; // s_getpc_b64 s[32:33]
std::tie(sharp, buffer) = TrackSharp<AmdGpu::Buffer>(producer, info); // s_add_u32 s32, <const>, s32
binding = descriptors.Add(BufferResource{ // s_addc_u32 s33, 0, s33
.sharp_idx = sharp, // s_mov_b32 s35, <const>
// s_movk_i32 s34, <const>
// buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ...
// is used to define an inline buffer resource
std::array<u64, 2> raw;
raw[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32);
raw[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32;
const auto buffer = std::bit_cast<AmdGpu::Buffer>(raw);
buffer_binding = descriptors.Add(BufferResource{
.sharp_idx = std::numeric_limits<u32>::max(),
.used_types = BufferDataType(inst, buffer.GetNumberFmt()),
.inline_cbuf = buffer,
.buffer_type = BufferType::Guest,
});
} else {
// Normal buffer resource.
IR::Inst* buffer_handle = handle->Arg(0).InstRecursive();
const auto sharp_idx = TrackSharp(buffer_handle, block);
const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
buffer_binding = descriptors.Add(BufferResource{
.sharp_idx = sharp_idx,
.used_types = BufferDataType(inst, buffer.GetNumberFmt()), .used_types = BufferDataType(inst, buffer.GetNumberFmt()),
.buffer_type = BufferType::Guest, .buffer_type = BufferType::Guest,
.is_written = IsBufferStore(inst), .is_written = IsBufferStore(inst),
@ -436,25 +520,14 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
// Replace handle with binding index in buffer resource list. // Replace handle with binding index in buffer resource list.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(binding)); inst.SetArg(0, ir.Imm32(buffer_binding));
} }
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
const auto opcode = inst->GetOpcode();
if (opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
opcode == IR::Opcode::GetUserData) {
return inst;
}
return std::nullopt;
};
const auto result = IR::BreadthFirstSearch(&inst, pred);
ASSERT_MSG(result, "Unable to find image sharp source");
const IR::Inst* tsharp_handle = result.value();
// Read image sharp. // Read image sharp.
const auto tsharp = TrackSharp(tsharp_handle, info);
const auto inst_info = inst.Flags<IR::TextureInstInfo>(); const auto inst_info = inst.Flags<IR::TextureInstInfo>();
const IR::Inst* image_handle = inst.Arg(0).InstRecursive();
const auto tsharp = TrackSharp(image_handle, block, inst_info.pc);
const bool is_atomic = IsImageAtomicInstruction(inst); const bool is_atomic = IsImageAtomicInstruction(inst);
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite || is_atomic; const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite || is_atomic;
const ImageResource image_res = { const ImageResource image_res = {
@ -506,38 +579,34 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) { if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
// Read sampler sharp. u32 sampler_binding = 0;
const auto sampler_binding = [&] -> u32 { const IR::Inst* sampler = inst.Arg(1).InstRecursive();
const auto sampler = inst.Arg(5).InstRecursive(); ASSERT(sampler && sampler->GetOpcode() == IR::Opcode::CompositeConstructU32x4);
ASSERT(sampler && sampler->GetOpcode() == IR::Opcode::CompositeConstructU32x4); // Inline sampler resource.
const auto handle = sampler->Arg(0); if (sampler->AreAllArgsImmediates()) {
// Inline sampler resource. const auto inline_sampler = AmdGpu::Sampler{
if (handle.IsImmediate()) { .raw0 = u64(sampler->Arg(1).U32()) << 32 | u64(sampler->Arg(0).U32()),
LOG_DEBUG(Render_Vulkan, "Inline sampler detected"); .raw1 = u64(sampler->Arg(3).U32()) << 32 | u64(sampler->Arg(2).U32()),
const auto [s1, s2, s3, s4] = };
std::tuple{sampler->Arg(0), sampler->Arg(1), sampler->Arg(2), sampler->Arg(3)}; sampler_binding = descriptors.Add(SamplerResource{
ASSERT(s1.IsImmediate() && s2.IsImmediate() && s3.IsImmediate() && .sharp_idx = std::numeric_limits<u32>::max(),
s4.IsImmediate()); .inline_sampler = inline_sampler,
const auto inline_sampler = AmdGpu::Sampler{ .is_inline_sampler = true,
.raw0 = u64(s2.U32()) << 32 | u64(s1.U32()), });
.raw1 = u64(s4.U32()) << 32 | u64(s3.U32()), } else {
}; // Normal sampler resource.
const auto binding = descriptors.Add(SamplerResource{inline_sampler}); const auto& [sampler_handle, disable_aniso] =
return binding; TryDisableAnisoLod0(sampler->Arg(0).InstRecursive());
} else { const auto ssharp = TrackSharp(sampler_handle, block, inst_info.pc);
// Normal sampler resource. sampler_binding = descriptors.Add(SamplerResource{
const auto ssharp_handle = handle.InstRecursive(); .sharp_idx = ssharp,
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); .is_inline_sampler = false,
const auto ssharp = TrackSharp(ssharp_ud, info); .associated_image = image_binding,
const auto binding = .disable_aniso = disable_aniso,
descriptors.Add(SamplerResource{ssharp, image_binding, disable_aniso}); });
return binding; }
}
}();
// Patch image and sampler handle.
inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16)); inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16));
} else { } else {
// Patch image handle.
inst.SetArg(0, ir.Imm32(image_binding)); inst.SetArg(0, ir.Imm32(image_binding));
} }
} }
@ -768,10 +837,10 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
const auto inst_info = inst.Flags<IR::TextureInstInfo>(); const auto inst_info = inst.Flags<IR::TextureInstInfo>();
const auto view_type = image.GetViewType(image_res.is_array); const auto view_type = image.GetViewType(image_res.is_array);
IR::Inst* body1 = inst.Arg(1).InstRecursive(); IR::Inst* body1 = inst.Arg(2).InstRecursive();
IR::Inst* body2 = inst.Arg(2).InstRecursive(); IR::Inst* body2 = inst.Arg(3).InstRecursive();
IR::Inst* body3 = inst.Arg(3).InstRecursive(); IR::Inst* body3 = inst.Arg(4).InstRecursive();
IR::F32 body4 = IR::F32{inst.Arg(4)}; IR::F32 body4 = IR::F32{inst.Arg(5)};
const auto get_addr_reg = [&](u32 index) -> IR::F32 { const auto get_addr_reg = [&](u32 index) -> IR::F32 {
if (index <= 3) { if (index <= 3) {
return IR::F32{body1->Arg(index)}; return IR::F32{body1->Arg(index)};
@ -942,14 +1011,13 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
return; return;
} }
const auto handle = inst.Arg(0); const auto image_handle = inst.Arg(0);
const auto image_res = info.images[handle.U32() & 0xFFFF]; const auto& image_res = info.images[image_handle.U32() & 0xFFFF];
auto image = image_res.GetSharp(info); auto image = image_res.GetSharp(info);
// Sample instructions must be handled separately using address register data. // Sample instructions must be handled separately using address register data.
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) { if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
PatchImageSampleArgs(block, inst, info, image_res, image); return PatchImageSampleArgs(block, inst, info, image_res, image);
return;
} }
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@ -963,17 +1031,13 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
case AmdGpu::ImageType::Color1D: // x, [lod] case AmdGpu::ImageType::Color1D: // x, [lod]
return {body->Arg(0), body->Arg(1)}; return {body->Arg(0), body->Arg(1)};
case AmdGpu::ImageType::Color1DArray: // x, slice, [lod] case AmdGpu::ImageType::Color1DArray: // x, slice, [lod]
[[fallthrough]]; case AmdGpu::ImageType::Color2D: // x, y, [lod]
case AmdGpu::ImageType::Color2D: // x, y, [lod] case AmdGpu::ImageType::Color2DMsaa: // x, y. (sample is passed on different argument)
[[fallthrough]];
case AmdGpu::ImageType::Color2DMsaa: // x, y. (sample is passed on different argument)
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)}; return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)};
case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod] case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod]
[[fallthrough]];
case AmdGpu::ImageType::Color2DMsaaArray: // x, y, slice. (sample is passed on different case AmdGpu::ImageType::Color2DMsaaArray: // x, y, slice. (sample is passed on different
// argument) // argument)
[[fallthrough]]; case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
default: default:
UNREACHABLE_MSG("Unknown image type {}", view_type); UNREACHABLE_MSG("Unknown image type {}", view_type);
@ -988,7 +1052,7 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
const auto is_storage = image_res.is_written; const auto is_storage = image_res.is_written;
if (inst.GetOpcode() == IR::Opcode::ImageRead) { if (inst.GetOpcode() == IR::Opcode::ImageRead) {
auto texel = ir.ImageRead(handle, coords, lod, ms, inst_info); auto texel = ir.ImageRead(image_handle, coords, lod, ms, inst_info);
if (is_storage) { if (is_storage) {
// Storage image requires shader swizzle. // Storage image requires shader swizzle.
texel = ApplySwizzle(ir, texel, image.DstSelect()); texel = ApplySwizzle(ir, texel, image.DstSelect());

View File

@ -45,6 +45,7 @@ union TextureInstInfo {
BitField<10, 1, u32> is_unnormalized; BitField<10, 1, u32> is_unnormalized;
BitField<11, 1, u32> is_gather; BitField<11, 1, u32> is_gather;
BitField<12, 1, u32> is_r128; BitField<12, 1, u32> is_r128;
BitField<16, 16, u32> pc;
}; };
union BufferInstInfo { union BufferInstInfo {

View File

@ -486,6 +486,10 @@ struct Sampler {
return raw0 != 0 || raw1 != 0; return raw0 != 0 || raw1 != 0;
} }
bool Valid() const {
return true;
}
bool operator==(const Sampler& other) const noexcept { bool operator==(const Sampler& other) const noexcept {
return std::memcmp(this, &other, sizeof(Sampler)) == 0; return std::memcmp(this, &other, sizeof(Sampler)) == 0;
} }