shader_recompiler: Use push constants for user data regs

IndecisiveTurtle 2024-09-20 00:42:56 +03:00
parent fb5bc371cb
commit 58d92188c4
18 changed files with 135 additions and 79 deletions

View File

@ -474,6 +474,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
     src/shader_recompiler/params.h
     src/shader_recompiler/runtime_info.h
     src/shader_recompiler/specialization.h
+    src/shader_recompiler/backend/bindings.h
     src/shader_recompiler/backend/spirv/emit_spirv.cpp
     src/shader_recompiler/backend/spirv/emit_spirv.h
     src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp

View File

@ -1069,7 +1069,16 @@ ScePthread PThreadPool::Create(const char* name) {
        }
    }
+#ifdef _WIN64
    auto* ret = new PthreadInternal{};
+#else
+    // TODO: Linux specific hack
+    static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
+    auto* ret = reinterpret_cast<PthreadInternal*>(
+        mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE,
+             MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
+    hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
+#endif
    ret->is_free = false;
    ret->is_detached = false;
    ret->is_almost_done = false;

View File

@ -9,10 +9,10 @@ namespace Shader::Backend {
struct Bindings {
    u32 unified{};
-    u32 uniform_buffer{};
-    u32 storage_buffer{};
-    u32 texture{};
-    u32 image{};
+    u32 buffer{};
+    u32 user_data{};
+
+    auto operator<=>(const Bindings&) const = default;
};

} // namespace Shader::Backend
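
Bindings replaces the single running u32 counter used before this commit: unified is the Vulkan descriptor binding within set 0, buffer is the slot into the packed PushData::buf_offsets bytes, and user_data is the slot into the new PushData::ud_regs array; the hunks below advance them independently. A minimal self-contained sketch of that bookkeeping (the resource counts are made up; the walk order mirrors DefineBuffers/DefineTextureBuffers/DefineImagesAndSamplers and Info::PushUd below):

#include <cstdint>

// Stand-in for Shader::Backend::Bindings from this diff.
struct Bindings {
    std::uint32_t unified{};   // next descriptor binding in set 0
    std::uint32_t buffer{};    // next byte slot in PushData::buf_offsets
    std::uint32_t user_data{}; // next slot in PushData::ud_regs

    auto operator<=>(const Bindings&) const = default;
};

int main() {
    Bindings bnd{};
    for (int i = 0; i < 3; ++i) { ++bnd.unified; ++bnd.buffer; } // 3 buffers
    for (int i = 0; i < 1; ++i) { ++bnd.unified; ++bnd.buffer; } // 1 texel buffer
    for (int i = 0; i < 2 + 1; ++i) { ++bnd.unified; }           // 2 images + 1 sampler
    bnd.user_data += 4;                                          // 4 user-data regs gathered by PushUd
    // The next stage starts where this one ended; a cached module can advance
    // the same counters via Info::AddBindings without recompiling.
    return bnd == Bindings{7, 4, 4} ? 0 : 1;
}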

View File

@ -265,7 +265,7 @@ void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
} // Anonymous namespace

std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
-                           const IR::Program& program, u32& binding) {
+                           const IR::Program& program, Bindings& binding) {
    EmitContext ctx{profile, runtime_info, program.info, binding};
    const Id main{DefineMain(ctx, program)};
    DefineEntryPoint(program, ctx, main);

View File

@ -4,12 +4,13 @@
#pragma once

#include <vector>

+#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/profile.h"

namespace Shader::Backend::SPIRV {

[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
-                                         const IR::Program& program, u32& binding);
+                                         const IR::Program& program, Bindings& binding);

} // namespace Shader::Backend::SPIRV

View File

@ -86,7 +86,13 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
} // Anonymous namespace

Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
-    return ctx.ConstU32(ctx.info.user_data[static_cast<size_t>(reg)]);
+    const Id ud_ptr{
+        ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
+                          ctx.push_data_block, ctx.ConstU32(PushData::UdRegsIndex),
+                          ctx.ConstU32(ctx.binding.user_data + ctx.info.ud_mask.Index(reg)))};
+    const Id ud_reg{ctx.OpLoad(ctx.U32[1], ud_ptr)};
+    ctx.Name(ud_reg, fmt::format("ud_{}", u32(reg)));
+    return ud_reg;
}

void EmitGetThreadBitScalarReg(EmitContext& ctx) {
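
Previously a user-data register was baked into the module as a literal constant, so any change in its value forced a new shader permutation; now the value is read at draw time from ud_regs in the push-constant block. The slot is binding.user_data (the stage's base) plus the compact index of the register inside the used-register mask. A small self-contained sketch of that index computation, with a stand-in for Info::UserDataMask from this diff:

#include <bit>
#include <cassert>
#include <cstdint>

// Only registers the shader actually reads occupy a slot in PushData::ud_regs.
struct UserDataMask {
    std::uint32_t mask{};
    void Set(std::uint32_t reg) { mask |= 1u << reg; }
    std::uint32_t Index(std::uint32_t reg) const {
        return std::popcount(mask & ((1u << reg) - 1)); // number of used regs below 'reg'
    }
};

int main() {
    UserDataMask ud;
    ud.Set(0); ud.Set(5); ud.Set(12);   // shader reads s[0], s[5], s[12]
    const std::uint32_t base = 3;       // binding.user_data for this stage (example value)
    assert(base + ud.Index(5) == 4);    // s[5]  -> ud_regs[4]
    assert(base + ud.Index(12) == 5);   // s[12] -> ud_regs[5]
    return 0;
}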

View File

@ -42,7 +42,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
} // Anonymous namespace

EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
-                         const Info& info_, u32& binding_)
+                         const Info& info_, Bindings& binding_)
    : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
      profile{profile_}, stage{info.stage}, binding{binding_} {
    AddCapability(spv::Capability::Shader);
@ -173,7 +173,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
}

void EmitContext::DefineBufferOffsets() {
-    for (auto& buffer : buffers) {
+    for (BufferDefinition& buffer : buffers) {
        const u32 binding = buffer.binding;
        const u32 half = PushData::BufOffsetIndex + (binding >> 4);
        const u32 comp = (binding & 0xf) >> 2;
@ -181,10 +181,11 @@ void EmitContext::DefineBufferOffsets() {
        const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
                                   push_data_block, ConstU32(half), ConstU32(comp))};
        const Id value{OpLoad(U32[1], ptr)};
+        Name(value, fmt::format("buf{}_off", binding));
        buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
        buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
    }
-    for (auto& tex_buffer : texture_buffers) {
+    for (TextureBufferDefinition& tex_buffer : texture_buffers) {
        const u32 binding = tex_buffer.binding;
        const u32 half = PushData::BufOffsetIndex + (binding >> 4);
        const u32 comp = (binding & 0xf) >> 2;
@ -192,6 +193,7 @@ void EmitContext::DefineBufferOffsets() {
        const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
                                   push_data_block, ConstU32(half), ConstU32(comp))};
        const Id value{OpLoad(U32[1], ptr)};
+        Name(value, fmt::format("texbuf{}_off", binding));
        tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
    }
}
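
Each buffer's byte offset adjustment is packed as one u8 inside the two uvec4 push-constant members (buf_offsets0/1), and the shader unpacks it with a bitfield extract. A self-contained sketch of the addressing, assuming the bit offset fed to the extract is (binding & 3) * 8, which is computed on a line not visible in this hunk:

#include <cassert>
#include <cstdint>

// Hypothetical CPU-side mirror of the lookup DefineBufferOffsets emits:
// push-constant member (half), uvec4 component (comp) and bit offset for a binding.
struct OffsetSlot {
    std::uint32_t half; // 2 (buf_offsets0) or 3 (buf_offsets1)
    std::uint32_t comp; // uvec4 component
    std::uint32_t bit;  // bit offset of the packed u8 (assumed (binding & 3) * 8)
};

OffsetSlot Locate(std::uint32_t binding) {
    constexpr std::uint32_t BufOffsetIndex = 2; // PushData::BufOffsetIndex
    return {BufOffsetIndex + (binding >> 4), (binding & 0xf) >> 2, (binding & 3) * 8};
}

int main() {
    // Buffer binding 5 -> member buf_offsets0, component 1, bits [8, 16).
    const auto slot = Locate(5);
    assert(slot.half == 2 && slot.comp == 1 && slot.bit == 8);
    return 0;
}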
@ -330,13 +332,15 @@ void EmitContext::DefineOutputs() {
void EmitContext::DefinePushDataBlock() {
    // Create push constants block for instance steps rates
-    const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4]), "AuxData")};
+    const Id ud_array{TypeArray(U32[1], ConstU32(PushData::MaxUdRegs))};
+    Decorate(ud_array, spv::Decoration::ArrayStride, 4U);
+    const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], ud_array), "AuxData")};
    Decorate(struct_type, spv::Decoration::Block);
    MemberName(struct_type, 0, "sr0");
    MemberName(struct_type, 1, "sr1");
    MemberName(struct_type, 2, "buf_offsets0");
    MemberName(struct_type, 3, "buf_offsets1");
-    MemberName(struct_type, 4, "buf_offsets2");
+    MemberName(struct_type, 4, "ud_regs");
    MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
    MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
    MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U);
@ -379,7 +383,7 @@ void EmitContext::DefineBuffers() {
        const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
        const Id pointer_type = TypePointer(storage_class, data_type);
        const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
-        Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::Binding, binding.unified++);
        Decorate(id, spv::Decoration::DescriptorSet, 0U);
        if (is_storage && !desc.is_written) {
            Decorate(id, spv::Decoration::NonWritable);
@ -388,7 +392,7 @@ void EmitContext::DefineBuffers() {
        buffers.push_back({
            .id = id,
-            .binding = binding++,
+            .binding = binding.buffer++,
            .data_types = data_types,
            .pointer_type = pointer_type,
        });
@ -406,12 +410,12 @@ void EmitContext::DefineTextureBuffers() {
                                     sampled, spv::ImageFormat::Unknown)};
        const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
        const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
-        Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::Binding, binding.unified++);
        Decorate(id, spv::Decoration::DescriptorSet, 0U);
        Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sgpr_base));
        texture_buffers.push_back({
            .id = id,
-            .binding = binding++,
+            .binding = binding.buffer++,
            .image_type = image_type,
            .result_type = sampled_type[4],
            .is_integer = is_integer,
@ -525,7 +529,7 @@ void EmitContext::DefineImagesAndSamplers() {
        const Id image_type{ImageType(*this, image_desc, sampled_type)};
        const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
        const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
-        Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::Binding, binding.unified++);
        Decorate(id, spv::Decoration::DescriptorSet, 0U);
        Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base,
                             image_desc.dword_offset));
@ -538,7 +542,6 @@ void EmitContext::DefineImagesAndSamplers() {
            .is_storage = image_desc.is_storage,
        });
        interfaces.push_back(id);
-        ++binding;
    }
    if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
        image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
@ -550,13 +553,12 @@ void EmitContext::DefineImagesAndSamplers() {
    sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type);
    for (const auto& samp_desc : info.samplers) {
        const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)};
-        Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::Binding, binding.unified++);
        Decorate(id, spv::Decoration::DescriptorSet, 0U);
        Name(id, fmt::format("{}_{}{}_{:02x}", stage, "samp", samp_desc.sgpr_base,
                             samp_desc.dword_offset));
        samplers.push_back(id);
        interfaces.push_back(id);
-        ++binding;
    }
}

View File

@ -6,6 +6,7 @@
#include <array>

#include <sirit/sirit.h>

+#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/profile.h"
@ -37,7 +38,7 @@ struct VectorIds {
class EmitContext final : public Sirit::Module {
public:
    explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, const Info& info,
-                         u32& binding);
+                         Bindings& binding);
    ~EmitContext();

    Id Def(const IR::Value& value);
@ -221,7 +222,7 @@ public:
        bool is_storage = false;
    };

-    u32& binding;
+    Bindings& binding;
    boost::container::small_vector<BufferDefinition, 16> buffers;
    boost::container::small_vector<TextureBufferDefinition, 8> texture_buffers;
    boost::container::small_vector<TextureDefinition, 8> images;

View File

@ -23,7 +23,6 @@ struct Compare {
static IR::Condition MakeCondition(const GcnInst& inst) {
    if (inst.IsCmpx()) {
-        ASSERT(inst.opcode == Opcode::V_CMPX_NE_U32);
        return IR::Condition::Execnz;
    }
@ -99,7 +98,7 @@ void CFG::EmitDivergenceLabels() {
            // with SAVEEXEC to mask the threads that didn't pass the condition
            // of initial branch.
            (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) ||
-            inst.opcode == Opcode::V_CMPX_NE_U32;
+            inst.IsCmpx();
    };
    const auto is_close_scope = [](const GcnInst& inst) {
        // Closing an EXEC scope can be either a branch instruction
@ -109,7 +108,7 @@ void CFG::EmitDivergenceLabels() {
        // Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
        // Those instructions need to be wrapped in the condition as well so allow branch
        // as end scope instruction.
-        inst.opcode == Opcode::S_CBRANCH_EXECZ ||
+        inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ENDPGM ||
        (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo);
    };
@ -127,7 +126,8 @@ void CFG::EmitDivergenceLabels() {
    s32 curr_begin = -1;
    for (size_t index = GetIndex(start); index < end_index; index++) {
        const auto& inst = inst_list[index];
-        if (is_close_scope(inst) && curr_begin != -1) {
+        const bool is_close = is_close_scope(inst);
+        if ((is_close || index == end_index - 1) && curr_begin != -1) {
            // If there are no instructions inside scope don't do anything.
            if (index - curr_begin == 1) {
                curr_begin = -1;
@ -139,7 +139,9 @@ void CFG::EmitDivergenceLabels() {
            const Label label = index_to_pc[curr_begin] + save_inst.length;
            AddLabel(label);
            // Add a label to the close scope instruction as well.
-            AddLabel(index_to_pc[index]);
+            if (is_close) {
+                AddLabel(index_to_pc[index]);
+            }
            // Reset scope begin.
            curr_begin = -1;
        }
@ -194,7 +196,7 @@ void CFG::LinkBlocks() {
        const auto end_inst{block.end_inst};
        // Handle divergence block inserted here.
        if (end_inst.opcode == Opcode::S_AND_SAVEEXEC_B64 ||
-            end_inst.opcode == Opcode::S_ANDN2_B64 || end_inst.opcode == Opcode::V_CMPX_NE_U32) {
+            end_inst.opcode == Opcode::S_ANDN2_B64 || end_inst.IsCmpx()) {
            // Blocks are stored ordered by address in the set
            auto next_it = std::next(it);
            auto* target_block = &(*next_it);

View File

@ -7,6 +7,7 @@
#include <boost/container/static_vector.hpp>

#include "common/assert.h"
#include "common/types.h"
+#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h"
@ -85,11 +86,14 @@ struct SamplerResource {
using SamplerResourceList = boost::container::small_vector<SamplerResource, 16>;

struct PushData {
-    static constexpr size_t BufOffsetIndex = 2;
+    static constexpr u32 BufOffsetIndex = 2;
+    static constexpr u32 UdRegsIndex = 4;
+    static constexpr u32 MaxUdRegs = 16;

    u32 step0;
    u32 step1;
-    std::array<u8, 48> buf_offsets;
+    std::array<u8, 32> buf_offsets;
+    std::array<u32, MaxUdRegs> ud_regs;

    void AddOffset(u32 binding, u32 offset) {
        ASSERT(offset < 256 && binding < buf_offsets.size());
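
With these sizes the push-constant block is 8 bytes of step rates, 32 bytes of packed buffer offsets and 64 bytes of user-data registers, 104 bytes total, which still fits the 128-byte minimum maxPushConstantsSize that Vulkan guarantees. A small accounting sketch using a stand-in struct (the member offsets matching the SPIR-V "AuxData" block above are an inference from the C++ layout, not shown in the hunk):

#include <array>
#include <cstddef>
#include <cstdint>

// Stand-in mirroring PushData from this header.
struct PushDataLayout {
    std::uint32_t step0;
    std::uint32_t step1;
    std::array<std::uint8_t, 32> buf_offsets; // the two uvec4s in the SPIR-V block
    std::array<std::uint32_t, 16> ud_regs;    // MaxUdRegs entries
};

static_assert(offsetof(PushDataLayout, buf_offsets) == 8);
static_assert(offsetof(PushDataLayout, ud_regs) == 40); // presumed offset of member 4 ("ud_regs")
static_assert(sizeof(PushDataLayout) == 104);           // within the 128-byte Vulkan minimum

int main() { return 0; }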
@ -145,6 +149,24 @@ struct Info {
    AttributeFlags loads{};
    AttributeFlags stores{};

+    struct UserDataMask {
+        void Set(IR::ScalarReg reg) noexcept {
+            mask |= 1 << static_cast<u32>(reg);
+        }
+
+        u32 Index(IR::ScalarReg reg) const noexcept {
+            const u32 reg_mask = (1 << static_cast<u32>(reg)) - 1;
+            return std::popcount(mask & reg_mask);
+        }
+
+        u32 NumRegs() const noexcept {
+            return std::popcount(mask);
+        }
+
+        u32 mask;
+    };
+    UserDataMask ud_mask{};
+
    s8 vertex_offset_sgpr = -1;
    s8 instance_offset_sgpr = -1;
@ -190,11 +212,22 @@ struct Info {
        return data;
    }

-    size_t NumBindings() const noexcept {
-        return buffers.size() + texture_buffers.size() + images.size() + samplers.size();
-    }
+    void PushUd(Backend::Bindings& bnd, PushData& push) const {
+        u32 mask = ud_mask.mask;
+        while (mask) {
+            const u32 index = std::countr_zero(mask);
+            mask &= ~(1U << index);
+            push.ud_regs[bnd.user_data++] = user_data[index];
+        }
+    }

-    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const noexcept {
+    void AddBindings(Backend::Bindings& bnd) const {
+        bnd.buffer += buffers.size() + texture_buffers.size();
+        bnd.unified += bnd.buffer + images.size() + samplers.size();
+        bnd.user_data += ud_mask.NumRegs();
+    }
+
+    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const {
        u32 vertex_offset = 0;
        u32 instance_offset = 0;
        if (vertex_offset_sgpr != -1) {
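
PushUd walks the mask from the lowest set bit upward, so registers land in ud_regs in ascending register order, which is exactly the order Index() assigns on the shader side in EmitGetUserData above. A self-contained illustration of that round trip, with stand-in types for the ones declared here:

#include <array>
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
    // Shader reads user-data registers 1, 6 and 9; their current values:
    const std::array<std::uint32_t, 16> user_data{0, 0xA, 0, 0, 0, 0, 0xB, 0, 0, 0xC};
    std::uint32_t mask = (1u << 1) | (1u << 6) | (1u << 9); // Info::UserDataMask::mask

    std::array<std::uint32_t, 16> ud_regs{}; // PushData::ud_regs
    std::uint32_t slot = 0;                  // Bindings::user_data (base 0 in this example)
    while (mask) {                           // same loop shape as Info::PushUd
        const std::uint32_t index = std::countr_zero(mask);
        mask &= ~(1u << index);
        ud_regs[slot++] = user_data[index];
    }

    // The shader-side index (popcount of lower set bits) lands on the same slots.
    assert(ud_regs[0] == 0xA && ud_regs[1] == 0xB && ud_regs[2] == 0xC);
    return 0;
}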

View File

@ -8,14 +8,15 @@ namespace Shader::Optimization {
void Visit(Info& info, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::GetAttribute:
-    case IR::Opcode::GetAttributeU32: {
+    case IR::Opcode::GetAttributeU32:
        info.loads.Set(inst.Arg(0).Attribute(), inst.Arg(1).U32());
        break;
-    }
-    case IR::Opcode::SetAttribute: {
+    case IR::Opcode::SetAttribute:
        info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
        break;
-    }
+    case IR::Opcode::GetUserData:
+        info.ud_mask.Set(inst.Arg(0).ScalarReg());
+        break;
    case IR::Opcode::LoadSharedU32:
    case IR::Opcode::LoadSharedU64:
    case IR::Opcode::WriteSharedU32:

View File

@ -6,6 +6,7 @@
#include <bitset>

#include "common/types.h"
+#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/info.h"

namespace Shader {
@ -45,11 +46,11 @@ struct StageSpecialization {
    boost::container::small_vector<BufferSpecialization, 16> buffers;
    boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
    boost::container::small_vector<ImageSpecialization, 16> images;
-    u32 start_binding{};
+    Backend::Bindings start{};

    explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
-                                 u32 start_binding_)
-        : info{&info_}, runtime_info{runtime_info_}, start_binding{start_binding_} {
+                                 Backend::Bindings start_)
+        : info{&info_}, runtime_info{runtime_info_}, start{start_} {
        u32 binding{};
        ForEachSharp(binding, buffers, info->buffers,
                     [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
@ -82,7 +83,7 @@ struct StageSpecialization {
    }

    bool operator==(const StageSpecialization& other) const {
-        if (start_binding != other.start_binding) {
+        if (start != other.start) {
            return false;
        }
        if (runtime_info != other.runtime_info) {

View File

@ -112,10 +112,11 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
    boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
    boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers;
    Shader::PushData push_data{};
-    u32 binding{};
+    Shader::Backend::Bindings binding{};

    image_infos.clear();

+    info->PushUd(binding, push_data);
    for (const auto& desc : info->buffers) {
        bool is_storage = true;
        if (desc.is_gds_buffer) {
@ -147,21 +148,20 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
                buffer_cache.ObtainBuffer(address, size, desc.is_written);
            const u32 offset_aligned = Common::AlignDown(offset, alignment);
            const u32 adjust = offset - offset_aligned;
-            if (adjust != 0) {
-                ASSERT(adjust % 4 == 0);
-                push_data.AddOffset(binding, adjust);
-            }
+            ASSERT(adjust % 4 == 0);
+            push_data.AddOffset(binding.buffer, adjust);
            buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
        }
        set_writes.push_back({
            .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
                                         : vk::DescriptorType::eUniformBuffer,
            .pBufferInfo = &buffer_infos.back(),
        });
+        ++binding.buffer;
    }
    for (const auto& desc : info->texture_buffers) {
@ -188,10 +188,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
                   "Texel buffer stride must match format stride");
        const u32 offset_aligned = Common::AlignDown(offset, alignment);
        const u32 adjust = offset - offset_aligned;
-        if (adjust != 0) {
-            ASSERT(adjust % fmt_stride == 0);
-            push_data.AddOffset(binding, adjust / fmt_stride);
-        }
+        ASSERT(adjust % fmt_stride == 0);
+        push_data.AddOffset(binding.buffer, adjust / fmt_stride);
        buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
                                      vsharp.GetDataFmt(), vsharp.GetNumberFmt());
        if (auto barrier =
@ -206,13 +204,14 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
        }
        set_writes.push_back({
            .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
                                              : vk::DescriptorType::eUniformTexelBuffer,
            .pTexelBufferView = &buffer_view,
        });
+        ++binding.buffer;
    }
    BindTextures(texture_cache, *info, binding, set_writes);
@ -226,7 +225,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
        image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
        set_writes.push_back({
            .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = vk::DescriptorType::eSampler,
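
BindResources only fills push_data and the descriptor writes; the struct itself still has to reach the GPU. A hedged sketch of what the caller presumably does afterwards (the pipeline-layout range and the exact call site are assumptions, not shown in this diff):

#include <vulkan/vulkan.hpp>

// Assumed upload path: one push-constant range starting at offset 0 and covering
// the whole PushData struct, refreshed once per draw/dispatch after BindResources().
template <typename PushDataT>
void UploadPushData(vk::CommandBuffer cmdbuf, vk::PipelineLayout layout,
                    vk::ShaderStageFlags stages, const PushDataT& push_data) {
    cmdbuf.pushConstants(layout, stages, 0U, static_cast<uint32_t>(sizeof(PushDataT)),
                         &push_data);
}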

View File

@ -356,7 +356,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
    boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
    boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers;
    Shader::PushData push_data{};
-    u32 binding{};
+    Shader::Backend::Bindings binding{};

    image_infos.clear();
@ -368,6 +368,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
        push_data.step0 = regs.vgt_instance_step_rate_0;
        push_data.step1 = regs.vgt_instance_step_rate_1;
    }
+    stage->PushUd(binding, push_data);
    for (const auto& buffer : stage->buffers) {
        const auto vsharp = buffer.GetSharp(*stage);
        const bool is_storage = buffer.IsStorage(vsharp);
@ -383,10 +384,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
                buffer_cache.ObtainBuffer(address, size, buffer.is_written);
            const u32 offset_aligned = Common::AlignDown(offset, alignment);
            const u32 adjust = offset - offset_aligned;
-            if (adjust != 0) {
-                ASSERT(adjust % 4 == 0);
-                push_data.AddOffset(binding, adjust);
-            }
+            ASSERT(adjust % 4 == 0);
+            push_data.AddOffset(binding.buffer, adjust);
            buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
        } else if (instance.IsNullDescriptorSupported()) {
            buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
@ -396,13 +395,14 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
        }
        set_writes.push_back({
            .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
                                         : vk::DescriptorType::eUniformBuffer,
            .pBufferInfo = &buffer_infos.back(),
        });
+        ++binding.buffer;
    }
    for (const auto& desc : stage->texture_buffers) {
@ -419,10 +419,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
                   "Texel buffer stride must match format stride");
        const u32 offset_aligned = Common::AlignDown(offset, alignment);
        const u32 adjust = offset - offset_aligned;
-        if (adjust != 0) {
-            ASSERT(adjust % fmt_stride == 0);
-            push_data.AddOffset(binding, adjust / fmt_stride);
-        }
+        ASSERT(adjust % fmt_stride == 0);
+        push_data.AddOffset(binding.buffer, adjust / fmt_stride);
        buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
                                      vsharp.GetDataFmt(), vsharp.GetNumberFmt());
        const auto dst_access = desc.is_written ? vk::AccessFlagBits2::eShaderWrite
@ -437,13 +435,14 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
        }
        set_writes.push_back({
            .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
                                              : vk::DescriptorType::eUniformTexelBuffer,
            .pTexelBufferView = &buffer_view,
        });
+        ++binding.buffer;
    }
    BindTextures(texture_cache, *stage, binding, set_writes);
@ -463,7 +462,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
        image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
        set_writes.push_back({
            .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = vk::DescriptorType::eSampler,

View File

@ -264,7 +264,7 @@ bool PipelineCache::RefreshGraphicsKey() {
        ++remapped_cb;
    }

-    u32 binding{};
+    Shader::Backend::Bindings binding{};
    for (u32 i = 0; i < MaxShaderStages; i++) {
        if (!regs.stage_enable.IsStageEnabled(i)) {
            key.stage_hashes[i] = 0;
@ -332,7 +332,7 @@ bool PipelineCache::RefreshGraphicsKey() {
}

bool PipelineCache::RefreshComputeKey() {
-    u32 binding{};
+    Shader::Backend::Bindings binding{};
    const auto* cs_pgm = &liverpool->regs.cs_program;
    const auto cs_params = Liverpool::GetParams(*cs_pgm);
    if (ShouldSkipShader(cs_params.hash, "compute")) {
@ -346,7 +346,7 @@ bool PipelineCache::RefreshComputeKey() {
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
                                              const Shader::RuntimeInfo& runtime_info,
                                              std::span<const u32> code, size_t perm_idx,
-                                              u32& binding) {
+                                              Shader::Backend::Bindings& binding) {
    LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
             perm_idx != 0 ? "(permutation)" : "");
    if (Config::dumpShaders()) {
@ -366,14 +366,14 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
}

std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram(
-    Shader::Stage stage, Shader::ShaderParams params, u32& binding) {
+    Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding) {
    const auto runtime_info = BuildRuntimeInfo(stage);
    auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
    if (new_program) {
        Program* program = program_pool.Create(stage, params);
-        u32 start_binding = binding;
+        auto start = binding;
        const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
-        const auto spec = Shader::StageSpecialization(program->info, runtime_info, start_binding);
+        const auto spec = Shader::StageSpecialization(program->info, runtime_info, start);
        program->AddPermut(module, std::move(spec));
        it_pgm.value() = program;
        return std::make_tuple(&program->info, module, HashCombine(params.hash, 0));
@ -391,7 +391,7 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
        module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding);
        program->AddPermut(module, std::move(spec));
    } else {
-        binding += info.NumBindings();
+        info.AddBindings(binding);
        module = it->module;
        perm_idx = std::distance(program->modules.begin(), it);
    }

View File

@ -49,9 +49,8 @@ public:
    const ComputePipeline* GetComputePipeline();

-    std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(Shader::Stage stage,
-                                                                      Shader::ShaderParams params,
-                                                                      u32& binding);
+    std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(
+        Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding);

private:
    bool RefreshGraphicsKey();
@ -60,7 +59,8 @@ private:
    void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,
                    std::string_view ext);
    vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info,
-                                   std::span<const u32> code, size_t perm_idx, u32& binding);
+                                   std::span<const u32> code, size_t perm_idx,
+                                   Shader::Backend::Bindings& binding);
    Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage);

private:

View File

@ -20,7 +20,7 @@ Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorH
Pipeline::~Pipeline() = default;

void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
-                            u32& binding, DescriptorWrites& set_writes) const {
+                            Shader::Backend::Bindings& binding, DescriptorWrites& set_writes) const {
    using ImageBindingInfo = std::tuple<VideoCore::ImageId, AmdGpu::Image, Shader::ImageResource>;
    boost::container::static_vector<ImageBindingInfo, 32> image_bindings;
@ -67,7 +67,7 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader
        set_writes.push_back({
            .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = desc.is_storage ? vk::DescriptorType::eStorageImage

View File

@ -4,6 +4,7 @@
#pragma once

#include "shader_recompiler/info.h"
+#include "shader_recompiler/backend/bindings.h"
#include "video_core/renderer_vulkan/vk_common.h"

namespace VideoCore {
@ -33,7 +34,7 @@ public:
    using DescriptorWrites = boost::container::small_vector<vk::WriteDescriptorSet, 16>;

    void BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
-                      u32& binding, DescriptorWrites& set_writes) const;
+                      Shader::Backend::Bindings& binding, DescriptorWrites& set_writes) const;

protected:
    const Instance& instance;