shader_recompiler: Use SRT flatbuf for bounds check size.

This commit is contained in:
squidbus 2025-02-15 07:44:15 -08:00
parent c35bcad851
commit 7c3c0183e9
6 changed files with 61 additions and 49 deletions

View File

@ -178,7 +178,14 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords); index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
const auto [id, pointer_type] = buffer[BufferAlias::U32]; const auto [id, pointer_type] = buffer[BufferAlias::U32];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)}; const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
return ctx.OpLoad(ctx.U32[1], ptr); const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
if (Sirit::ValidId(buffer.size_dwords)) {
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value);
} else {
return result;
}
} }
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {

View File

@ -192,32 +192,49 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
UNREACHABLE_MSG("Invalid attribute type {}", fmt); UNREACHABLE_MSG("Invalid attribute type {}", fmt);
} }
// Emits SPIR-V that computes a buffer's size at shader run time from descriptor words stored
// in the SRT flatbuf, and returns the Id of the resulting U32 value.
//
// sharp_idx: index of the descriptor's first U32 element within the flatbuf's U32 alias; the
//            elements at sharp_idx + 1 and sharp_idx + 2 are the ones read here.
// NOTE(review): the field positions used below look like the AMD buffer resource (V#) layout
// (stride in word 1, num_records in word 2) - confirm against the descriptor definition.
Id EmitContext::GetBufferSize(const u32 sharp_idx) {
// The flatbuf is taken to be the most recently defined buffer; the assert checks its type.
const auto& srt_flatbuf = buffers.back();
ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
// Load descriptor words 1 and 2 (relative to sharp_idx).
const auto rsrc1{
OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
const auto rsrc2{
OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 2)))};
// Stride is the 14-bit field starting at bit 16 of word 1 (operands are offset, then count).
const auto stride{OpBitFieldUExtract(U32[1], rsrc1, ConstU32(16u), ConstU32(14u))};
const auto num_records{rsrc2};
// With a zero stride, num_records is used as the size directly; otherwise the size is
// num_records * stride.
const auto stride_zero{OpIEqual(U1[1], stride, u32_zero_value)};
const auto stride_size{OpIMul(U32[1], num_records, stride)};
return OpSelect(U32[1], stride_zero, num_records, stride_size);
}
void EmitContext::DefineBufferProperties() { void EmitContext::DefineBufferProperties() {
for (BufferDefinition& buffer : buffers) { for (BufferDefinition& buffer : buffers) {
if (buffer.buffer_type != BufferType::Guest) { if (buffer.buffer_type != BufferType::Guest) {
continue; continue;
} }
const u32 binding = buffer.binding; const u32 binding = buffer.binding;
const u32 offset_half = PushData::BufOffsetIndex + (binding >> 4); const u32 half = PushData::BufOffsetIndex + (binding >> 4);
const u32 offset_comp = (binding & 0xf) >> 2; const u32 comp = (binding & 0xf) >> 2;
const u32 offset_bit = (binding & 0x3) << 3; const u32 offset = (binding & 0x3) << 3;
const Id offset_ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
push_data_block, ConstU32(offset_half), push_data_block, ConstU32(half), ConstU32(comp))};
ConstU32(offset_comp))}; const Id value{OpLoad(U32[1], ptr)};
const Id offset_value{OpLoad(U32[1], offset_ptr)}; buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
buffer.offset =
OpBitFieldUExtract(U32[1], offset_value, ConstU32(offset_bit), ConstU32(8U));
Name(buffer.offset, fmt::format("buf{}_off", binding)); Name(buffer.offset, fmt::format("buf{}_off", binding));
buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U)); buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding)); Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));
if (!profile.supports_robust_buffer_access) { // Only need to load size if performing bounds checks and the buffer is both guest and not
const u32 size_field = PushData::BufSizesIndex + (binding >> 2); // inline.
const u32 size_comp = binding & 0x3; if (!profile.supports_robust_buffer_access && buffer.buffer_type == BufferType::Guest) {
const Id size_ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), if (buffer.desc.sharp_idx == std::numeric_limits<u32>::max()) {
push_data_block, ConstU32(size_field), buffer.size = ConstU32(buffer.desc.inline_cbuf.GetSize());
ConstU32(size_comp))}; } else {
buffer.size = OpLoad(U32[1], size_ptr); buffer.size = GetBufferSize(buffer.desc.sharp_idx);
}
Name(buffer.size, fmt::format("buf{}_size", binding)); Name(buffer.size, fmt::format("buf{}_size", binding));
buffer.size_shorts = OpShiftRightLogical(U32[1], buffer.size, ConstU32(1U)); buffer.size_shorts = OpShiftRightLogical(U32[1], buffer.size, ConstU32(1U));
Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding)); Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
@ -606,8 +623,7 @@ void EmitContext::DefineOutputs() {
void EmitContext::DefinePushDataBlock() { void EmitContext::DefinePushDataBlock() {
// Create push constants block for instance steps rates // Create push constants block for instance steps rates
const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4], const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4],
U32[4], U32[4], U32[4], U32[4], U32[4], U32[4], U32[4], U32[4], U32[4], U32[4], U32[4], U32[4]),
U32[4], U32[4], U32[4], U32[4], U32[4], U32[4]),
"AuxData")}; "AuxData")};
Decorate(struct_type, spv::Decoration::Block); Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, PushData::Step0Index, "sr0"); MemberName(struct_type, PushData::Step0Index, "sr0");
@ -622,14 +638,6 @@ void EmitContext::DefinePushDataBlock() {
MemberName(struct_type, PushData::UdRegsIndex + 3, "ud_regs3"); MemberName(struct_type, PushData::UdRegsIndex + 3, "ud_regs3");
MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0"); MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0");
MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1"); MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1");
MemberName(struct_type, PushData::BufSizesIndex + 0, "buf_sizes0");
MemberName(struct_type, PushData::BufSizesIndex + 1, "buf_sizes1");
MemberName(struct_type, PushData::BufSizesIndex + 2, "buf_sizes2");
MemberName(struct_type, PushData::BufSizesIndex + 3, "buf_sizes3");
MemberName(struct_type, PushData::BufSizesIndex + 4, "buf_sizes4");
MemberName(struct_type, PushData::BufSizesIndex + 5, "buf_sizes5");
MemberName(struct_type, PushData::BufSizesIndex + 6, "buf_sizes6");
MemberName(struct_type, PushData::BufSizesIndex + 7, "buf_sizes7");
MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U); MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U); MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U);
MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U); MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U);
@ -642,14 +650,6 @@ void EmitContext::DefinePushDataBlock() {
MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U); MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U);
MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U); MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U);
MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U); MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U);
MemberDecorate(struct_type, PushData::BufSizesIndex + 0, spv::Decoration::Offset, 120U);
MemberDecorate(struct_type, PushData::BufSizesIndex + 1, spv::Decoration::Offset, 136U);
MemberDecorate(struct_type, PushData::BufSizesIndex + 2, spv::Decoration::Offset, 152U);
MemberDecorate(struct_type, PushData::BufSizesIndex + 3, spv::Decoration::Offset, 168U);
MemberDecorate(struct_type, PushData::BufSizesIndex + 4, spv::Decoration::Offset, 184U);
MemberDecorate(struct_type, PushData::BufSizesIndex + 5, spv::Decoration::Offset, 200U);
MemberDecorate(struct_type, PushData::BufSizesIndex + 6, spv::Decoration::Offset, 216U);
MemberDecorate(struct_type, PushData::BufSizesIndex + 7, spv::Decoration::Offset, 232U);
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant); push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(push_data_block, "push_data"); Name(push_data_block, "push_data");
interfaces.push_back(push_data_block); interfaces.push_back(push_data_block);
@ -694,18 +694,28 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
break; break;
default: default:
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "ubo", binding.buffer)); Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "ubo", binding.buffer));
break;
} }
interfaces.push_back(id); interfaces.push_back(id);
return {id, pointer_type}; return {id, pointer_type};
}; };
void EmitContext::DefineBuffers() { void EmitContext::DefineBuffers() {
if (!profile.supports_robust_buffer_access && !info.has_readconst) {
// In case ReadConstUbo has not already been bound by IR and is needed
// to query buffer sizes, bind it now.
info.buffers.push_back({
.used_types = IR::Type::U32,
.inline_cbuf = AmdGpu::Buffer::Null(),
.buffer_type = BufferType::ReadConstUbo,
});
}
for (const auto& desc : info.buffers) { for (const auto& desc : info.buffers) {
const auto buf_sharp = desc.GetSharp(info); const auto buf_sharp = desc.GetSharp(info);
const bool is_storage = desc.IsStorage(buf_sharp, profile); const bool is_storage = desc.IsStorage(buf_sharp, profile);
// Define aliases depending on the shader usage. // Define aliases depending on the shader usage.
auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type); auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type, desc);
if (True(desc.used_types & IR::Type::U32)) { if (True(desc.used_types & IR::Type::U32)) {
spv_buffer[BufferAlias::U32] = spv_buffer[BufferAlias::U32] =
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]); DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);

View File

@ -246,6 +246,7 @@ public:
struct BufferDefinition { struct BufferDefinition {
u32 binding; u32 binding;
BufferType buffer_type; BufferType buffer_type;
const BufferResource& desc;
Id offset; Id offset;
Id offset_dwords; Id offset_dwords;
Id size; Id size;
@ -310,6 +311,8 @@ private:
Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name); Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name);
Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name); Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name);
Id GetBufferSize(u32 sharp_idx);
}; };
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View File

@ -106,7 +106,6 @@ struct PushData {
static constexpr u32 YScaleIndex = 5; static constexpr u32 YScaleIndex = 5;
static constexpr u32 UdRegsIndex = 6; static constexpr u32 UdRegsIndex = 6;
static constexpr u32 BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4; static constexpr u32 BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4;
static constexpr u32 BufSizesIndex = BufOffsetIndex + NumBuffers / sizeof(u32) / 4;
u32 step0; u32 step0;
u32 step1; u32 step1;
@ -116,17 +115,14 @@ struct PushData {
float yscale; float yscale;
std::array<u32, NumUserDataRegs> ud_regs; std::array<u32, NumUserDataRegs> ud_regs;
std::array<u8, NumBuffers> buf_offsets; std::array<u8, NumBuffers> buf_offsets;
std::array<u32, NumBuffers> buf_sizes;
void AddBuffer(u32 binding, u32 offset, u32 size) { void AddOffset(u32 binding, u32 offset) {
ASSERT(offset < 256 && binding < buf_offsets.size()); ASSERT(offset < 256 && binding < buf_offsets.size());
buf_offsets[binding] = offset; buf_offsets[binding] = offset;
buf_sizes[binding] = size;
} }
}; };
static_assert(offsetof(PushData, buf_sizes) <= 128, static_assert(sizeof(PushData) <= 128,
"PushData size without buf_sizes is greater than guaranteed by Vulkan spec"); "PushData size is greater than minimum size guaranteed by Vulkan spec");
static_assert(sizeof(PushData) <= 256, "PushData size is greater than guaranteed by most GPUs");
/** /**
* Contains general information generated by the shader recompiler for an input program. * Contains general information generated by the shader recompiler for an input program.

View File

@ -38,11 +38,7 @@ void Pipeline::BindResources(DescriptorWrites& set_writes, const BufferBarriers&
} }
const auto stage_flags = IsCompute() ? vk::ShaderStageFlagBits::eCompute : AllGraphicsStageBits; const auto stage_flags = IsCompute() ? vk::ShaderStageFlagBits::eCompute : AllGraphicsStageBits;
// If not emulating buffer bounds checks, buffer sizes are not needed. cmdbuf.pushConstants(*pipeline_layout, stage_flags, 0u, sizeof(push_data), &push_data);
const auto push_constants_size = instance.IsRobustBufferAccess2Supported()
? offsetof(Shader::PushData, buf_sizes)
: sizeof(Shader::PushData);
cmdbuf.pushConstants(*pipeline_layout, stage_flags, 0u, push_constants_size, &push_data);
// Bind descriptor set. // Bind descriptor set.
if (set_writes.empty()) { if (set_writes.empty()) {

View File

@ -548,7 +548,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned; const u32 adjust = offset - offset_aligned;
ASSERT(adjust % 4 == 0); ASSERT(adjust % 4 == 0);
push_data.AddBuffer(binding.buffer, adjust, vsharp.GetSize()); push_data.AddOffset(binding.buffer, adjust);
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned,
vsharp.GetSize() + adjust); vsharp.GetSize() + adjust);
if (auto barrier = if (auto barrier =