shader_recompiler: Merge srt_flatbuf into buffers list

* It's no longer a special case, yay
IndecisiveTurtle 2025-02-14 11:21:31 +02:00
parent 027c198d74
commit 7423ccfe88
11 changed files with 52 additions and 92 deletions

View File

@@ -164,9 +164,9 @@ using BufferAlias = EmitContext::BufferAlias;
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
-    ASSERT(ctx.srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0);
-    const auto& buffer = ctx.srt_flatbuf;
-    const auto [id, pointer_type] = buffer[BufferAlias::U32];
+    const auto& srt_flatbuf = ctx.buffers.back();
+    ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
+    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(flatbuf_off_dw))};
     return ctx.OpLoad(ctx.U32[1], ptr);
 }
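
For context, a minimal compilable sketch (stand-in types, not the project's real headers) of the invariant the new ASSERT encodes: the ReadConstUbo descriptor is appended after all guest buffers, so the backend can fetch it with buffers.back():

```cpp
#include <cassert>
#include <vector>

enum class BufferType { Guest, ReadConstUbo, GdsBuffer };

struct BufferDefinition {
    int binding{-1};
    BufferType buffer_type{BufferType::Guest};
};

int main() {
    std::vector<BufferDefinition> buffers;
    buffers.push_back({0, BufferType::Guest});        // regular guest buffers first
    buffers.push_back({1, BufferType::Guest});
    buffers.push_back({2, BufferType::ReadConstUbo}); // flatbuf appended last

    const auto& srt_flatbuf = buffers.back();
    assert(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
}
```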

View File

@@ -193,6 +193,9 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
 void EmitContext::DefineBufferOffsets() {
     for (BufferDefinition& buffer : buffers) {
+        if (buffer.buffer_type != BufferType::Guest) {
+            continue;
+        }
         const u32 binding = buffer.binding;
         const u32 half = PushData::BufOffsetIndex + (binding >> 4);
         const u32 comp = (binding & 0xf) >> 2;
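
A worked example of the offset-packing arithmetic in this loop may help: sixteen byte-sized buffer offsets appear to be packed per push-constant slot, four per 32-bit component. The `BufOffsetIndex = 2` base and the byte-lane computation are assumptions inferred from the shift pattern, not shown in this hunk:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    constexpr std::uint32_t BufOffsetIndex = 2; // assumed base slot for this sketch
    for (std::uint32_t binding : {0u, 5u, 21u}) {
        const std::uint32_t half = BufOffsetIndex + (binding >> 4); // which push-data slot
        const std::uint32_t comp = (binding & 0xf) >> 2;            // which u32 component
        const std::uint32_t byte = binding & 3;                     // inferred byte lane
        std::printf("binding %2u -> half %u, comp %u, byte %u\n", binding, half, comp, byte);
    }
}
```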
@@ -632,12 +635,6 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_written
 };
 
 void EmitContext::DefineBuffers() {
-    if (info.has_readconst) {
-        srt_flatbuf[BufferAlias::U32] = DefineBuffer(false, false, 2, BufferType::ReadConstUbo, U32[1]);
-        srt_flatbuf.binding = binding.buffer++;
-        ++binding.unified;
-    }
     for (const auto& desc : info.buffers) {
         const auto buf_sharp = desc.GetSharp(info);
         const bool is_storage = desc.IsStorage(buf_sharp, profile);

View File

@@ -255,7 +255,6 @@ public:
     Bindings& binding;
     boost::container::small_vector<Id, 16> buf_type_ids;
    boost::container::small_vector<BufferDefinition, 16> buffers;
-    BufferDefinition srt_flatbuf;
    boost::container::small_vector<TextureDefinition, 8> images;
    boost::container::small_vector<Id, 4> samplers;

View File

@@ -51,15 +51,17 @@ struct BufferResource {
     IR::Type used_types;
     AmdGpu::Buffer inline_cbuf;
-    bool is_gds_buffer{};
+    BufferType buffer_type;
     bool is_instance_data{};
     u8 instance_attrib{};
     bool is_written{};
     bool is_formatted{};
 
-    [[nodiscard]] bool IsStorage(const AmdGpu::Buffer& buffer,
-                                 const Profile& profile) const noexcept {
-        return buffer.GetSize() > profile.max_ubo_size || is_written || is_gds_buffer;
+    bool IsSpecial() const noexcept {
+        return buffer_type != BufferType::Guest;
+    }
+
+    bool IsStorage(const AmdGpu::Buffer& buffer, const Profile& profile) const noexcept {
+        return buffer.GetSize() > profile.max_ubo_size || is_written;
     }
 
     [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
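
A compilable sketch (standalone stand-ins, not shadPS4's real Profile or V# types) of how the two predicates now divide responsibilities: IsSpecial() filters internal bindings (GDS ring, ReadConst UBO) out of guest-buffer paths, while IsStorage() decides UBO vs. SSBO for guest buffers only:

```cpp
#include <cstdint>
#include <iostream>

enum class BufferType { Guest, ReadConstUbo, GdsBuffer };

struct Profile {
    std::uint64_t max_ubo_size;
};

struct Sharp {
    std::uint64_t size;
};

struct BufferResource {
    BufferType buffer_type{BufferType::Guest};
    bool is_written{};

    bool IsSpecial() const noexcept {
        return buffer_type != BufferType::Guest;
    }
    bool IsStorage(const Sharp& sharp, const Profile& profile) const noexcept {
        return sharp.size > profile.max_ubo_size || is_written;
    }
};

int main() {
    const Profile profile{.max_ubo_size = 65536};
    const BufferResource small_ro{BufferType::Guest, false};
    const BufferResource written{BufferType::Guest, true};
    std::cout << small_ro.IsStorage({4096}, profile) << '\n'; // 0: fits in a UBO
    std::cout << written.IsStorage({4096}, profile) << '\n';  // 1: written, so SSBO
}
```

Note that dropping the `is_gds_buffer` term from IsStorage() does not change the GDS buffer's classification: the descriptor built in PatchDataRingAccess still sets `is_written = true`, which keeps it on the storage path.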
@@ -242,7 +244,7 @@ struct Info {
     void AddBindings(Backend::Bindings& bnd) const {
         const auto total_buffers =
-            buffers.size() + (has_readconst ? 1 : 0) + (has_emulated_shared_memory ? 1 : 0);
+            buffers.size() + (has_emulated_shared_memory ? 1 : 0);
         bnd.buffer += total_buffers;
         bnd.unified += total_buffers + images.size() + samplers.size();
         bnd.user_data += ud_mask.NumRegs();
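
A quick arithmetic check (plain sketch, invented counts) of why the `has_readconst` term has to go: the ReadConst UBO is now an element of `buffers`, so adding it on top of `buffers.size()` would count the binding twice:

```cpp
#include <cassert>
#include <cstddef>

int main() {
    const std::size_t guest_buffers = 3;
    const bool has_readconst = true;
    const bool has_emulated_shared_memory = false;

    // New scheme: the ReadConst UBO already sits inside the buffer list.
    const std::size_t buffers_size = guest_buffers + (has_readconst ? 1 : 0);
    const std::size_t total = buffers_size + (has_emulated_shared_memory ? 1 : 0);
    assert(total == 4);

    // The old formula applied to the new list would yield one binding too many.
    assert(buffers_size + (has_readconst ? 1 : 0) == 5);
}
```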

View File

@@ -121,11 +121,8 @@ public:
     u32 Add(const BufferResource& desc) {
         const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
-            // Only one GDS binding can exist.
-            if (desc.is_gds_buffer && existing.is_gds_buffer) {
-                return true;
-            }
-            return desc.sharp_idx == existing.sharp_idx && desc.inline_cbuf == existing.inline_cbuf;
+            return desc.sharp_idx == existing.sharp_idx && desc.inline_cbuf == existing.inline_cbuf &&
+                   desc.buffer_type == existing.buffer_type;
         })};
         auto& buffer = buffer_resources[index];
         buffer.used_types |= desc.used_types;
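
A self-contained sketch (simplified types, assumed sentinel values) of why the explicit "only one GDS binding" check can go away: special buffers share a sentinel sharp_idx and the same Null() inline cbuf, so comparing (sharp_idx, inline_cbuf, buffer_type) already collapses duplicates:

```cpp
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

enum class BufferType { Guest, ReadConstUbo, GdsBuffer };

struct BufferResource {
    std::uint32_t sharp_idx = std::numeric_limits<std::uint32_t>::max();
    std::uint64_t inline_cbuf = 1; // stand-in for AmdGpu::Buffer::Null()
    BufferType buffer_type = BufferType::Guest;
};

// Find-or-add with the same predicate the new lambda uses.
std::uint32_t Add(std::vector<BufferResource>& list, const BufferResource& desc) {
    for (std::uint32_t i = 0; i < list.size(); ++i) {
        const auto& existing = list[i];
        if (desc.sharp_idx == existing.sharp_idx && desc.inline_cbuf == existing.inline_cbuf &&
            desc.buffer_type == existing.buffer_type) {
            return i; // merged with the existing binding
        }
    }
    list.push_back(desc);
    return static_cast<std::uint32_t>(list.size() - 1);
}

int main() {
    std::vector<BufferResource> buffers;
    const auto a = Add(buffers, {.buffer_type = BufferType::GdsBuffer});
    const auto b = Add(buffers, {.buffer_type = BufferType::GdsBuffer});
    std::cout << a << ' ' << b << ' ' << buffers.size() << '\n'; // 0 0 1
}
```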
@@ -272,6 +269,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
         .sharp_idx = std::numeric_limits<u32>::max(),
         .used_types = BufferDataType(inst, cbuf.GetNumberFmt()),
         .inline_cbuf = cbuf,
+        .buffer_type = BufferType::Guest,
     });
 }
@@ -286,6 +284,7 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
     binding = descriptors.Add(BufferResource{
         .sharp_idx = sharp,
         .used_types = BufferDataType(inst, buffer.GetNumberFmt()),
+        .buffer_type = BufferType::Guest,
         .is_written = IsBufferStore(inst),
         .is_formatted = inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
                         inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
@@ -402,13 +401,10 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
 }
 
 void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
     // Insert gds binding in the shader if it doesn't exist already.
-    // The buffer is used for append/consume counters.
-    constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
     const u32 binding = descriptors.Add(BufferResource{
         .used_types = IR::Type::U32,
-        .inline_cbuf = GdsSharp,
-        .is_gds_buffer = true,
+        .inline_cbuf = AmdGpu::Buffer::Null(),
+        .buffer_type = BufferType::GdsBuffer,
         .is_written = true,
     });
@@ -420,12 +416,12 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
     };
 
     // Attempt to deduce the GDS address of counter at compile time.
-    const u32 gds_addr = [&] {
-        const IR::Value& gds_offset = inst.Arg(0);
-        if (gds_offset.IsImmediate()) {
-            // Nothing to do, offset is known.
-            return gds_offset.U32() & 0xFFFF;
-        }
+    u32 gds_addr = 0;
+    const IR::Value& gds_offset = inst.Arg(0);
+    if (gds_offset.IsImmediate()) {
+        // Nothing to do, offset is known.
+        gds_addr = gds_offset.U32() & 0xFFFF;
+    } else {
         const auto result = IR::BreadthFirstSearch(&inst, pred);
         ASSERT_MSG(result, "Unable to track M0 source");
@@ -436,8 +432,8 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
         if (prod->GetOpcode() == IR::Opcode::IAdd32) {
             m0_val += prod->Arg(1).U32();
         }
-        return m0_val & 0xFFFF;
-    }();
+        gds_addr = m0_val & 0xFFFF;
+    }
 
     // Patch instruction.
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
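
A tiny worked example of the deduction above (the M0 value is invented for the sketch): whether the offset is an immediate or tracked back through M0, only the low 16 bits are kept as the GDS counter address:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t m0_val = 0x00120010; // assumed tracked M0 value
    const std::uint32_t gds_addr = m0_val & 0xFFFF;
    std::printf("gds_addr = 0x%04X\n", gds_addr); // 0x0010
}
```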

View File

@@ -74,7 +74,14 @@ void Visit(Info& info, const IR::Inst& inst) {
         info.uses_lane_id = true;
         break;
     case IR::Opcode::ReadConst:
-        info.has_readconst = true;
+        if (!info.has_readconst) {
+            info.buffers.push_back({
+                .used_types = IR::Type::U32,
+                .inline_cbuf = AmdGpu::Buffer::Null(),
+                .buffer_type = BufferType::ReadConstUbo,
+            });
+            info.has_readconst = true;
+        }
         break;
     case IR::Opcode::PackUfloat10_11_11:
         info.uses_pack_10_11_11 = true;

View File

@@ -101,9 +101,6 @@ struct StageSpecialization {
         if (info->has_emulated_shared_memory) {
             binding++;
         }
-        if (info->has_readconst) {
-            binding++;
-        }
         ForEachSharp(binding, buffers, info->buffers,
                      [profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
                          spec.stride = sharp.GetStride();
@@ -198,15 +195,9 @@ struct StageSpecialization {
         if (info->has_emulated_shared_memory != other.info->has_emulated_shared_memory) {
             return false;
         }
-        if (info->has_readconst != other.info->has_readconst) {
-            return false;
-        }
         if (info->has_emulated_shared_memory) {
             binding++;
         }
-        if (info->has_readconst) {
-            binding++;
-        }
         for (u32 i = 0; i < buffers.size(); i++) {
             if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
                 return false;

View File

@@ -31,6 +31,12 @@ struct Buffer {
     u32 _padding1 : 6;
     u32 type : 2; // overlaps with T# type, so should be 0 for buffer
 
+    static constexpr Buffer Null() {
+        Buffer buffer{};
+        buffer.base_address = 1;
+        return buffer;
+    }
+
     bool Valid() const {
         return type == 0u;
     }
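
A small sketch of the sentinel's role (simplified struct; the semantics are inferred from the usages in this commit rather than stated by it): `base_address = 1` mirrors the old `GdsSharp{.base_address = 1}` constant, giving special buffers a sharp that is non-zero, so it does not look like an empty V#, yet never a real guest mapping:

```cpp
#include <cstdint>
#include <iostream>

struct Buffer {
    std::uint64_t base_address = 0;

    static constexpr Buffer Null() {
        Buffer buffer{};
        buffer.base_address = 1;
        return buffer;
    }
};

int main() {
    constexpr Buffer null_sharp = Buffer::Null();
    std::cout << (null_sharp.base_address != 0) << '\n'; // 1: not an empty sharp
}
```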

View File

@@ -3,11 +3,9 @@
 #include <boost/container/small_vector.hpp>
 
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/texture_cache/texture_cache.h"
namespace Vulkan {
@@ -38,14 +36,6 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,
             .stageFlags = vk::ShaderStageFlagBits::eCompute,
         });
     }
-    if (info->has_readconst) {
-        bindings.push_back({
-            .binding = binding++,
-            .descriptorType = vk::DescriptorType::eUniformBuffer,
-            .descriptorCount = 1,
-            .stageFlags = vk::ShaderStageFlagBits::eCompute,
-        });
-    }
     for (const auto& buffer : info->buffers) {
         const auto sharp = buffer.GetSharp(*info);
         bindings.push_back({

View File

@@ -7,18 +7,13 @@
 #include <boost/container/static_vector.hpp>
 
 #include "common/assert.h"
 #include "common/io_file.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
 #include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/runtime_info.h"
 #include "video_core/amdgpu/resource.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/texture_cache/texture_cache.h"
namespace Vulkan {
@@ -357,14 +352,6 @@ void GraphicsPipeline::BuildDescSetLayout() {
         if (!stage) {
             continue;
         }
-        if (stage->has_readconst) {
-            bindings.push_back({
-                .binding = binding++,
-                .descriptorType = vk::DescriptorType::eUniformBuffer,
-                .descriptorCount = 1,
-                .stageFlags = gp_stage_flags,
-            });
-        }
         for (const auto& buffer : stage->buffers) {
             const auto sharp = buffer.GetSharp(*stage);
             bindings.push_back({

View File

@@ -436,16 +436,13 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
     const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
 
     // Assume if a shader reads and writes metas at the same time, it is a copy shader.
-    bool meta_read = false;
-    for (const auto& desc : info.buffers) {
-        if (desc.is_gds_buffer) {
-            continue;
-        }
-        if (!desc.is_written) {
-            const VAddr address = desc.GetSharp(info).base_address;
-            meta_read = texture_cache.IsMeta(address);
-        }
-    }
+    const bool meta_read = std::ranges::any_of(info.buffers, [&](const auto& desc) {
+        if (!desc.IsSpecial() && !desc.is_written) {
+            const VAddr address = desc.GetSharp(info).base_address;
+            return texture_cache.IsMeta(address);
+        }
+        return false;
+    });
 
     // Most of the time when a metadata is updated with a shader it gets cleared. It means
     // we can skip the whole dispatch and update the tracked state instead. Also, it is not
@@ -514,7 +511,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
     for (const auto& desc : stage.buffers) {
         const auto vsharp = desc.GetSharp(stage);
-        if (!desc.is_gds_buffer && vsharp.base_address != 0 && vsharp.GetSize() > 0) {
+        if (!desc.IsSpecial() && vsharp.base_address != 0 && vsharp.GetSize() > 0) {
            const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize());
            buffer_bindings.emplace_back(buffer_id, vsharp);
        } else {
@@ -538,31 +535,19 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
         ++binding.buffer;
     }
 
-    // Bind the flattened user data buffer as a UBO so it's accessible to the shader
-    if (stage.has_readconst) {
-        const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf);
-        buffer_infos.emplace_back(vk_buffer->Handle(), offset,
-                                  stage.flattened_ud_buf.size() * sizeof(u32));
-        set_writes.push_back({
-            .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding.unified++,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = vk::DescriptorType::eUniformBuffer,
-            .pBufferInfo = &buffer_infos.back(),
-        });
-        ++binding.buffer;
-    }
-
     // Second pass to re-bind buffers that were updated after binding
     for (u32 i = 0; i < buffer_bindings.size(); i++) {
         const auto& [buffer_id, vsharp] = buffer_bindings[i];
         const auto& desc = stage.buffers[i];
         const bool is_storage = desc.IsStorage(vsharp, pipeline_cache.GetProfile());
         if (!buffer_id) {
-            if (desc.is_gds_buffer) {
+            if (desc.buffer_type == Shader::BufferType::GdsBuffer) {
                 const auto* gds_buf = buffer_cache.GetGdsBuffer();
                 buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes());
+            } else if (desc.buffer_type == Shader::BufferType::ReadConstUbo) {
+                const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf);
+                buffer_infos.emplace_back(vk_buffer->Handle(), offset,
+                                          stage.flattened_ud_buf.size() * sizeof(u32));
             } else if (instance.IsNullDescriptorSupported()) {
                 buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
             } else {
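
To close, a minimal sketch (mock types and cache, not the emulator's actual API) of the rebinding logic this last hunk converges on: with srt_flatbuf merged into the buffer list, every unbound slot is resolved by one dispatch on buffer_type instead of per-feature special cases:

```cpp
#include <cstdint>
#include <iostream>

enum class BufferType { Guest, ReadConstUbo, GdsBuffer };

struct BufferInfo {
    const char* source;
    std::uint64_t offset;
};

BufferInfo Resolve(BufferType type, bool null_descriptor_supported) {
    switch (type) {
    case BufferType::GdsBuffer:
        return {"gds buffer", 0}; // buffer_cache.GetGdsBuffer() in the real code
    case BufferType::ReadConstUbo:
        return {"host UBO with flattened user data", 64}; // ObtainHostUBO(...)
    default:
        return null_descriptor_supported ? BufferInfo{"null descriptor", 0}
                                         : BufferInfo{"fallback buffer", 0};
    }
}

int main() {
    for (auto type : {BufferType::GdsBuffer, BufferType::ReadConstUbo, BufferType::Guest}) {
        std::cout << Resolve(type, true).source << '\n';
    }
}
```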