diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 92bb7ec53..5dffd1be4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -164,9 +164,9 @@ using BufferAlias = EmitContext::BufferAlias;
 
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
-    ASSERT(ctx.srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0);
-    const auto& buffer = ctx.srt_flatbuf;
-    const auto [id, pointer_type] = buffer[BufferAlias::U32];
+    const auto& srt_flatbuf = ctx.buffers.back();
+    ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
+    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(flatbuf_off_dw))};
     return ctx.OpLoad(ctx.U32[1], ptr);
 }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 194e6eaba..c95bc2560 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -193,6 +193,9 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
 
 void EmitContext::DefineBufferOffsets() {
     for (BufferDefinition& buffer : buffers) {
+        if (buffer.buffer_type != BufferType::Guest) {
+            continue;
+        }
         const u32 binding = buffer.binding;
         const u32 half = PushData::BufOffsetIndex + (binding >> 4);
         const u32 comp = (binding & 0xf) >> 2;
@@ -632,12 +635,6 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
 };
 
 void EmitContext::DefineBuffers() {
-    if (info.has_readconst) {
-        srt_flatbuf[BufferAlias::U32] = DefineBuffer(false, false, 2, BufferType::ReadConstUbo, U32[1]);
-        srt_flatbuf.binding = binding.buffer++;
-        ++binding.unified;
-    }
-
     for (const auto& desc : info.buffers) {
         const auto buf_sharp = desc.GetSharp(info);
         const bool is_storage = desc.IsStorage(buf_sharp, profile);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 657438cf9..56f3f1656 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -255,7 +255,6 @@ public:
     Bindings& binding;
     boost::container::small_vector<Id, 16> buf_type_ids;
     boost::container::small_vector<BufferDefinition, 16> buffers;
-    BufferDefinition srt_flatbuf;
     boost::container::small_vector<TextureDefinition, 8> images;
     boost::container::small_vector<Id, 4> samplers;
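Note: with the dedicated `srt_flatbuf` member gone, `EmitReadConst` leans on an ordering invariant: the ReadConstUbo descriptor is appended after all guest buffers, so it is always `ctx.buffers.back()`. A minimal standalone sketch of that invariant, using hypothetical stand-in types rather than the emulator's headers:

```cpp
#include <cassert>
#include <vector>

// Hypothetical stand-ins for the emulator's BufferType/BufferDefinition.
enum class BufferType { Guest, ReadConstUbo, GdsBuffer };

struct BufferDefinition {
    int binding = -1;
    BufferType buffer_type = BufferType::Guest;
};

int main() {
    std::vector<BufferDefinition> buffers;
    buffers.push_back({0, BufferType::Guest});        // regular guest buffers first
    buffers.push_back({1, BufferType::ReadConstUbo}); // flat user-data UBO appended last

    // The lookup EmitReadConst now performs: take the last descriptor and
    // assert it really is the ReadConst UBO before using it.
    const auto& srt_flatbuf = buffers.back();
    assert(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
}
```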
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index c519d0917..813a1f74a 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -51,15 +51,17 @@ struct BufferResource {
     IR::Type used_types;
     AmdGpu::Buffer inline_cbuf;
     BufferType buffer_type;
-    bool is_gds_buffer{};
     bool is_instance_data{};
     u8 instance_attrib{};
     bool is_written{};
     bool is_formatted{};
 
-    [[nodiscard]] bool IsStorage(const AmdGpu::Buffer& buffer,
-                                 const Profile& profile) const noexcept {
-        return buffer.GetSize() > profile.max_ubo_size || is_written || is_gds_buffer;
+    bool IsSpecial() const noexcept {
+        return buffer_type != BufferType::Guest;
+    }
+
+    bool IsStorage(const AmdGpu::Buffer& buffer, const Profile& profile) const noexcept {
+        return buffer.GetSize() > profile.max_ubo_size || is_written;
     }
 
     [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
@@ -242,7 +244,7 @@ struct Info {
 
     void AddBindings(Backend::Bindings& bnd) const {
         const auto total_buffers =
-            buffers.size() + (has_readconst ? 1 : 0) + (has_emulated_shared_memory ? 1 : 0);
+            buffers.size() + (has_emulated_shared_memory ? 1 : 0);
         bnd.buffer += total_buffers;
         bnd.unified += total_buffers + images.size() + samplers.size();
         bnd.user_data += ud_mask.NumRegs();
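Note: the two predicates split responsibilities. `IsSpecial()` marks descriptors with no guest sharp behind them (GDS, ReadConst UBO), while `IsStorage()` only decides UBO vs. SSBO for guest buffers. A hedged sketch of how callers combine them, with simplified stand-ins for the real `Info`/`Profile`/sharp types:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

enum class BufferType { Guest, ReadConstUbo, GdsBuffer };

// Simplified stand-ins for AmdGpu::Buffer and Profile.
struct Sharp {
    uint64_t base_address;
    uint32_t size;
};
struct Profile {
    uint32_t max_ubo_size;
};

struct BufferResource {
    BufferType buffer_type = BufferType::Guest;
    bool is_written = false;
    Sharp sharp{};

    bool IsSpecial() const noexcept {
        return buffer_type != BufferType::Guest;
    }
    bool IsStorage(const Sharp& sharp_, const Profile& profile) const noexcept {
        return sharp_.size > profile.max_ubo_size || is_written;
    }
};

int main() {
    const Profile profile{.max_ubo_size = 65536};
    std::vector<BufferResource> buffers{
        {BufferType::Guest, false, {0x1000, 256}}, // small, read-only -> UBO
        {BufferType::Guest, true, {0x2000, 256}},  // written -> SSBO
        {BufferType::ReadConstUbo, false, {}},     // special: no guest address to look up
    };
    for (const auto& desc : buffers) {
        if (desc.IsSpecial()) {
            std::puts("special: bound by buffer_type, no guest address");
            continue;
        }
        std::puts(desc.IsStorage(desc.sharp, profile) ? "guest SSBO" : "guest UBO");
    }
}
```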
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 029558d9e..5737707b0 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -121,11 +121,8 @@ public:
 
     u32 Add(const BufferResource& desc) {
         const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
-            // Only one GDS binding can exist.
-            if (desc.is_gds_buffer && existing.is_gds_buffer) {
-                return true;
-            }
-            return desc.sharp_idx == existing.sharp_idx && desc.inline_cbuf == existing.inline_cbuf;
+            return desc.sharp_idx == existing.sharp_idx && desc.inline_cbuf == existing.inline_cbuf &&
+                   desc.buffer_type == existing.buffer_type;
         })};
         auto& buffer = buffer_resources[index];
         buffer.used_types |= desc.used_types;
@@ -272,6 +269,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
         .sharp_idx = std::numeric_limits<u32>::max(),
         .used_types = BufferDataType(inst, cbuf.GetNumberFmt()),
         .inline_cbuf = cbuf,
+        .buffer_type = BufferType::Guest,
     });
 }
 
@@ -286,6 +284,7 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
     binding = descriptors.Add(BufferResource{
         .sharp_idx = sharp,
         .used_types = BufferDataType(inst, buffer.GetNumberFmt()),
+        .buffer_type = BufferType::Guest,
         .is_written = IsBufferStore(inst),
         .is_formatted = inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
                         inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
@@ -402,13 +401,10 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
 }
 
 void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
-    // Insert gds binding in the shader if it doesn't exist already.
-    // The buffer is used for append/consume counters.
-    constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
     const u32 binding = descriptors.Add(BufferResource{
         .used_types = IR::Type::U32,
-        .inline_cbuf = GdsSharp,
-        .is_gds_buffer = true,
+        .inline_cbuf = AmdGpu::Buffer::Null(),
+        .buffer_type = BufferType::GdsBuffer,
         .is_written = true,
     });
@@ -420,12 +416,12 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
     };
 
     // Attempt to deduce the GDS address of counter at compile time.
-    const u32 gds_addr = [&] {
-        const IR::Value& gds_offset = inst.Arg(0);
-        if (gds_offset.IsImmediate()) {
-            // Nothing to do, offset is known.
-            return gds_offset.U32() & 0xFFFF;
-        }
+    u32 gds_addr = 0;
+    const IR::Value& gds_offset = inst.Arg(0);
+    if (gds_offset.IsImmediate()) {
+        // Nothing to do, offset is known.
+        gds_addr = gds_offset.U32() & 0xFFFF;
+    } else {
         const auto result = IR::BreadthFirstSearch(&inst, pred);
         ASSERT_MSG(result, "Unable to track M0 source");
@@ -436,8 +432,8 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
         if (prod->GetOpcode() == IR::Opcode::IAdd32) {
             m0_val += prod->Arg(1).U32();
         }
-        return m0_val & 0xFFFF;
-    }();
+        gds_addr = m0_val & 0xFFFF;
+    }
 
     // Patch instruction.
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index f3a1fc9a8..0b0a62f6a 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -74,7 +74,14 @@ void Visit(Info& info, const IR::Inst& inst) {
         info.uses_lane_id = true;
         break;
     case IR::Opcode::ReadConst:
-        info.has_readconst = true;
+        if (!info.has_readconst) {
+            info.buffers.push_back({
+                .used_types = IR::Type::U32,
+                .inline_cbuf = AmdGpu::Buffer::Null(),
+                .buffer_type = BufferType::ReadConstUbo,
+            });
+            info.has_readconst = true;
+        }
         break;
    case IR::Opcode::PackUfloat10_11_11:
        info.uses_pack_10_11_11 = true;
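Note: deduplication in `Descriptors::Add` now keys on `buffer_type` as well, which is what keeps at most one GDS entry (and one ReadConst entry) per shader without the special-cased `is_gds_buffer` early-out. A self-contained sketch of that rule, with a generic helper and a hypothetical, trimmed-down resource struct:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

enum class BufferType { Guest, ReadConstUbo, GdsBuffer };

// Hypothetical BufferResource reduced to just the fields in the dedup key.
struct BufferResource {
    uint32_t sharp_idx = UINT32_MAX;
    uint64_t inline_cbuf = 0; // stand-in for AmdGpu::Buffer
    BufferType buffer_type = BufferType::Guest;
};

// Generic Add mirroring the pass helper: return the index of an existing
// match, or append a new descriptor and return its index.
template <typename T, typename Pred>
uint32_t Add(std::vector<T>& xs, const T& desc, Pred&& eq) {
    for (uint32_t i = 0; i < xs.size(); ++i) {
        if (eq(xs[i])) {
            return i;
        }
    }
    xs.push_back(desc);
    return static_cast<uint32_t>(xs.size() - 1);
}

int main() {
    std::vector<BufferResource> buffer_resources;
    const auto add = [&](const BufferResource& desc) {
        return Add(buffer_resources, desc, [&desc](const auto& existing) {
            return desc.sharp_idx == existing.sharp_idx &&
                   desc.inline_cbuf == existing.inline_cbuf &&
                   desc.buffer_type == existing.buffer_type;
        });
    };
    const uint32_t gds0 = add({.buffer_type = BufferType::GdsBuffer});
    const uint32_t gds1 = add({.buffer_type = BufferType::GdsBuffer});
    assert(gds0 == gds1); // a second GDS request reuses the first binding
}
```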
diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h
index 9bf9e71e4..8b66cb4de 100644
--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@@ -101,9 +101,6 @@ struct StageSpecialization {
         if (info->has_emulated_shared_memory) {
             binding++;
         }
-        if (info->has_readconst) {
-            binding++;
-        }
         ForEachSharp(binding, buffers, info->buffers,
                      [profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
                          spec.stride = sharp.GetStride();
@@ -198,15 +195,9 @@ struct StageSpecialization {
         if (info->has_emulated_shared_memory != other.info->has_emulated_shared_memory) {
             return false;
         }
-        if (info->has_readconst != other.info->has_readconst) {
-            return false;
-        }
         if (info->has_emulated_shared_memory) {
             binding++;
         }
-        if (info->has_readconst) {
-            binding++;
-        }
         for (u32 i = 0; i < buffers.size(); i++) {
             if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
                 return false;
diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h
index fa8edb3e2..64a85c812 100644
--- a/src/video_core/amdgpu/resource.h
+++ b/src/video_core/amdgpu/resource.h
@@ -31,6 +31,12 @@ struct Buffer {
     u32 _padding1 : 6;
     u32 type : 2; // overlaps with T# type, so should be 0 for buffer
 
+    static constexpr Buffer Null() {
+        Buffer buffer{};
+        buffer.base_address = 1;
+        return buffer;
+    }
+
     bool Valid() const {
         return type == 0u;
     }
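Note: `Buffer::Null()` standardizes the placeholder sharp the old inline `GdsSharp{.base_address = 1}` provided ad hoc: address 1 is non-zero, so the descriptor is not mistaken for an empty binding, yet it should never match a real guest mapping. A sketch of the pattern with the bitfield layout abbreviated (the real struct carries many more fields):

```cpp
#include <cstdint>

// Abbreviated sharp layout; only base_address matters for the sentinel.
struct Buffer {
    uint64_t base_address : 44;
    uint64_t _rest : 20;

    static constexpr Buffer Null() {
        Buffer buffer{};
        buffer.base_address = 1; // non-zero sentinel, never a valid guest address
        return buffer;
    }
};

// The factory is usable at compile time, e.g. in designated initializers.
static_assert(Buffer::Null().base_address == 1);

int main() {
    constexpr Buffer null = Buffer::Null();
    return null.base_address == 1 ? 0 : 1;
}
```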
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index f0346559d..232cc46fd 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -3,11 +3,9 @@
 
 #include <boost/container/small_vector.hpp>
 
-#include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/texture_cache/texture_cache.h"
 
 namespace Vulkan {
@@ -38,14 +36,6 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,
             .stageFlags = vk::ShaderStageFlagBits::eCompute,
         });
     }
-    if (info->has_readconst) {
-        bindings.push_back({
-            .binding = binding++,
-            .descriptorType = vk::DescriptorType::eUniformBuffer,
-            .descriptorCount = 1,
-            .stageFlags = vk::ShaderStageFlagBits::eCompute,
-        });
-    }
     for (const auto& buffer : info->buffers) {
         const auto sharp = buffer.GetSharp(*info);
         bindings.push_back({
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 4eecd1edf..2c432e9bf 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -7,18 +7,13 @@
 #include <boost/container/static_vector.hpp>
 
 #include "common/assert.h"
-#include "common/io_file.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
 #include "shader_recompiler/frontend/fetch_shader.h"
-#include "shader_recompiler/runtime_info.h"
 #include "video_core/amdgpu/resource.h"
-#include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
-#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
-#include "video_core/texture_cache/texture_cache.h"
 
 namespace Vulkan {
@@ -357,14 +352,6 @@ void GraphicsPipeline::BuildDescSetLayout() {
         if (!stage) {
             continue;
         }
-        if (stage->has_readconst) {
-            bindings.push_back({
-                .binding = binding++,
-                .descriptorType = vk::DescriptorType::eUniformBuffer,
-                .descriptorCount = 1,
-                .stageFlags = gp_stage_flags,
-            });
-        }
         for (const auto& buffer : stage->buffers) {
             const auto sharp = buffer.GetSharp(*stage);
             bindings.push_back({
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index ac6aac7b3..f8faec056 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -436,16 +436,13 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
 
         // Assume if a shader reads and writes metas at the same time, it is a copy shader.
-        bool meta_read = false;
-        for (const auto& desc : info.buffers) {
-            if (desc.is_gds_buffer) {
-                continue;
-            }
-            if (!desc.is_written) {
+        const bool meta_read = std::ranges::any_of(info.buffers, [&](const auto& desc) {
+            if (!desc.IsSpecial() && !desc.is_written) {
                 const VAddr address = desc.GetSharp(info).base_address;
-                meta_read = texture_cache.IsMeta(address);
+                return texture_cache.IsMeta(address);
             }
-        }
+            return false;
+        });
 
         // Most of the time when a metadata is updated with a shader it gets cleared. It means
         // we can skip the whole dispatch and update the tracked state instead. Also, it is not
@@ -514,7 +511,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
 
     for (const auto& desc : stage.buffers) {
         const auto vsharp = desc.GetSharp(stage);
-        if (!desc.is_gds_buffer && vsharp.base_address != 0 && vsharp.GetSize() > 0) {
+        if (!desc.IsSpecial() && vsharp.base_address != 0 && vsharp.GetSize() > 0) {
            const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize());
             buffer_bindings.emplace_back(buffer_id, vsharp);
         } else {
@@ -538,31 +535,19 @@
         ++binding.buffer;
     }
 
-    // Bind the flattened user data buffer as a UBO so it's accessible to the shader
-    if (stage.has_readconst) {
-        const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf);
-        buffer_infos.emplace_back(vk_buffer->Handle(), offset,
-                                  stage.flattened_ud_buf.size() * sizeof(u32));
-        set_writes.push_back({
-            .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding.unified++,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = vk::DescriptorType::eUniformBuffer,
-            .pBufferInfo = &buffer_infos.back(),
-        });
-        ++binding.buffer;
-    }
-
     // Second pass to re-bind buffers that were updated after binding
     for (u32 i = 0; i < buffer_bindings.size(); i++) {
         const auto& [buffer_id, vsharp] = buffer_bindings[i];
         const auto& desc = stage.buffers[i];
         const bool is_storage = desc.IsStorage(vsharp, pipeline_cache.GetProfile());
         if (!buffer_id) {
-            if (desc.is_gds_buffer) {
+            if (desc.buffer_type == Shader::BufferType::GdsBuffer) {
                 const auto* gds_buf = buffer_cache.GetGdsBuffer();
                 buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes());
+            } else if (desc.buffer_type == Shader::BufferType::ReadConstUbo) {
+                const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf);
+                buffer_infos.emplace_back(vk_buffer->Handle(), offset,
+                                          stage.flattened_ud_buf.size() * sizeof(u32));
             } else if (instance.IsNullDescriptorSupported()) {
                 buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
             } else {
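Note: the rasterizer is left with a single rebind loop that resolves every descriptor the buffer cache could not back by its `buffer_type`, instead of the dedicated pre-loop UBO path. A hedged sketch of that dispatch, with mock strings standing in for the real buffer-cache handles and Vulkan API:

```cpp
#include <cstdio>

enum class BufferType { Guest, ReadConstUbo, GdsBuffer };

// Mock of the per-descriptor fallback the rebind loop performs when no guest
// buffer was found; the strings stand in for real Vulkan buffer handles.
const char* ResolveUnbacked(BufferType type, bool null_descriptor_supported) {
    switch (type) {
    case BufferType::GdsBuffer:
        return "GDS buffer from the buffer cache";
    case BufferType::ReadConstUbo:
        return "host UBO holding the flattened user-data buffer";
    default:
        return null_descriptor_supported ? "VK_NULL_HANDLE (null descriptor)"
                                         : "dummy placeholder buffer";
    }
}

int main() {
    std::puts(ResolveUnbacked(BufferType::GdsBuffer, true));
    std::puts(ResolveUnbacked(BufferType::ReadConstUbo, true));
    std::puts(ResolveUnbacked(BufferType::Guest, false));
}
```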