diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 9b8f034f0..a0dbde129 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -280,34 +280,42 @@ void EmitContext::DefineInputs() { base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); - for (const auto& input : info.vs_inputs) { - ASSERT(input.binding < IR::NumParams); - const auto sharp = input.GetSharp(info); + const auto fetch_shader = info.LoadFetchShader(); + if (!fetch_shader) { + break; + } + for (const auto& attrib : fetch_shader->attributes) { + ASSERT(attrib.semantic < IR::NumParams); + const auto sharp = info.GetSharp(attrib); const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]}; - if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 || - input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) { - + if (attrib.UsesStepRates()) { const u32 rate_idx = - input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0 - : 1; + attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 0 + : 1; + const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt()); + const auto buffer = + std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) { + return buffer.instance_attrib == attrib.semantic; + }); // Note that we pass index rather than Id - input_params[input.binding] = SpirvAttribute{ + input_params[attrib.semantic] = SpirvAttribute{ .id = rate_idx, .pointer_type = input_u32, .component_type = U32[1], - .num_components = input.num_components, + .num_components = std::min(attrib.num_elements, num_components), .is_integer = true, .is_loaded = false, - .buffer_handle = input.instance_data_buf, + .buffer_handle = int(buffer - info.buffers.begin()), }; } else { - Id id{DefineInput(type, input.binding)}; - if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) { - Name(id, fmt::format("vs_instance_attr{}", input.binding)); + Id id{DefineInput(type, attrib.semantic)}; + if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) { + Name(id, fmt::format("vs_instance_attr{}", attrib.semantic)); } else { - Name(id, fmt::format("vs_in_attr{}", input.binding)); + Name(id, fmt::format("vs_in_attr{}", attrib.semantic)); } - input_params[input.binding] = GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false); + input_params[attrib.semantic] = + GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false); interfaces.push_back(id); } } diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp index 16938410c..d302bdeaf 100644 --- a/src/shader_recompiler/frontend/fetch_shader.cpp +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -34,8 +34,8 @@ namespace Shader::Gcn { * We take the reverse way, extract the original input semantics from these instructions. **/ -FetchShaderData ParseFetchShader(const u32* code, u32* out_size) { - FetchShaderData data{}; +FetchShaderData ParseFetchShader(const u32* code) { + FetchShaderData data{.code = code}; GcnCodeSlice code_slice(code, code + std::numeric_limits::max()); GcnDecodeContext decoder; @@ -49,7 +49,7 @@ FetchShaderData ParseFetchShader(const u32* code, u32* out_size) { u32 semantic_index = 0; while (!code_slice.atEnd()) { const auto inst = decoder.decodeInstruction(code_slice); - *out_size += inst.length; + data.size += inst.length; if (inst.opcode == Opcode::S_SETPC_B64) { break; diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index 0e5d15419..213b19e07 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -3,26 +3,62 @@ #pragma once +#include #include #include "common/types.h" namespace Shader::Gcn { struct VertexAttribute { + enum InstanceIdType : u8 { + None = 0, + OverStepRate0 = 1, + OverStepRate1 = 2, + Plain = 3, + }; + u8 semantic; ///< Semantic index of the attribute u8 dest_vgpr; ///< Destination VGPR to load first component. u8 num_elements; ///< Number of components to load u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# u8 dword_offset; ///< The dword offset of the V# that describes this attribute. u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate + + [[nodiscard]] InstanceIdType GetStepRate() const { + return static_cast(instance_data); + } + + [[nodiscard]] bool UsesStepRates() const { + const auto step_rate = GetStepRate(); + return step_rate == OverStepRate0 || step_rate == OverStepRate1; + } + + bool operator==(const VertexAttribute& other) const { + return semantic == other.semantic && dest_vgpr == other.dest_vgpr && + num_elements == other.num_elements && sgpr_base == other.sgpr_base && + dword_offset == other.dword_offset && instance_data == other.instance_data; + } }; struct FetchShaderData { + const u32* code; + u32 size = 0; std::vector attributes; s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR + + [[nodiscard]] bool UsesStepRates() const { + return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) { + return attribute.UsesStepRates(); + }) != attributes.end(); + } + + bool operator==(const FetchShaderData& other) const { + return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr && + instance_offset_sgpr == other.instance_offset_sgpr; + } }; -FetchShaderData ParseFetchShader(const u32* code, u32* out_size); +FetchShaderData ParseFetchShader(const u32* code); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 06167011a..142ba64b1 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -368,13 +368,9 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra void Translator::EmitFetch(const GcnInst& inst) { // Read the pointer to the fetch shader assembly. - const u32 sgpr_base = inst.src[0].code; - const u32* code; - std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code)); - - // Parse the assembly to generate a list of attributes. - u32 fetch_size{}; - const auto fetch_data = ParseFetchShader(code, &fetch_size); + info.has_fetch_shader = true; + info.fetch_shader_sgpr_base = inst.src[0].code; + const auto fetch_data = info.LoadFetchShader(); if (Config::dumpShaders()) { using namespace Common::FS; @@ -384,13 +380,10 @@ void Translator::EmitFetch(const GcnInst& inst) { } const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash); const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; - file.WriteRaw(code, fetch_size); + file.WriteRaw(fetch_data->code, fetch_data->size); } - info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr; - info.instance_offset_sgpr = fetch_data.instance_offset_sgpr; - - for (const auto& attrib : fetch_data.attributes) { + for (const auto& attrib : fetch_data->attributes) { const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; IR::VectorReg dst_reg{attrib.dest_vgpr}; @@ -420,28 +413,14 @@ void Translator::EmitFetch(const GcnInst& inst) { // In case of programmable step rates we need to fallback to instance data pulling in // shader, so VBs should be bound as regular data buffers - s32 instance_buf_handle = -1; - const auto step_rate = static_cast(attrib.instance_data); - if (step_rate == Info::VsInput::OverStepRate0 || - step_rate == Info::VsInput::OverStepRate1) { + if (attrib.UsesStepRates()) { info.buffers.push_back({ .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4), .used_types = IR::Type::F32, .is_instance_data = true, + .instance_attrib = attrib.semantic, }); - instance_buf_handle = s32(info.buffers.size() - 1); - info.uses_step_rates = true; } - - const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt()); - info.vs_inputs.push_back({ - .binding = attrib.semantic, - .num_components = std::min(attrib.num_elements, num_components), - .sgpr_base = attrib.sgpr_base, - .dword_offset = attrib.dword_offset, - .instance_step_rate = step_rate, - .instance_data_buf = instance_buf_handle, - }); } } diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 127f0f8ab..f41b4d990 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -9,6 +9,7 @@ #include #include "common/assert.h" #include "common/types.h" +#include "frontend/fetch_shader.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/copy_shader.h" #include "shader_recompiler/ir/attribute.h" @@ -45,6 +46,7 @@ struct BufferResource { AmdGpu::Buffer inline_cbuf; bool is_gds_buffer{}; bool is_instance_data{}; + u8 instance_attrib{}; bool is_written{}; bool IsStorage(AmdGpu::Buffer buffer) const noexcept { @@ -112,27 +114,6 @@ static_assert(sizeof(PushData) <= 128, * Contains general information generated by the shader recompiler for an input program. */ struct Info { - struct VsInput { - enum InstanceIdType : u8 { - None = 0, - OverStepRate0 = 1, - OverStepRate1 = 2, - Plain = 3, - }; - - u16 binding; - u16 num_components; - u8 sgpr_base; - u8 dword_offset; - InstanceIdType instance_step_rate; - s32 instance_data_buf; - - [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept { - return info.ReadUdReg(sgpr_base, dword_offset); - } - }; - boost::container::static_vector vs_inputs{}; - struct AttributeFlags { bool Get(IR::Attribute attrib, u32 comp = 0) const { return flags[Index(attrib)] & (1 << comp); @@ -179,9 +160,6 @@ struct Info { CopyShaderData gs_copy_data; - s8 vertex_offset_sgpr = -1; - s8 instance_offset_sgpr = -1; - BufferResourceList buffers; TextureBufferResourceList texture_buffers; ImageResourceList images; @@ -208,10 +186,11 @@ struct Info { bool uses_shared{}; bool uses_fp16{}; bool uses_fp64{}; - bool uses_step_rates{}; bool translation_failed{}; // indicates that shader has unsupported instructions bool has_readconst{}; u8 mrt_mask{0u}; + bool has_fetch_shader{false}; + u32 fetch_shader_sgpr_base{0u}; explicit Info(Stage stage_, ShaderParams params) : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, @@ -252,14 +231,18 @@ struct Info { bnd.user_data += ud_mask.NumRegs(); } - [[nodiscard]] std::pair GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs) const { + [[nodiscard]] std::pair GetDrawOffsets( + const AmdGpu::Liverpool::Regs& regs, + const std::optional& fetch_shader) const { u32 vertex_offset = regs.index_offset; u32 instance_offset = 0; - if (vertex_offset == 0 && vertex_offset_sgpr != -1) { - vertex_offset = user_data[vertex_offset_sgpr]; - } - if (instance_offset_sgpr != -1) { - instance_offset = user_data[instance_offset_sgpr]; + if (fetch_shader) { + if (vertex_offset == 0 && fetch_shader->vertex_offset_sgpr != -1) { + vertex_offset = user_data[fetch_shader->vertex_offset_sgpr]; + } + if (fetch_shader->instance_offset_sgpr != -1) { + instance_offset = user_data[fetch_shader->instance_offset_sgpr]; + } } return {vertex_offset, instance_offset}; } @@ -273,6 +256,20 @@ struct Info { srt_info.walker_func(user_data.data(), flattened_ud_buf.data()); } } + + [[nodiscard]] std::optional LoadFetchShader() const { + if (!has_fetch_shader) { + return std::nullopt; + } + const u32* code; + std::memcpy(&code, &user_data[fetch_shader_sgpr_base], sizeof(code)); + return Gcn::ParseFetchShader(code); + } + + [[nodiscard]] constexpr AmdGpu::Buffer GetSharp( + const Gcn::VertexAttribute& attrib) const noexcept { + return ReadUdReg(attrib.sgpr_base, attrib.dword_offset); + } }; constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 20d3e93c9..299e6bd44 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -6,6 +6,7 @@ #include #include "common/types.h" +#include "frontend/fetch_shader.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/info.h" #include "shader_recompiler/ir/passes/srt.h" @@ -50,6 +51,7 @@ struct StageSpecialization { const Shader::Info* info; RuntimeInfo runtime_info; + Gcn::FetchShaderData fetch_shader_data{}; std::bitset bitset{}; boost::container::small_vector buffers; boost::container::small_vector tex_buffers; @@ -60,6 +62,9 @@ struct StageSpecialization { explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, Backend::Bindings start_) : info{&info_}, runtime_info{runtime_info_}, start{start_} { + if (const auto fetch_shader = info_.LoadFetchShader()) { + fetch_shader_data = *fetch_shader; + } u32 binding{}; if (info->has_readconst) { binding++; @@ -105,6 +110,9 @@ struct StageSpecialization { if (runtime_info != other.runtime_info) { return false; } + if (fetch_shader_data != other.fetch_shader_data) { + return false; + } u32 binding{}; if (info->has_readconst != other.info->has_readconst) { return false; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 63d8ea632..0beb16d27 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -5,6 +5,7 @@ #include "common/alignment.h" #include "common/scope_exit.h" #include "common/types.h" +#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/buffer_cache/buffer_cache.h" @@ -107,7 +108,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si } } -bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { +bool BufferCache::BindVertexBuffers( + const Shader::Info& vs_info, const std::optional& fetch_shader) { boost::container::small_vector attributes; boost::container::small_vector bindings; SCOPE_EXIT { @@ -126,7 +128,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { } }; - if (vs_info.vs_inputs.empty()) { + if (!fetch_shader || fetch_shader->attributes.empty()) { return false; } @@ -150,30 +152,29 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { // Calculate buffers memory overlaps bool has_step_rate = false; boost::container::static_vector ranges{}; - for (const auto& input : vs_info.vs_inputs) { - if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || - input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { + for (const auto& attrib : fetch_shader->attributes) { + if (attrib.UsesStepRates()) { has_step_rate = true; continue; } - const auto& buffer = input.GetSharp(vs_info); + const auto& buffer = vs_info.GetSharp(attrib); if (buffer.GetSize() == 0) { continue; } guest_buffers.emplace_back(buffer); ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize()); attributes.push_back({ - .location = input.binding, - .binding = input.binding, + .location = attrib.semantic, + .binding = attrib.semantic, .format = Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), .offset = 0, }); bindings.push_back({ - .binding = input.binding, + .binding = attrib.semantic, .stride = buffer.GetStride(), - .inputRate = input.instance_step_rate == Shader::Info::VsInput::None + .inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None ? vk::VertexInputRate::eVertex : vk::VertexInputRate::eInstance, .divisor = 1, diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e2519e942..b1bf77f8a 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -20,8 +20,11 @@ struct Liverpool; } namespace Shader { -struct Info; +namespace Gcn { +struct FetchShaderData; } +struct Info; +} // namespace Shader namespace VideoCore { @@ -76,7 +79,8 @@ public: void InvalidateMemory(VAddr device_addr, u64 size); /// Binds host vertex buffers for the current draw. - bool BindVertexBuffers(const Shader::Info& vs_info); + bool BindVertexBuffers(const Shader::Info& vs_info, + const std::optional& fetch_shader); /// Bind host index buffer for the current draw. u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b0aac2dbb..44b57d537 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include @@ -10,6 +11,8 @@ #include "video_core/amdgpu/resource.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" + +#include "shader_recompiler/frontend/fetch_shader.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/texture_cache.h" @@ -20,8 +23,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_, vk::PipelineCache pipeline_cache, std::span infos, + std::optional fetch_shader_, std::span modules) - : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_} { + : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_}, + fetch_shader{std::move(fetch_shader_)} { const vk::Device device = instance.GetDevice(); std::ranges::copy(infos, stages.begin()); BuildDescSetLayout(); @@ -46,31 +51,31 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector vertex_bindings; boost::container::static_vector vertex_attributes; - if (!instance.IsVertexInputDynamicState()) { - const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; - for (const auto& input : vs_info->vs_inputs) { - if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || - input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { + if (fetch_shader && !instance.IsVertexInputDynamicState()) { + const auto& vs_info = GetStage(Shader::Stage::Vertex); + for (const auto& attrib : fetch_shader->attributes) { + if (attrib.UsesStepRates()) { // Skip attribute binding as the data will be pulled by shader continue; } - const auto buffer = input.GetSharp(*vs_info); + const auto buffer = vs_info.GetSharp(attrib); if (buffer.GetSize() == 0) { continue; } vertex_attributes.push_back({ - .location = input.binding, - .binding = input.binding, + .location = attrib.semantic, + .binding = attrib.semantic, .format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), .offset = 0, }); vertex_bindings.push_back({ - .binding = input.binding, + .binding = attrib.semantic, .stride = buffer.GetStride(), - .inputRate = input.instance_step_rate == Shader::Info::VsInput::None - ? vk::VertexInputRate::eVertex - : vk::VertexInputRate::eInstance, + .inputRate = + attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None + ? vk::VertexInputRate::eVertex + : vk::VertexInputRate::eInstance, }); } } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4f4abfd16..826d8e5d2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -59,9 +59,14 @@ public: GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, std::span stages, + std::optional fetch_shader, std::span modules); ~GraphicsPipeline(); + const std::optional& GetFetchShader() const noexcept { + return fetch_shader; + } + bool IsEmbeddedVs() const noexcept { static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; @@ -94,6 +99,7 @@ private: private: GraphicsPipelineKey key; + std::optional fetch_shader{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e4350d400..a5552c513 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -187,7 +187,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); if (is_new) { it.value() = graphics_pipeline_pool.Create(instance, scheduler, desc_heap, graphics_key, - *pipeline_cache, infos, modules); + *pipeline_cache, infos, fetch_shader, modules); } return it->second; } @@ -304,8 +304,8 @@ bool PipelineCache::RefreshGraphicsKey() { } auto params = Liverpool::GetParams(*pgm); - std::tie(infos[stage_out_idx], modules[stage_out_idx], key.stage_hashes[stage_out_idx]) = - GetProgram(stage_in, params, binding); + std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader, + key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding); return true; }; @@ -341,15 +341,14 @@ bool PipelineCache::RefreshGraphicsKey() { } } - const auto* vs_info = infos[static_cast(Shader::Stage::Vertex)]; - if (vs_info && !instance.IsVertexInputDynamicState()) { + const auto vs_info = infos[static_cast(Shader::Stage::Vertex)]; + if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) { u32 vertex_binding = 0; - for (const auto& input : vs_info->vs_inputs) { - if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || - input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { + for (const auto& attrib : fetch_shader->attributes) { + if (attrib.UsesStepRates()) { continue; } - const auto& buffer = input.GetSharp(*vs_info); + const auto& buffer = vs_info->GetSharp(attrib); if (buffer.GetSize() == 0) { continue; } @@ -393,7 +392,7 @@ bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; const auto* cs_pgm = &liverpool->regs.cs_program; const auto cs_params = Liverpool::GetParams(*cs_pgm); - std::tie(infos[0], modules[0], compute_key) = + std::tie(infos[0], modules[0], fetch_shader, compute_key) = GetProgram(Shader::Stage::Compute, cs_params, binding); return true; } @@ -424,8 +423,9 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, return module; } -std::tuple PipelineCache::GetProgram( - Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding) { +std::tuple, u64> +PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, + Shader::Backend::Bindings& binding) { const auto runtime_info = BuildRuntimeInfo(stage); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); if (new_program) { @@ -435,7 +435,8 @@ std::tuple PipelineCache::GetProgram const auto spec = Shader::StageSpecialization(program->info, runtime_info, start); program->AddPermut(module, std::move(spec)); it_pgm.value() = program; - return std::make_tuple(&program->info, module, HashCombine(params.hash, 0)); + return std::make_tuple(&program->info, module, spec.fetch_shader_data, + HashCombine(params.hash, 0)); } Program* program = it_pgm->second; @@ -455,7 +456,8 @@ std::tuple PipelineCache::GetProgram module = it->module; perm_idx = std::distance(program->modules.begin(), it); } - return std::make_tuple(&info, module, HashCombine(params.hash, perm_idx)); + return std::make_tuple(&info, module, spec.fetch_shader_data, + HashCombine(params.hash, perm_idx)); } void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 662bcbd80..e4a8abd4f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -47,8 +47,10 @@ public: const ComputePipeline* GetComputePipeline(); - std::tuple GetProgram( - Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding); + std::tuple, + u64> + GetProgram(Shader::Stage stage, Shader::ShaderParams params, + Shader::Backend::Bindings& binding); private: bool RefreshGraphicsKey(); @@ -80,6 +82,7 @@ private: tsl::robin_map graphics_pipelines; std::array infos{}; std::array modules{}; + std::optional fetch_shader{}; GraphicsPipelineKey graphics_key{}; u64 compute_key{}; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ff5e88141..50534cdbe 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -187,13 +187,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { } const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); - buffer_cache.BindVertexBuffers(vs_info); + const auto& fetch_shader = pipeline->GetFetchShader(); + buffer_cache.BindVertexBuffers(vs_info, fetch_shader); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); BeginRendering(*pipeline, state); UpdateDynamicState(*pipeline); - const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(regs); + const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(regs, fetch_shader); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); @@ -243,7 +244,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 } const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); - buffer_cache.BindVertexBuffers(vs_info); + const auto& fetch_shader = pipeline->GetFetchShader(); + buffer_cache.BindVertexBuffers(vs_info, fetch_shader); buffer_cache.BindIndexBuffer(is_indexed, 0); const auto& [buffer, base] = @@ -397,10 +399,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { if (!stage) { continue; } - if (stage->uses_step_rates) { - push_data.step0 = regs.vgt_instance_step_rate_0; - push_data.step1 = regs.vgt_instance_step_rate_1; - } + push_data.step0 = regs.vgt_instance_step_rate_0; + push_data.step1 = regs.vgt_instance_step_rate_1; stage->PushUd(binding, push_data); BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers);