From 30b292a787f282d4fd494dfdf3a7a1f4aaa2fc5d Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 1 Dec 2024 10:56:01 -0800 Subject: [PATCH] shader: Specialize on vertex input number types if needed. --- src/shader_recompiler/info.h | 4 +++ src/shader_recompiler/profile.h | 1 + src/shader_recompiler/specialization.h | 34 +++++++++++++++++-- src/video_core/amdgpu/pixel_format.h | 17 ++++++++++ src/video_core/buffer_cache/buffer_cache.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 +- .../renderer_vulkan/vk_instance.cpp | 7 ++++ src/video_core/renderer_vulkan/vk_instance.h | 7 ++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 8 ++--- 9 files changed, 74 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index c7ae2a1e5..fa61d62a9 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -130,6 +130,10 @@ struct Info { u8 dword_offset; InstanceIdType instance_step_rate; s32 instance_data_buf; + + [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept { + return info.ReadUdReg(sgpr_base, dword_offset); + } }; boost::container::static_vector vs_inputs{}; diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index bbda731e0..a2b62fb55 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -24,6 +24,7 @@ struct Profile { bool support_explicit_workgroup_layout{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; + bool support_legacy_vertex_attributes{}; u64 min_ssbo_alignment{}; }; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 225b164b5..222cc0192 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -12,6 +12,12 @@ namespace Shader { +struct VsInputSpecialization { + AmdGpu::NumberClass num_class{}; + + auto operator<=>(const VsInputSpecialization&) const = default; +}; + struct BufferSpecialization { u16 stride : 14; u16 is_storage : 1; @@ -51,19 +57,27 @@ struct StageSpecialization { const Shader::Info* info; RuntimeInfo runtime_info; std::bitset bitset{}; + boost::container::small_vector vs_inputs; boost::container::small_vector buffers; boost::container::small_vector tex_buffers; boost::container::small_vector images; boost::container::small_vector fmasks; Backend::Bindings start{}; - explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, - Backend::Bindings start_) + explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, + const Profile& profile_, Backend::Bindings start_) : info{&info_}, runtime_info{runtime_info_}, start{start_} { u32 binding{}; if (info->has_readconst) { binding++; } + if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) { + // Specialize shader on VS input number types to follow spec. + ForEachSharp(vs_inputs, info->vs_inputs, + [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt()); + }); + } ForEachSharp(binding, buffers, info->buffers, [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { spec.stride = sharp.GetStride(); @@ -86,6 +100,17 @@ struct StageSpecialization { }); } + void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) { + for (const auto& desc : desc_list) { + auto& spec = spec_list.emplace_back(); + const auto sharp = desc.GetSharp(*info); + if (!sharp) { + continue; + } + func(spec, desc, sharp); + } + } + void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { for (const auto& desc : desc_list) { auto& spec = spec_list.emplace_back(); @@ -113,6 +138,11 @@ struct StageSpecialization { if (info->has_readconst) { binding++; } + for (u32 i = 0; i < vs_inputs.size(); i++) { + if (vs_inputs[i] != other.vs_inputs[i]) { + return false; + } + } for (u32 i = 0; i < buffers.size(); i++) { if (other.bitset[binding++] && buffers[i] != other.buffers[i]) { return false; diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index e83313ea4..b8f8aa1a4 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -10,10 +10,27 @@ namespace AmdGpu { +enum NumberClass { + Float, + Sint, + Uint, +}; + [[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) { return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint; } +[[nodiscard]] constexpr NumberClass GetNumberClass(NumberFormat nfmt) { + switch (nfmt) { + case NumberFormat::Sint: + return Sint; + case NumberFormat::Uint: + return Uint; + default: + return Float; + } +} + [[nodiscard]] std::string_view NameOf(DataFormat fmt); [[nodiscard]] std::string_view NameOf(NumberFormat fmt); diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 77b353c2f..63d8ea632 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -157,7 +157,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { continue; } - const auto& buffer = vs_info.ReadUdReg(input.sgpr_base, input.dword_offset); + const auto& buffer = input.GetSharp(vs_info); if (buffer.GetSize() == 0) { continue; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d0d16ac75..b0aac2dbb 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -55,8 +55,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul continue; } - const auto buffer = - vs_info->ReadUdReg(input.sgpr_base, input.dword_offset); + const auto buffer = input.GetSharp(*vs_info); if (buffer.GetSize() == 0) { continue; } diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 580458e7e..dfd0f447c 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -264,6 +264,7 @@ bool Instance::CreateDevice() { const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); + legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME); // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // with extensions. @@ -399,6 +400,9 @@ bool Instance::CreateDevice() { vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT{ .primitiveTopologyListRestart = true, }, + vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{ + .legacyVertexAttributes = true, + }, #ifdef __APPLE__ feature_chain.get(), #endif @@ -438,6 +442,9 @@ bool Instance::CreateDevice() { if (!vertex_input_dynamic_state) { device_chain.unlink(); } + if (!legacy_vertex_attributes) { + device_chain.unlink(); + } auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get()); if (device_result != vk::Result::eSuccess) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 51c2c57c5..0e33b8443 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -143,10 +143,16 @@ public: return maintenance5; } + /// Returns true when VK_EXT_primitive_topology_list_restart is supported. bool IsListRestartSupported() const { return list_restart; } + /// Returns true when VK_EXT_legacy_vertex_attributes is supported. + bool IsLegacyVertexAttributesSupported() const { + return legacy_vertex_attributes; + } + /// Returns true when geometry shaders are supported by the device bool IsGeometryStageSupported() const { return features.geometryShader; @@ -315,6 +321,7 @@ private: bool null_descriptor{}; bool maintenance5{}; bool list_restart{}; + bool legacy_vertex_attributes{}; u64 min_imported_host_pointer_alignment{}; u32 subgroup_size{}; bool tooling_info{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 612e950bb..6dc03aec8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -169,6 +169,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32), .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_explicit_workgroup_layout = true, + .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), }; auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({}); ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}", @@ -347,8 +348,7 @@ bool PipelineCache::RefreshGraphicsKey() { input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { continue; } - const auto& buffer = - vs_info->ReadUdReg(input.sgpr_base, input.dword_offset); + const auto& buffer = input.GetSharp(*vs_info); if (buffer.GetSize() == 0) { continue; } @@ -431,7 +431,7 @@ std::tuple PipelineCache::GetProgram Program* program = program_pool.Create(stage, params); auto start = binding; const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); - const auto spec = Shader::StageSpecialization(program->info, runtime_info, start); + const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start); program->AddPermut(module, std::move(spec)); it_pgm.value() = program; return std::make_tuple(&program->info, module, HashCombine(params.hash, 0)); @@ -440,7 +440,7 @@ std::tuple PipelineCache::GetProgram Program* program = it_pgm->second; auto& info = program->info; info.RefreshFlatBuf(); - const auto spec = Shader::StageSpecialization(info, runtime_info, binding); + const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding); size_t perm_idx = program->modules.size(); vk::ShaderModule module{};