From 028df5dfef3e0b0ab115abfd53852368d9fd0132 Mon Sep 17 00:00:00 2001
From: squidbus <175574877+squidbus@users.noreply.github.com>
Date: Tue, 3 Dec 2024 08:12:27 -0800
Subject: [PATCH] shader_recompiler: Specialize on vertex attribute number types.

---
 .../backend/spirv/spirv_emit_context.cpp      | 33 ++++++-----------
 .../frontend/fetch_shader.cpp                 |  8 ++++-
 src/shader_recompiler/frontend/fetch_shader.h |  7 +++-
 .../frontend/translate/translate.cpp          |  4 ++-
 src/shader_recompiler/info.h                  | 31 ----------------
 src/shader_recompiler/profile.h               |  1 +
 src/shader_recompiler/specialization.h        | 36 +++++++++++++++++--
 src/video_core/amdgpu/pixel_format.h          | 19 +++++++++-
 src/video_core/buffer_cache/buffer_cache.cpp  |  2 +-
 .../renderer_vulkan/vk_graphics_pipeline.cpp  |  2 +-
 .../renderer_vulkan/vk_graphics_pipeline.h    |  1 +
 .../renderer_vulkan/vk_instance.cpp           |  7 ++++
 src/video_core/renderer_vulkan/vk_instance.h  |  7 ++++
 .../renderer_vulkan/vk_pipeline_cache.cpp     |  7 ++--
 .../renderer_vulkan/vk_rasterizer.cpp         | 11 +++++-
 15 files changed, 110 insertions(+), 66 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 4b3aa0621..4ce9f4221 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -4,6 +4,7 @@
 #include "common/assert.h"
 #include "common/div_ceil.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/ir/passes/srt.h"
 #include "video_core/amdgpu/types.h"

@@ -155,18 +156,12 @@ void EmitContext::DefineInterfaces() {
 }

 const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
-    switch (fmt) {
-    case AmdGpu::NumberFormat::Float:
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::SnormNz:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Srgb:
+    switch (GetNumberClass(fmt)) {
+    case AmdGpu::NumberClass::Float:
         return ctx.F32;
-    case AmdGpu::NumberFormat::Sint:
+    case AmdGpu::NumberClass::Sint:
         return ctx.S32;
-    case AmdGpu::NumberFormat::Uint:
+    case AmdGpu::NumberClass::Uint:
         return ctx.U32;
     default:
         break;
@@ -176,18 +171,12 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {

 EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
                                                           u32 num_components, bool output) {
-    switch (fmt) {
-    case AmdGpu::NumberFormat::Float:
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::SnormNz:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Srgb:
+    switch (GetNumberClass(fmt)) {
+    case AmdGpu::NumberClass::Float:
         return {id, output ? output_f32 : input_f32, F32[1], num_components, false};
-    case AmdGpu::NumberFormat::Uint:
+    case AmdGpu::NumberClass::Uint:
         return {id, output ? output_u32 : input_u32, U32[1], num_components, true};
-    case AmdGpu::NumberFormat::Sint:
+    case AmdGpu::NumberClass::Sint:
         return {id, output ? output_s32 : input_s32, S32[1], num_components, true};
     default:
         break;
@@ -280,13 +269,13 @@ void EmitContext::DefineInputs() {
         base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
         instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);

-        const auto fetch_shader = info.LoadFetchShader();
+        const auto fetch_shader = Gcn::ParseFetchShader(info);
         if (!fetch_shader) {
             break;
         }
         for (const auto& attrib : fetch_shader->attributes) {
             ASSERT(attrib.semantic < IR::NumParams);
-            const auto sharp = info.GetSharp(attrib);
+            const auto sharp = attrib.GetSharp(info);
             const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
             if (attrib.UsesStepRates()) {
                 const u32 rate_idx =
diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp
index d302bdeaf..8ae664d79 100644
--- a/src/shader_recompiler/frontend/fetch_shader.cpp
+++ b/src/shader_recompiler/frontend/fetch_shader.cpp
@@ -34,7 +34,13 @@ namespace Shader::Gcn {
  * We take the reverse way, extract the original input semantics from these instructions.
  **/

-FetchShaderData ParseFetchShader(const u32* code) {
+std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
+    if (!info.has_fetch_shader) {
+        return std::nullopt;
+    }
+    const u32* code;
+    std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
+
     FetchShaderData data{.code = code};
     GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
     GcnDecodeContext decoder;
diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h
index 213b19e07..080b0eb22 100644
--- a/src/shader_recompiler/frontend/fetch_shader.h
+++ b/src/shader_recompiler/frontend/fetch_shader.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include "common/types.h"
+#include "shader_recompiler/info.h"

 namespace Shader::Gcn {

@@ -33,6 +34,10 @@ struct VertexAttribute {
         return step_rate == OverStepRate0 || step_rate == OverStepRate1;
     }

+    [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
+        return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
+    }
+
     bool operator==(const VertexAttribute& other) const {
         return semantic == other.semantic && dest_vgpr == other.dest_vgpr &&
                num_elements == other.num_elements && sgpr_base == other.sgpr_base &&
@@ -59,6 +64,6 @@ struct FetchShaderData {
     }
 };

-FetchShaderData ParseFetchShader(const u32* code);
+std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);

 } // namespace Shader::Gcn
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index 142ba64b1..68625a12b 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -370,7 +370,9 @@ void Translator::EmitFetch(const GcnInst& inst) {
     // Read the pointer to the fetch shader assembly.
     info.has_fetch_shader = true;
     info.fetch_shader_sgpr_base = inst.src[0].code;
-    const auto fetch_data = info.LoadFetchShader();
+
+    const auto fetch_data = ParseFetchShader(info);
+    ASSERT(fetch_data.has_value());

     if (Config::dumpShaders()) {
         using namespace Common::FS;
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index b1660a5c3..d382d0e7c 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -9,7 +9,6 @@
 #include
 #include "common/assert.h"
 #include "common/types.h"
-#include "frontend/fetch_shader.h"
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/frontend/copy_shader.h"
 #include "shader_recompiler/ir/attribute.h"
@@ -231,22 +230,6 @@ struct Info {
         bnd.user_data += ud_mask.NumRegs();
     }

-    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets(
-        const AmdGpu::Liverpool::Regs& regs,
-        const std::optional<Gcn::FetchShaderData>& fetch_shader) const {
-        u32 vertex_offset = regs.index_offset;
-        u32 instance_offset = 0;
-        if (fetch_shader) {
-            if (vertex_offset == 0 && fetch_shader->vertex_offset_sgpr != -1) {
-                vertex_offset = user_data[fetch_shader->vertex_offset_sgpr];
-            }
-            if (fetch_shader->instance_offset_sgpr != -1) {
-                instance_offset = user_data[fetch_shader->instance_offset_sgpr];
-            }
-        }
-        return {vertex_offset, instance_offset};
-    }
-
     void RefreshFlatBuf() {
         flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
         ASSERT(user_data.size() <= NumUserDataRegs);
@@ -256,20 +239,6 @@ struct Info {
             srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
         }
     }
-
-    [[nodiscard]] std::optional<Gcn::FetchShaderData> LoadFetchShader() const {
-        if (!has_fetch_shader) {
-            return std::nullopt;
-        }
-        const u32* code;
-        std::memcpy(&code, &user_data[fetch_shader_sgpr_base], sizeof(code));
-        return Gcn::ParseFetchShader(code);
-    }
-
-    [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(
-        const Gcn::VertexAttribute& attrib) const noexcept {
-        return ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
-    }
 };

 constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index a868ab76c..96c458d44 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -22,6 +22,7 @@ struct Profile {
     bool support_fp32_denorm_preserve{};
     bool support_fp32_denorm_flush{};
     bool support_explicit_workgroup_layout{};
+    bool support_legacy_vertex_attributes{};
     bool has_broken_spirv_clamp{};
     bool lower_left_origin_mode{};
     bool needs_manual_interpolation{};
diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h
index 299e6bd44..740b89dda 100644
--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@@ -13,6 +13,12 @@

 namespace Shader {

+struct VsAttribSpecialization {
+    AmdGpu::NumberClass num_class{};
+
+    auto operator<=>(const VsAttribSpecialization&) const = default;
+};
+
 struct BufferSpecialization {
     u16 stride : 14;
     u16 is_storage : 1;
@@ -52,6 +58,7 @@ struct StageSpecialization {
     const Shader::Info* info;
     RuntimeInfo runtime_info;
     Gcn::FetchShaderData fetch_shader_data{};
+    boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
     std::bitset<MaxStageResources> bitset{};
     boost::container::small_vector<BufferSpecialization, 16> buffers;
     boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
@@ -59,11 +66,18 @@ struct StageSpecialization {
     boost::container::small_vector<FmaskSpecialization, 8> fmasks;
     Backend::Bindings start{};

-    explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
-                                 Backend::Bindings start_)
+    explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
+                                 const Profile& profile_, Backend::Bindings start_)
         : info{&info_}, runtime_info{runtime_info_}, start{start_} {
-        if (const auto fetch_shader = info_.LoadFetchShader()) {
+        if (const auto fetch_shader = Gcn::ParseFetchShader(info_)) {
             fetch_shader_data = *fetch_shader;
+            if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) {
+                // Specialize shader on VS input number types to follow spec.
+                ForEachSharp(vs_attribs, fetch_shader_data.attributes,
+                             [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
+                                 spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
+                             });
+            }
         }
         u32 binding{};
         if (info->has_readconst) {
@@ -90,6 +104,17 @@ struct StageSpecialization {
         });
     }

+    void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
+        for (const auto& desc : desc_list) {
+            auto& spec = spec_list.emplace_back();
+            const auto sharp = desc.GetSharp(*info);
+            if (!sharp) {
+                continue;
+            }
+            func(spec, desc, sharp);
+        }
+    }
+
     void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
         for (const auto& desc : desc_list) {
             auto& spec = spec_list.emplace_back();
@@ -113,6 +138,11 @@ struct StageSpecialization {
         if (fetch_shader_data != other.fetch_shader_data) {
             return false;
         }
+        for (u32 i = 0; i < vs_attribs.size(); i++) {
+            if (vs_attribs[i] != other.vs_attribs[i]) {
+                return false;
+            }
+        }
         u32 binding{};
         if (info->has_readconst != other.info->has_readconst) {
             return false;
diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h
index e83313ea4..38c81ba5f 100644
--- a/src/video_core/amdgpu/pixel_format.h
+++ b/src/video_core/amdgpu/pixel_format.h
@@ -10,7 +10,24 @@

 namespace AmdGpu {

-[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
+enum NumberClass {
+    Float,
+    Sint,
+    Uint,
+};
+
+[[nodiscard]] constexpr NumberClass GetNumberClass(const NumberFormat nfmt) {
+    switch (nfmt) {
+    case NumberFormat::Sint:
+        return Sint;
+    case NumberFormat::Uint:
+        return Uint;
+    default:
+        return Float;
+    }
+}
+
+[[nodiscard]] constexpr bool IsInteger(const NumberFormat nfmt) {
     return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
 }

diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 0beb16d27..890dbfec8 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -158,7 +158,7 @@ bool BufferCache::BindVertexBuffers(
             continue;
         }

-        const auto& buffer = vs_info.GetSharp(attrib);
+        const auto& buffer = attrib.GetSharp(vs_info);
         if (buffer.GetSize() == 0) {
             continue;
         }
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 44b57d537..d53204c77 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -59,7 +59,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
             continue;
         }

-        const auto buffer = vs_info.GetSharp(attrib);
+        const auto buffer = attrib.GetSharp(vs_info);
         if (buffer.GetSize() == 0) {
             continue;
         }
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 826d8e5d2..91ffe4ea4 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -4,6 +4,7 @@

 #include
 #include "common/types.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "video_core/renderer_vulkan/liverpool_to_vk.h"
 #include "video_core/renderer_vulkan/vk_common.h"
 #include "video_core/renderer_vulkan/vk_pipeline_common.h"
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index 1c150ce28..49e4987db 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -265,6 +265,7 @@ bool Instance::CreateDevice() {
     const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
     list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
     maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
+    legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);

     // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
     // with extensions.
@@ -403,6 +404,9 @@ bool Instance::CreateDevice() {
         vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR{
             .fragmentShaderBarycentric = true,
         },
+        vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
+            .legacyVertexAttributes = true,
+        },
 #ifdef __APPLE__
         feature_chain.get(),
 #endif
@@ -445,6 +449,9 @@ bool Instance::CreateDevice() {
     if (!fragment_shader_barycentric) {
         device_chain.unlink<vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>();
     }
+    if (!legacy_vertex_attributes) {
+        device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
+    }

     auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
     if (device_result != vk::Result::eSuccess) {
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
index 5a46ef6fe..81303c9cc 100644
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -148,10 +148,16 @@ public:
         return fragment_shader_barycentric;
     }

+    /// Returns true when VK_EXT_primitive_topology_list_restart is supported.
     bool IsListRestartSupported() const {
         return list_restart;
     }

+    /// Returns true when VK_EXT_legacy_vertex_attributes is supported.
+    bool IsLegacyVertexAttributesSupported() const {
+        return legacy_vertex_attributes;
+    }
+
     /// Returns true when geometry shaders are supported by the device
     bool IsGeometryStageSupported() const {
         return features.geometryShader;
@@ -320,6 +326,7 @@ private:
     bool null_descriptor{};
     bool maintenance5{};
     bool list_restart{};
+    bool legacy_vertex_attributes{};
     u64 min_imported_host_pointer_alignment{};
     u32 subgroup_size{};
     bool tooling_info{};
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 276410d2f..47713f0ff 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -169,6 +169,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
         .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
         .support_explicit_workgroup_layout = true,
+        .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
         .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
                                       instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
     };
@@ -352,7 +353,7 @@ bool PipelineCache::RefreshGraphicsKey() {
         if (attrib.UsesStepRates()) {
             continue;
         }
-        const auto& buffer = vs_info->GetSharp(attrib);
+        const auto& buffer = attrib.GetSharp(*vs_info);
         if (buffer.GetSize() == 0) {
             continue;
         }
@@ -436,7 +437,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
         Program* program = program_pool.Create(stage, params);
         auto start = binding;
         const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
-        const auto spec = Shader::StageSpecialization(program->info, runtime_info, start);
+        const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start);
         program->AddPermut(module, std::move(spec));
         it_pgm.value() = program;
         return std::make_tuple(&program->info, module, spec.fetch_shader_data,
@@ -446,7 +447,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
     Program* program = it_pgm->second;
     auto& info = program->info;
     info.RefreshFlatBuf();
-    const auto spec = Shader::StageSpecialization(info, runtime_info, binding);
+    const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
     size_t perm_idx = program->modules.size();
     vk::ShaderModule module{};

diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 50534cdbe..e14c5b437 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -194,7 +194,16 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
     BeginRendering(*pipeline, state);
     UpdateDynamicState(*pipeline);

-    const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(regs, fetch_shader);
+    u32 vertex_offset = regs.index_offset;
+    u32 instance_offset = 0;
+    if (fetch_shader) {
+        if (vertex_offset == 0 && fetch_shader->vertex_offset_sgpr != -1) {
+            vertex_offset = vs_info.user_data[fetch_shader->vertex_offset_sgpr];
+        }
+        if (fetch_shader->instance_offset_sgpr != -1) {
+            instance_offset = vs_info.user_data[fetch_shader->instance_offset_sgpr];
+        }
+    }

     const auto cmdbuf = scheduler.CommandBuffer();
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());