shader: Specialize on vertex input number types if needed.

squidbus 2024-12-01 10:56:01 -08:00
parent 0835dc71b3
commit 30b292a787
9 changed files with 74 additions and 9 deletions

View File

@@ -130,6 +130,10 @@ struct Info {
         u8 dword_offset;
         InstanceIdType instance_step_rate;
         s32 instance_data_buf;
+
+        [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept {
+            return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
+        }
     };
     boost::container::static_vector<VsInput, 32> vs_inputs{};
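The new helper wraps the ReadUdReg call that the binders below previously spelled out by hand. A minimal usage sketch, not part of the commit (the function name InspectVertexInputs is hypothetical), assuming the relevant headers are included:

// Hypothetical helper showing the intended call pattern: walk the VS inputs
// and resolve each one's buffer sharp (V#) through Info::VsInput::GetSharp().
void InspectVertexInputs(const Shader::Info& vs_info) {
    for (const auto& input : vs_info.vs_inputs) {
        const AmdGpu::Buffer sharp = input.GetSharp(vs_info);
        if (sharp.GetSize() == 0) {
            continue; // unbound vertex buffer; the binders below use the same early-out
        }
        // The buffer's number format is what the new specialization keys on.
        const auto num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
        (void)num_class;
    }
}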

View File

@@ -24,6 +24,7 @@ struct Profile {
     bool support_explicit_workgroup_layout{};
     bool has_broken_spirv_clamp{};
     bool lower_left_origin_mode{};
+    bool support_legacy_vertex_attributes{};
     u64 min_ssbo_alignment{};
 };

View File

@@ -12,6 +12,12 @@
 namespace Shader {

+struct VsInputSpecialization {
+    AmdGpu::NumberClass num_class{};
+
+    auto operator<=>(const VsInputSpecialization&) const = default;
+};
+
 struct BufferSpecialization {
     u16 stride : 14;
     u16 is_storage : 1;
@@ -51,19 +57,27 @@ struct StageSpecialization {
     const Shader::Info* info;
     RuntimeInfo runtime_info;
     std::bitset<MaxStageResources> bitset{};
+    boost::container::small_vector<VsInputSpecialization, 32> vs_inputs;
     boost::container::small_vector<BufferSpecialization, 16> buffers;
     boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
     boost::container::small_vector<ImageSpecialization, 16> images;
     boost::container::small_vector<FMaskSpecialization, 8> fmasks;
     Backend::Bindings start{};

-    explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
-                                 Backend::Bindings start_)
+    explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
+                                 const Profile& profile_, Backend::Bindings start_)
         : info{&info_}, runtime_info{runtime_info_}, start{start_} {
         u32 binding{};
         if (info->has_readconst) {
            binding++;
         }
+        if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) {
+            // Specialize shader on VS input number types to follow spec.
+            ForEachSharp(vs_inputs, info->vs_inputs,
+                         [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
+                             spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
+                         });
+        }
         ForEachSharp(binding, buffers, info->buffers,
                      [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
                          spec.stride = sharp.GetStride();
@@ -86,6 +100,17 @@ struct StageSpecialization {
                      });
     }

+    void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
+        for (const auto& desc : desc_list) {
+            auto& spec = spec_list.emplace_back();
+            const auto sharp = desc.GetSharp(*info);
+            if (!sharp) {
+                continue;
+            }
+            func(spec, desc, sharp);
+        }
+    }
+
     void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
         for (const auto& desc : desc_list) {
             auto& spec = spec_list.emplace_back();
@@ -113,6 +138,11 @@ struct StageSpecialization {
         if (info->has_readconst) {
             binding++;
         }
+        for (u32 i = 0; i < vs_inputs.size(); i++) {
+            if (vs_inputs[i] != other.vs_inputs[i]) {
+                return false;
+            }
+        }
         for (u32 i = 0; i < buffers.size(); i++) {
             if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
                 return false;
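The practical effect of the comparison above: two specializations that differ only in a vertex input's number class no longer compare equal, so the pipeline cache compiles a separate shader permutation. Illustrative sketch only, not part of the commit (the function name is hypothetical):

#include <cassert>

// Differing number classes make two otherwise identical VsInputSpecializations
// unequal; operator!= is available because the defaulted operator<=> also
// implies a defaulted operator==.
void CheckVsInputSpecializationInequality() {
    const Shader::VsInputSpecialization float_input{AmdGpu::Float};
    const Shader::VsInputSpecialization uint_input{AmdGpu::Uint};
    assert(float_input != uint_input);
}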

View File

@@ -10,10 +10,27 @@
 namespace AmdGpu {

+enum NumberClass {
+    Float,
+    Sint,
+    Uint,
+};
+
 [[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
     return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
 }

+[[nodiscard]] constexpr NumberClass GetNumberClass(NumberFormat nfmt) {
+    switch (nfmt) {
+    case NumberFormat::Sint:
+        return Sint;
+    case NumberFormat::Uint:
+        return Uint;
+    default:
+        return Float;
+    }
+}
+
 [[nodiscard]] std::string_view NameOf(DataFormat fmt);
 [[nodiscard]] std::string_view NameOf(NumberFormat fmt);
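Since GetNumberClass is constexpr, the mapping can be checked at compile time. Usage sketch, not part of the commit, assuming this header is included:

// Integer formats map to their own class; every other number format falls
// through the default case and is treated as Float for specialization purposes.
static_assert(AmdGpu::GetNumberClass(AmdGpu::NumberFormat::Sint) == AmdGpu::Sint);
static_assert(AmdGpu::GetNumberClass(AmdGpu::NumberFormat::Uint) == AmdGpu::Uint);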

View File

@@ -157,7 +157,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
             continue;
         }
-        const auto& buffer = vs_info.ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+        const auto& buffer = input.GetSharp(vs_info);
         if (buffer.GetSize() == 0) {
             continue;
         }

View File

@@ -55,8 +55,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
             continue;
         }
-        const auto buffer =
-            vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+        const auto buffer = input.GetSharp(*vs_info);
         if (buffer.GetSize() == 0) {
             continue;
         }

View File

@@ -264,6 +264,7 @@ bool Instance::CreateDevice() {
     const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
     list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
     maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
+    legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);

     // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
     // with extensions.
@@ -399,6 +400,9 @@ bool Instance::CreateDevice() {
         vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT{
             .primitiveTopologyListRestart = true,
         },
+        vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
+            .legacyVertexAttributes = true,
+        },
 #ifdef __APPLE__
         feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
 #endif
@@ -438,6 +442,9 @@ bool Instance::CreateDevice() {
     if (!vertex_input_dynamic_state) {
         device_chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>();
     }
+    if (!legacy_vertex_attributes) {
+        device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
+    }

     auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
     if (device_result != vk::Result::eSuccess) {
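The feature struct is enabled unconditionally in the chain and simply unlinked when the extension is absent, matching how the other optional features are handled here. An alternative sketch (not what this commit does) would query the feature bit up front through vulkan-hpp before advertising support:

// Sketch only: read VK_EXT_legacy_vertex_attributes support from the
// physical device feature chain instead of assuming it follows the extension.
const auto feature_query =
    physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
                                 vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
const bool has_legacy_vertex_attributes =
    feature_query.get<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>().legacyVertexAttributes;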

View File

@@ -143,10 +143,16 @@ public:
         return maintenance5;
     }

+    /// Returns true when VK_EXT_primitive_topology_list_restart is supported.
     bool IsListRestartSupported() const {
         return list_restart;
     }

+    /// Returns true when VK_EXT_legacy_vertex_attributes is supported.
+    bool IsLegacyVertexAttributesSupported() const {
+        return legacy_vertex_attributes;
+    }
+
     /// Returns true when geometry shaders are supported by the device
     bool IsGeometryStageSupported() const {
         return features.geometryShader;
@@ -315,6 +321,7 @@ private:
     bool null_descriptor{};
     bool maintenance5{};
     bool list_restart{};
+    bool legacy_vertex_attributes{};
     u64 min_imported_host_pointer_alignment{};
     u32 subgroup_size{};
     bool tooling_info{};

View File

@@ -169,6 +169,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
         .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
         .support_explicit_workgroup_layout = true,
+        .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
     };
     auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
     ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",
@@ -347,8 +348,7 @@ bool PipelineCache::RefreshGraphicsKey() {
             input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
             continue;
         }
-        const auto& buffer =
-            vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+        const auto& buffer = input.GetSharp(*vs_info);
         if (buffer.GetSize() == 0) {
             continue;
         }
@@ -431,7 +431,7 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
     Program* program = program_pool.Create(stage, params);
     auto start = binding;
     const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
-    const auto spec = Shader::StageSpecialization(program->info, runtime_info, start);
+    const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start);
     program->AddPermut(module, std::move(spec));
     it_pgm.value() = program;
     return std::make_tuple(&program->info, module, HashCombine(params.hash, 0));
@@ -440,7 +440,7 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
     Program* program = it_pgm->second;
     auto& info = program->info;
     info.RefreshFlatBuf();
-    const auto spec = Shader::StageSpecialization(info, runtime_info, binding);
+    const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
     size_t perm_idx = program->modules.size();
     vk::ShaderModule module{};