shader: Specialize on vertex input number types if needed.

squidbus 2024-12-01 10:56:01 -08:00
parent 0835dc71b3
commit 30b292a787
9 changed files with 74 additions and 9 deletions
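In short: when the device does not expose VK_EXT_legacy_vertex_attributes, Vulkan requires the vertex shader's input types to match the numeric class (float, signed integer, or unsigned integer) of the bound attribute formats, so shaders are now specialized per vertex input number class. The Vulkan backend reports the extension through the new support_legacy_vertex_attributes profile flag so the extra specialization is skipped when the driver relaxes the matching rule. The sketch below only illustrates the classification step, pieced together from the helpers added in this commit (GetSharp, GetNumberClass); the ClassifyInput wrapper and the surrounding plumbing are assumptions, not part of the change.

// Illustrative sketch (ClassifyInput is hypothetical; GetSharp and GetNumberClass
// are added in the hunks below): derive the specialization value for one vertex input.
AmdGpu::NumberClass ClassifyInput(const Shader::Info& info,
                                  const Shader::Info::VsInput& input) {
    // Read the buffer resource (V#) backing this vertex input from user data.
    const AmdGpu::Buffer sharp = input.GetSharp(info);
    // Float, Sint, or Uint, depending on the attribute's number format.
    return AmdGpu::GetNumberClass(sharp.GetNumberFmt());
}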

View File

@@ -130,6 +130,10 @@ struct Info {
u8 dword_offset;
InstanceIdType instance_step_rate;
s32 instance_data_buf;
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept {
return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
}
};
boost::container::static_vector<VsInput, 32> vs_inputs{};

View File

@@ -24,6 +24,7 @@ struct Profile {
bool support_explicit_workgroup_layout{};
bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{};
bool support_legacy_vertex_attributes{};
u64 min_ssbo_alignment{};
};

View File

@@ -12,6 +12,12 @@
namespace Shader {
struct VsInputSpecialization {
AmdGpu::NumberClass num_class{};
auto operator<=>(const VsInputSpecialization&) const = default;
};
struct BufferSpecialization {
u16 stride : 14;
u16 is_storage : 1;
@@ -51,19 +57,27 @@ struct StageSpecialization {
const Shader::Info* info;
RuntimeInfo runtime_info;
std::bitset<MaxStageResources> bitset{};
boost::container::small_vector<VsInputSpecialization, 32> vs_inputs;
boost::container::small_vector<BufferSpecialization, 16> buffers;
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
boost::container::small_vector<ImageSpecialization, 16> images;
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
Backend::Bindings start{};
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
Backend::Bindings start_)
explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
const Profile& profile_, Backend::Bindings start_)
: info{&info_}, runtime_info{runtime_info_}, start{start_} {
u32 binding{};
if (info->has_readconst) {
binding++;
}
if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) {
// Specialize shader on VS input number types to follow spec.
ForEachSharp(vs_inputs, info->vs_inputs,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
});
}
ForEachSharp(binding, buffers, info->buffers,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.stride = sharp.GetStride();
@@ -86,6 +100,17 @@ struct StageSpecialization {
});
}
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
for (const auto& desc : desc_list) {
auto& spec = spec_list.emplace_back();
const auto sharp = desc.GetSharp(*info);
if (!sharp) {
continue;
}
func(spec, desc, sharp);
}
}
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
for (const auto& desc : desc_list) {
auto& spec = spec_list.emplace_back();
@@ -113,6 +138,11 @@ struct StageSpecialization {
if (info->has_readconst) {
binding++;
}
for (u32 i = 0; i < vs_inputs.size(); i++) {
if (vs_inputs[i] != other.vs_inputs[i]) {
return false;
}
}
for (u32 i = 0; i < buffers.size(); i++) {
if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
return false;

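The vs_inputs entries become part of the specialization key: the defaulted operator<=> on VsInputSpecialization supplies the equality used in the loop above, so two draws that bind the same shader code but attribute formats of different number classes no longer share a module. A rough usage sketch follows; the Permutation layout and lookup helper here are assumed simplifications of how the pipeline cache stores permutations, not the actual implementation.

#include <span>

// Rough sketch with assumed types: find an existing module whose specialization,
// vs_inputs number classes included, matches the current draw state.
struct Permutation {
    vk::ShaderModule module;
    Shader::StageSpecialization spec;
};

const Permutation* FindPermutation(std::span<const Permutation> perms,
                                   const Shader::StageSpecialization& spec) {
    for (const auto& perm : perms) {
        if (perm.spec == spec) { // vs_inputs now participate in this comparison
            return &perm;        // reuse the previously compiled module
        }
    }
    return nullptr; // caller compiles a new permutation for this key
}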
View File

@@ -10,10 +10,27 @@
namespace AmdGpu {
enum NumberClass {
Float,
Sint,
Uint,
};
[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
}
[[nodiscard]] constexpr NumberClass GetNumberClass(NumberFormat nfmt) {
switch (nfmt) {
case NumberFormat::Sint:
return Sint;
case NumberFormat::Uint:
return Uint;
default:
return Float;
}
}
[[nodiscard]] std::string_view NameOf(DataFormat fmt);
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);

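GetNumberClass collapses the buffer's NumberFormat into the three classes that matter for vertex input matching. The helper below is hypothetical and not part of this commit; it only illustrates what the class distinction buys: choosing a shader-side input type whose base type agrees with the attribute's numeric class, as Vulkan requires when legacy vertex attributes are unavailable.

// Hypothetical helper (illustration only): pick a GLSL-style vertex input type
// matching the attribute's number class, so integer attributes are consumed by
// integer inputs.
constexpr const char* InputTypeFor(AmdGpu::NumberClass nc) {
    switch (nc) {
    case AmdGpu::NumberClass::Sint:
        return "ivec4";
    case AmdGpu::NumberClass::Uint:
        return "uvec4";
    default:
        return "vec4";
    }
}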
View File

@@ -157,7 +157,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
continue;
}
const auto& buffer = vs_info.ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
const auto& buffer = input.GetSharp(vs_info);
if (buffer.GetSize() == 0) {
continue;
}

View File

@@ -55,8 +55,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
continue;
}
const auto buffer =
vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
const auto buffer = input.GetSharp(*vs_info);
if (buffer.GetSize() == 0) {
continue;
}

View File

@@ -264,6 +264,7 @@ bool Instance::CreateDevice() {
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
// with extensions.
@@ -399,6 +400,9 @@ bool Instance::CreateDevice() {
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT{
.primitiveTopologyListRestart = true,
},
vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
.legacyVertexAttributes = true,
},
#ifdef __APPLE__
feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
#endif
@@ -438,6 +442,9 @@ bool Instance::CreateDevice() {
if (!vertex_input_dynamic_state) {
device_chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>();
}
if (!legacy_vertex_attributes) {
device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
}
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
if (device_result != vk::Result::eSuccess) {

View File

@@ -143,10 +143,16 @@ public:
return maintenance5;
}
/// Returns true when VK_EXT_primitive_topology_list_restart is supported.
bool IsListRestartSupported() const {
return list_restart;
}
/// Returns true when VK_EXT_legacy_vertex_attributes is supported.
bool IsLegacyVertexAttributesSupported() const {
return legacy_vertex_attributes;
}
/// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const {
return features.geometryShader;
@@ -315,6 +321,7 @@ private:
bool null_descriptor{};
bool maintenance5{};
bool list_restart{};
bool legacy_vertex_attributes{};
u64 min_imported_host_pointer_alignment{};
u32 subgroup_size{};
bool tooling_info{};

View File

@@ -169,6 +169,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
.support_explicit_workgroup_layout = true,
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
};
auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",
@@ -347,8 +348,7 @@ bool PipelineCache::RefreshGraphicsKey() {
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
continue;
}
const auto& buffer =
vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
const auto& buffer = input.GetSharp(*vs_info);
if (buffer.GetSize() == 0) {
continue;
}
@@ -431,7 +431,7 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
Program* program = program_pool.Create(stage, params);
auto start = binding;
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
const auto spec = Shader::StageSpecialization(program->info, runtime_info, start);
const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start);
program->AddPermut(module, std::move(spec));
it_pgm.value() = program;
return std::make_tuple(&program->info, module, HashCombine(params.hash, 0));
@@ -440,7 +440,7 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
Program* program = it_pgm->second;
auto& info = program->info;
info.RefreshFlatBuf();
const auto spec = Shader::StageSpecialization(info, runtime_info, binding);
const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
size_t perm_idx = program->modules.size();
vk::ShaderModule module{};