mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-04 16:32:39 +00:00
shader: Specialize on vertex input number types if needed.
This commit is contained in:
parent
0835dc71b3
commit
30b292a787
@ -130,6 +130,10 @@ struct Info {
|
|||||||
u8 dword_offset;
|
u8 dword_offset;
|
||||||
InstanceIdType instance_step_rate;
|
InstanceIdType instance_step_rate;
|
||||||
s32 instance_data_buf;
|
s32 instance_data_buf;
|
||||||
|
|
||||||
|
/// Fetches the AmdGpu::Buffer descriptor ("sharp") for this vertex input.
/// @param info Shader info to read from; the descriptor is obtained via
///             info.ReadUdReg using this entry's sgpr_base and dword_offset.
/// @return The buffer descriptor, returned by value.
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept {
|
||||||
|
return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ struct Profile {
|
|||||||
bool support_explicit_workgroup_layout{};
|
bool support_explicit_workgroup_layout{};
|
||||||
bool has_broken_spirv_clamp{};
|
bool has_broken_spirv_clamp{};
|
||||||
bool lower_left_origin_mode{};
|
bool lower_left_origin_mode{};
|
||||||
|
bool support_legacy_vertex_attributes{};
|
||||||
u64 min_ssbo_alignment{};
|
u64 min_ssbo_alignment{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -12,6 +12,12 @@
|
|||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
|
/// Specialization key for a single vertex shader input.
/// The StageSpecialization constructor fills one of these per entry of
/// Info::vs_inputs, but only for Stage::Vertex when
/// Profile::support_legacy_vertex_attributes is false.
struct VsInputSpecialization {
|
||||||
|
// Number class of the input's buffer number format, set from
// AmdGpu::GetNumberClass(sharp.GetNumberFmt()). Value-initialized, so it is
// the first enumerator (Float) until it is assigned.
AmdGpu::NumberClass num_class{};
|
||||||
|
|
||||||
|
// Defaulted three-way comparison; a defaulted operator<=> also implicitly
// declares a defaulted operator==, which is what the `!=` check on
// vs_inputs entries in StageSpecialization relies on.
auto operator<=>(const VsInputSpecialization&) const = default;
|
||||||
|
};
|
||||||
|
|
||||||
struct BufferSpecialization {
|
struct BufferSpecialization {
|
||||||
u16 stride : 14;
|
u16 stride : 14;
|
||||||
u16 is_storage : 1;
|
u16 is_storage : 1;
|
||||||
@ -51,19 +57,27 @@ struct StageSpecialization {
|
|||||||
const Shader::Info* info;
|
const Shader::Info* info;
|
||||||
RuntimeInfo runtime_info;
|
RuntimeInfo runtime_info;
|
||||||
std::bitset<MaxStageResources> bitset{};
|
std::bitset<MaxStageResources> bitset{};
|
||||||
|
boost::container::small_vector<VsInputSpecialization, 32> vs_inputs;
|
||||||
boost::container::small_vector<BufferSpecialization, 16> buffers;
|
boost::container::small_vector<BufferSpecialization, 16> buffers;
|
||||||
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
|
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
|
||||||
boost::container::small_vector<ImageSpecialization, 16> images;
|
boost::container::small_vector<ImageSpecialization, 16> images;
|
||||||
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
|
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
|
||||||
Backend::Bindings start{};
|
Backend::Bindings start{};
|
||||||
|
|
||||||
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
|
explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
|
||||||
Backend::Bindings start_)
|
const Profile& profile_, Backend::Bindings start_)
|
||||||
: info{&info_}, runtime_info{runtime_info_}, start{start_} {
|
: info{&info_}, runtime_info{runtime_info_}, start{start_} {
|
||||||
u32 binding{};
|
u32 binding{};
|
||||||
if (info->has_readconst) {
|
if (info->has_readconst) {
|
||||||
binding++;
|
binding++;
|
||||||
}
|
}
|
||||||
|
if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) {
|
||||||
|
// Specialize shader on VS input number types to follow spec.
|
||||||
|
ForEachSharp(vs_inputs, info->vs_inputs,
|
||||||
|
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||||
|
spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
|
||||||
|
});
|
||||||
|
}
|
||||||
ForEachSharp(binding, buffers, info->buffers,
|
ForEachSharp(binding, buffers, info->buffers,
|
||||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||||
spec.stride = sharp.GetStride();
|
spec.stride = sharp.GetStride();
|
||||||
@ -86,6 +100,17 @@ struct StageSpecialization {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Overload of ForEachSharp that takes no `u32& binding` argument.
/// Appends one entry to spec_list for every descriptor in desc_list and lets
/// func fill it in from the descriptor's sharp.
/// @param spec_list Container that receives one new entry per descriptor.
/// @param desc_list Descriptors to walk; each must provide GetSharp(const Info&).
/// @param func      Callable invoked as func(spec, desc, sharp) for every
///                  descriptor whose sharp is not falsy.
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
|
||||||
|
for (const auto& desc : desc_list) {
|
||||||
|
// The entry is appended before the sharp is checked, so spec_list keeps the
// same length and index order as desc_list even when a sharp is skipped.
auto& spec = spec_list.emplace_back();
|
||||||
|
const auto sharp = desc.GetSharp(*info);
|
||||||
|
// A falsy sharp leaves the freshly appended entry in its default state.
if (!sharp) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
func(spec, desc, sharp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
|
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
|
||||||
for (const auto& desc : desc_list) {
|
for (const auto& desc : desc_list) {
|
||||||
auto& spec = spec_list.emplace_back();
|
auto& spec = spec_list.emplace_back();
|
||||||
@ -113,6 +138,11 @@ struct StageSpecialization {
|
|||||||
if (info->has_readconst) {
|
if (info->has_readconst) {
|
||||||
binding++;
|
binding++;
|
||||||
}
|
}
|
||||||
|
for (u32 i = 0; i < vs_inputs.size(); i++) {
|
||||||
|
if (vs_inputs[i] != other.vs_inputs[i]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
for (u32 i = 0; i < buffers.size(); i++) {
|
for (u32 i = 0; i < buffers.size(); i++) {
|
||||||
if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
|
if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -10,10 +10,27 @@
|
|||||||
|
|
||||||
namespace AmdGpu {
|
namespace AmdGpu {
|
||||||
|
|
||||||
|
/// Coarse classification of a NumberFormat, as produced by GetNumberClass:
/// NumberFormat::Sint and NumberFormat::Uint keep their own class, every other
/// format is classified as Float.
/// This is an unscoped enum, so the enumerators are used unqualified inside
/// this namespace (GetNumberClass returns plain `Sint` / `Uint` / `Float`).
enum NumberClass {
|
||||||
|
// First enumerator, hence the value of a value-initialized NumberClass.
Float,
|
||||||
|
// Signed integer formats.
Sint,
|
||||||
|
// Unsigned integer formats.
Uint,
|
||||||
|
};
|
||||||
|
|
||||||
[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
|
[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
|
||||||
return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
|
return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Collapses a NumberFormat into its NumberClass.
/// @param nfmt Number format to classify.
/// @return Sint for NumberFormat::Sint, Uint for NumberFormat::Uint, and Float
///         for every other format.
[[nodiscard]] constexpr NumberClass GetNumberClass(NumberFormat nfmt) {
|
||||||
|
switch (nfmt) {
|
||||||
|
case NumberFormat::Sint:
|
||||||
|
return Sint;
|
||||||
|
case NumberFormat::Uint:
|
||||||
|
return Uint;
|
||||||
|
// Every format other than the two integer formats above is treated as Float.
default:
|
||||||
|
return Float;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
[[nodiscard]] std::string_view NameOf(DataFormat fmt);
|
[[nodiscard]] std::string_view NameOf(DataFormat fmt);
|
||||||
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
|
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
|
||||||
|
|
||||||
|
@ -157,7 +157,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto& buffer = vs_info.ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
const auto& buffer = input.GetSharp(vs_info);
|
||||||
if (buffer.GetSize() == 0) {
|
if (buffer.GetSize() == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -55,8 +55,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto buffer =
|
const auto buffer = input.GetSharp(*vs_info);
|
||||||
vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
|
||||||
if (buffer.GetSize() == 0) {
|
if (buffer.GetSize() == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -264,6 +264,7 @@ bool Instance::CreateDevice() {
|
|||||||
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
||||||
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
|
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
|
||||||
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
|
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
|
||||||
|
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
|
||||||
|
|
||||||
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
|
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
|
||||||
// with extensions.
|
// with extensions.
|
||||||
@ -399,6 +400,9 @@ bool Instance::CreateDevice() {
|
|||||||
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT{
|
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT{
|
||||||
.primitiveTopologyListRestart = true,
|
.primitiveTopologyListRestart = true,
|
||||||
},
|
},
|
||||||
|
vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
|
||||||
|
.legacyVertexAttributes = true,
|
||||||
|
},
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
|
feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
|
||||||
#endif
|
#endif
|
||||||
@ -438,6 +442,9 @@ bool Instance::CreateDevice() {
|
|||||||
if (!vertex_input_dynamic_state) {
|
if (!vertex_input_dynamic_state) {
|
||||||
device_chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>();
|
device_chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>();
|
||||||
}
|
}
|
||||||
|
if (!legacy_vertex_attributes) {
|
||||||
|
device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
|
||||||
|
}
|
||||||
|
|
||||||
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
|
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
|
||||||
if (device_result != vk::Result::eSuccess) {
|
if (device_result != vk::Result::eSuccess) {
|
||||||
|
@ -143,10 +143,16 @@ public:
|
|||||||
return maintenance5;
|
return maintenance5;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when VK_EXT_primitive_topology_list_restart is supported.
|
||||||
bool IsListRestartSupported() const {
|
bool IsListRestartSupported() const {
|
||||||
return list_restart;
|
return list_restart;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when VK_EXT_legacy_vertex_attributes is supported.
/// The flag holds the result of
/// add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME) in CreateDevice;
/// PipelineCache copies it into Profile::support_legacy_vertex_attributes.
|
||||||
|
bool IsLegacyVertexAttributesSupported() const {
|
||||||
|
return legacy_vertex_attributes;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true when geometry shaders are supported by the device
|
/// Returns true when geometry shaders are supported by the device
|
||||||
bool IsGeometryStageSupported() const {
|
bool IsGeometryStageSupported() const {
|
||||||
return features.geometryShader;
|
return features.geometryShader;
|
||||||
@ -315,6 +321,7 @@ private:
|
|||||||
bool null_descriptor{};
|
bool null_descriptor{};
|
||||||
bool maintenance5{};
|
bool maintenance5{};
|
||||||
bool list_restart{};
|
bool list_restart{};
|
||||||
|
bool legacy_vertex_attributes{};
|
||||||
u64 min_imported_host_pointer_alignment{};
|
u64 min_imported_host_pointer_alignment{};
|
||||||
u32 subgroup_size{};
|
u32 subgroup_size{};
|
||||||
bool tooling_info{};
|
bool tooling_info{};
|
||||||
|
@ -169,6 +169,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||||||
.support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
|
.support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
|
||||||
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
|
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
|
||||||
.support_explicit_workgroup_layout = true,
|
.support_explicit_workgroup_layout = true,
|
||||||
|
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
|
||||||
};
|
};
|
||||||
auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
|
auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
|
||||||
ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",
|
ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",
|
||||||
@ -347,8 +348,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
|
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto& buffer =
|
const auto& buffer = input.GetSharp(*vs_info);
|
||||||
vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
|
||||||
if (buffer.GetSize() == 0) {
|
if (buffer.GetSize() == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -431,7 +431,7 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
|
|||||||
Program* program = program_pool.Create(stage, params);
|
Program* program = program_pool.Create(stage, params);
|
||||||
auto start = binding;
|
auto start = binding;
|
||||||
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
|
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
|
||||||
const auto spec = Shader::StageSpecialization(program->info, runtime_info, start);
|
const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start);
|
||||||
program->AddPermut(module, std::move(spec));
|
program->AddPermut(module, std::move(spec));
|
||||||
it_pgm.value() = program;
|
it_pgm.value() = program;
|
||||||
return std::make_tuple(&program->info, module, HashCombine(params.hash, 0));
|
return std::make_tuple(&program->info, module, HashCombine(params.hash, 0));
|
||||||
@ -440,7 +440,7 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
|
|||||||
Program* program = it_pgm->second;
|
Program* program = it_pgm->second;
|
||||||
auto& info = program->info;
|
auto& info = program->info;
|
||||||
info.RefreshFlatBuf();
|
info.RefreshFlatBuf();
|
||||||
const auto spec = Shader::StageSpecialization(info, runtime_info, binding);
|
const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
|
||||||
size_t perm_idx = program->modules.size();
|
size_t perm_idx = program->modules.size();
|
||||||
vk::ShaderModule module{};
|
vk::ShaderModule module{};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user