mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-04 16:32:39 +00:00
shader: Specialize on vertex input number types if needed.
This commit is contained in:
parent
0835dc71b3
commit
30b292a787
@ -130,6 +130,10 @@ struct Info {
|
||||
u8 dword_offset;
|
||||
InstanceIdType instance_step_rate;
|
||||
s32 instance_data_buf;
|
||||
|
||||
/// Returns the AmdGpu::Buffer obtained from `info.ReadUdReg`, addressed by this
/// input's own `sgpr_base` and `dword_offset` members.
/// @param info Shader info object whose ReadUdReg is queried.
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept {
|
||||
return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||
}
|
||||
};
|
||||
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
||||
|
||||
|
@ -24,6 +24,7 @@ struct Profile {
|
||||
bool support_explicit_workgroup_layout{};
|
||||
bool has_broken_spirv_clamp{};
|
||||
bool lower_left_origin_mode{};
|
||||
bool support_legacy_vertex_attributes{};
|
||||
u64 min_ssbo_alignment{};
|
||||
};
|
||||
|
||||
|
@ -12,6 +12,12 @@
|
||||
|
||||
namespace Shader {
|
||||
|
||||
/// Per-vertex-input specialization entry stored in StageSpecialization::vs_inputs.
struct VsInputSpecialization {
|
||||
// Number class of the input. The StageSpecialization constructor assigns it from
// AmdGpu::GetNumberClass(sharp.GetNumberFmt()); otherwise it stays value-initialized.
AmdGpu::NumberClass num_class{};
|
||||
|
||||
// Defaulted three-way comparison over the members; in C++20 this also provides the
// defaulted equality used when two specializations are compared element by element.
auto operator<=>(const VsInputSpecialization&) const = default;
|
||||
};
|
||||
|
||||
struct BufferSpecialization {
|
||||
u16 stride : 14;
|
||||
u16 is_storage : 1;
|
||||
@ -51,19 +57,27 @@ struct StageSpecialization {
|
||||
const Shader::Info* info;
|
||||
RuntimeInfo runtime_info;
|
||||
std::bitset<MaxStageResources> bitset{};
|
||||
boost::container::small_vector<VsInputSpecialization, 32> vs_inputs;
|
||||
boost::container::small_vector<BufferSpecialization, 16> buffers;
|
||||
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
|
||||
boost::container::small_vector<ImageSpecialization, 16> images;
|
||||
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
|
||||
Backend::Bindings start{};
|
||||
|
||||
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
|
||||
Backend::Bindings start_)
|
||||
explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
|
||||
const Profile& profile_, Backend::Bindings start_)
|
||||
: info{&info_}, runtime_info{runtime_info_}, start{start_} {
|
||||
u32 binding{};
|
||||
if (info->has_readconst) {
|
||||
binding++;
|
||||
}
|
||||
if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) {
|
||||
// Specialize shader on VS input number types to follow spec.
|
||||
ForEachSharp(vs_inputs, info->vs_inputs,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
|
||||
});
|
||||
}
|
||||
ForEachSharp(binding, buffers, info->buffers,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.stride = sharp.GetStride();
|
||||
@ -86,6 +100,17 @@ struct StageSpecialization {
|
||||
});
|
||||
}
|
||||
|
||||
/// Appends one entry to `spec_list` for every descriptor in `desc_list` and lets `func`
/// fill it in from the descriptor's sharp.
/// @param spec_list Container receiving one emplaced specialization per descriptor.
/// @param desc_list Descriptors to visit; each must provide GetSharp(const Info&).
/// @param func Callable invoked as func(spec, desc, sharp) for descriptors whose sharp
///             does not test false.
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
|
||||
for (const auto& desc : desc_list) {
|
||||
// The entry is emplaced before the sharp is checked, so spec_list gets exactly one
// element per descriptor even when the descriptor is skipped below.
auto& spec = spec_list.emplace_back();
|
||||
const auto sharp = desc.GetSharp(*info);
|
||||
// A sharp for which `!sharp` holds leaves the freshly emplaced spec untouched.
if (!sharp) {
|
||||
continue;
|
||||
}
|
||||
func(spec, desc, sharp);
|
||||
}
|
||||
}
|
||||
|
||||
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
|
||||
for (const auto& desc : desc_list) {
|
||||
auto& spec = spec_list.emplace_back();
|
||||
@ -113,6 +138,11 @@ struct StageSpecialization {
|
||||
if (info->has_readconst) {
|
||||
binding++;
|
||||
}
|
||||
for (u32 i = 0; i < vs_inputs.size(); i++) {
|
||||
if (vs_inputs[i] != other.vs_inputs[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
for (u32 i = 0; i < buffers.size(); i++) {
|
||||
if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
|
||||
return false;
|
||||
|
@ -10,10 +10,27 @@
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
/// Coarse classification of a NumberFormat, as returned by GetNumberClass.
/// Unscoped: the enumerators are referenced unqualified inside this namespace.
enum NumberClass {
|
||||
// Result for every NumberFormat other than Sint and Uint.
Float,
|
||||
// Result for NumberFormat::Sint.
Sint,
|
||||
// Result for NumberFormat::Uint.
Uint,
|
||||
};
|
||||
|
||||
/// Tells whether a number format is one of the two integer formats.
/// @param nfmt Number format to test.
/// @return true for NumberFormat::Sint and NumberFormat::Uint, false for anything else.
[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
    switch (nfmt) {
    case AmdGpu::NumberFormat::Sint:
    case AmdGpu::NumberFormat::Uint:
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
/// Maps a number format onto its NumberClass.
/// @param nfmt Number format to classify.
/// @return Sint for NumberFormat::Sint, Uint for NumberFormat::Uint, and Float for
///         every other format.
[[nodiscard]] constexpr NumberClass GetNumberClass(NumberFormat nfmt) {
    if (nfmt == NumberFormat::Sint) {
        return Sint;
    }
    if (nfmt == NumberFormat::Uint) {
        return Uint;
    }
    // Everything that is not an integer format falls into the Float class.
    return Float;
}
|
||||
|
||||
[[nodiscard]] std::string_view NameOf(DataFormat fmt);
|
||||
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
|
||||
|
||||
|
@ -157,7 +157,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto& buffer = vs_info.ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||
const auto& buffer = input.GetSharp(vs_info);
|
||||
if (buffer.GetSize() == 0) {
|
||||
continue;
|
||||
}
|
||||
|
@ -55,8 +55,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto buffer =
|
||||
vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||
const auto buffer = input.GetSharp(*vs_info);
|
||||
if (buffer.GetSize() == 0) {
|
||||
continue;
|
||||
}
|
||||
|
@ -264,6 +264,7 @@ bool Instance::CreateDevice() {
|
||||
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
||||
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
|
||||
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
|
||||
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
|
||||
|
||||
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
|
||||
// with extensions.
|
||||
@ -399,6 +400,9 @@ bool Instance::CreateDevice() {
|
||||
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT{
|
||||
.primitiveTopologyListRestart = true,
|
||||
},
|
||||
vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
|
||||
.legacyVertexAttributes = true,
|
||||
},
|
||||
#ifdef __APPLE__
|
||||
feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
|
||||
#endif
|
||||
@ -438,6 +442,9 @@ bool Instance::CreateDevice() {
|
||||
if (!vertex_input_dynamic_state) {
|
||||
device_chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>();
|
||||
}
|
||||
if (!legacy_vertex_attributes) {
|
||||
device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
|
||||
}
|
||||
|
||||
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
|
||||
if (device_result != vk::Result::eSuccess) {
|
||||
|
@ -143,10 +143,16 @@ public:
|
||||
return maintenance5;
|
||||
}
|
||||
|
||||
/// Returns true when VK_EXT_primitive_topology_list_restart is supported.
/// The value is the `list_restart` flag, which Instance::CreateDevice assigns from the
/// result of add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME).
|
||||
bool IsListRestartSupported() const {
|
||||
return list_restart;
|
||||
}
|
||||
|
||||
/// Returns true when VK_EXT_legacy_vertex_attributes is supported.
/// The value is the `legacy_vertex_attributes` flag, which Instance::CreateDevice assigns
/// from the result of add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME).
|
||||
bool IsLegacyVertexAttributesSupported() const {
|
||||
return legacy_vertex_attributes;
|
||||
}
|
||||
|
||||
/// Returns true when geometry shaders are supported by the device
|
||||
bool IsGeometryStageSupported() const {
|
||||
return features.geometryShader;
|
||||
@ -315,6 +321,7 @@ private:
|
||||
bool null_descriptor{};
|
||||
bool maintenance5{};
|
||||
bool list_restart{};
|
||||
bool legacy_vertex_attributes{};
|
||||
u64 min_imported_host_pointer_alignment{};
|
||||
u32 subgroup_size{};
|
||||
bool tooling_info{};
|
||||
|
@ -169,6 +169,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
||||
.support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
|
||||
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
|
||||
.support_explicit_workgroup_layout = true,
|
||||
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
|
||||
};
|
||||
auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
|
||||
ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",
|
||||
@ -347,8 +348,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||
continue;
|
||||
}
|
||||
const auto& buffer =
|
||||
vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
|
||||
const auto& buffer = input.GetSharp(*vs_info);
|
||||
if (buffer.GetSize() == 0) {
|
||||
continue;
|
||||
}
|
||||
@ -431,7 +431,7 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
|
||||
Program* program = program_pool.Create(stage, params);
|
||||
auto start = binding;
|
||||
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
|
||||
const auto spec = Shader::StageSpecialization(program->info, runtime_info, start);
|
||||
const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start);
|
||||
program->AddPermut(module, std::move(spec));
|
||||
it_pgm.value() = program;
|
||||
return std::make_tuple(&program->info, module, HashCombine(params.hash, 0));
|
||||
@ -440,7 +440,7 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
|
||||
Program* program = it_pgm->second;
|
||||
auto& info = program->info;
|
||||
info.RefreshFlatBuf();
|
||||
const auto spec = Shader::StageSpecialization(info, runtime_info, binding);
|
||||
const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
|
||||
size_t perm_idx = program->modules.size();
|
||||
vk::ShaderModule module{};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user