shadPS4/src/shader_recompiler/specialization.h
TheTurtle 399a725343
shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates (#3238)
* shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates

* flatten_extended_userdata: Remove special step rate buffer handling

* Review comments

* spirv_emit_context: Name all instance rate attribs properly

* spirv: Merge ReadConstBuffer again

template function only has 1 user now

* attribute: Add missing attributes

* translate: Reimplement step rate instance id

* Resolve validation warnings

* shader_recompiler: Separate vertex inputs from LS stage, cleanup tess
2025-07-14 00:32:02 +03:00

230 lines
9.2 KiB
C++

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <bitset>
#include "common/types.h"
#include "frontend/fetch_shader.h"
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/info.h"
namespace Shader {
struct VsAttribSpecialization {
u32 divisor{};
AmdGpu::NumberClass num_class{};
AmdGpu::CompMapping dst_select{};
auto operator<=>(const VsAttribSpecialization&) const = default;
};
struct BufferSpecialization {
u32 stride : 14;
u32 is_storage : 1;
u32 is_formatted : 1;
u32 swizzle_enable : 1;
u32 data_format : 6;
u32 num_format : 4;
u32 index_stride : 2;
u32 element_size : 2;
AmdGpu::CompMapping dst_select{};
AmdGpu::NumberConversion num_conversion{};
bool operator==(const BufferSpecialization& other) const {
return stride == other.stride && is_storage == other.is_storage &&
is_formatted == other.is_formatted && swizzle_enable == other.swizzle_enable &&
(!is_formatted ||
(data_format == other.data_format && num_format == other.num_format &&
dst_select == other.dst_select && num_conversion == other.num_conversion)) &&
(!swizzle_enable ||
(index_stride == other.index_stride && element_size == other.element_size));
}
};
struct ImageSpecialization {
AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
bool is_integer = false;
bool is_storage = false;
bool is_cube = false;
AmdGpu::CompMapping dst_select{};
AmdGpu::NumberConversion num_conversion{};
auto operator<=>(const ImageSpecialization&) const = default;
};
struct FMaskSpecialization {
u32 width;
u32 height;
auto operator<=>(const FMaskSpecialization&) const = default;
};
struct SamplerSpecialization {
bool force_unnormalized = false;
auto operator<=>(const SamplerSpecialization&) const = default;
};
/**
* Alongside runtime information, this structure also checks bound resources
* for compatibility. Can be used as a key for storing shader permutations.
* Is separate from runtime information, because resource layout can only be deduced
* after the first compilation of a module.
*/
struct StageSpecialization {
static constexpr size_t MaxStageResources = 128;
const Shader::Info* info;
RuntimeInfo runtime_info;
std::bitset<MaxStageResources> bitset{};
std::optional<Gcn::FetchShaderData> fetch_shader_data{};
boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
boost::container::small_vector<BufferSpecialization, 16> buffers;
boost::container::small_vector<ImageSpecialization, 16> images;
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
boost::container::small_vector<SamplerSpecialization, 16> samplers;
Backend::Bindings start{};
StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, const Profile& profile_,
Backend::Bindings start_)
: info{&info_}, runtime_info{runtime_info_}, start{start_} {
fetch_shader_data = Gcn::ParseFetchShader(info_);
if (info_.stage == Stage::Vertex && fetch_shader_data) {
// Specialize shader on VS input number types to follow spec.
ForEachSharp(vs_attribs, fetch_shader_data->attributes,
[&profile_, this](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
using InstanceIdType = Shader::Gcn::VertexAttribute::InstanceIdType;
if (const auto step_rate = desc.GetStepRate();
step_rate != InstanceIdType::None) {
spec.divisor = step_rate == InstanceIdType::OverStepRate0
? runtime_info.vs_info.step_rate_0
: (step_rate == InstanceIdType::OverStepRate1
? runtime_info.vs_info.step_rate_1
: 1);
}
spec.num_class = profile_.support_legacy_vertex_attributes
? AmdGpu::NumberClass{}
: AmdGpu::GetNumberClass(sharp.GetNumberFmt());
spec.dst_select = sharp.DstSelect();
});
}
u32 binding{};
ForEachSharp(binding, buffers, info->buffers,
[profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.stride = sharp.GetStride();
spec.is_storage = desc.IsStorage(sharp, profile_);
spec.is_formatted = desc.is_formatted;
spec.swizzle_enable = sharp.swizzle_enable;
if (spec.is_formatted) {
spec.data_format = static_cast<u32>(sharp.GetDataFmt());
spec.num_format = static_cast<u32>(sharp.GetNumberFmt());
spec.dst_select = sharp.DstSelect();
spec.num_conversion = sharp.GetNumberConversion();
}
if (spec.swizzle_enable) {
spec.index_stride = sharp.index_stride;
spec.element_size = sharp.element_size;
}
});
ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.GetViewType(desc.is_array);
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
spec.is_storage = desc.is_written;
spec.is_cube = sharp.IsCube();
if (spec.is_storage) {
spec.dst_select = sharp.DstSelect();
}
spec.num_conversion = sharp.GetNumberConversion();
});
ForEachSharp(binding, fmasks, info->fmasks,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.width = sharp.width;
spec.height = sharp.height;
});
ForEachSharp(samplers, info->samplers,
[](auto& spec, const auto& desc, AmdGpu::Sampler sharp) {
spec.force_unnormalized = sharp.force_unnormalized;
});
// Initialize runtime_info fields that rely on analysis in tessellation passes
if (info->l_stage == LogicalStage::TessellationControl ||
info->l_stage == LogicalStage::TessellationEval) {
Shader::TessellationDataConstantBuffer tess_constants;
info->ReadTessConstantBuffer(tess_constants);
if (info->l_stage == LogicalStage::TessellationControl) {
runtime_info.hs_info.InitFromTessConstants(tess_constants);
} else {
runtime_info.vs_info.InitFromTessConstants(tess_constants);
}
}
}
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
for (const auto& desc : desc_list) {
auto& spec = spec_list.emplace_back();
const auto sharp = desc.GetSharp(*info);
if (!sharp) {
continue;
}
func(spec, desc, sharp);
}
}
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
for (const auto& desc : desc_list) {
auto& spec = spec_list.emplace_back();
const auto sharp = desc.GetSharp(*info);
if (!sharp) {
binding++;
continue;
}
bitset.set(binding++);
func(spec, desc, sharp);
}
}
bool operator==(const StageSpecialization& other) const {
if (start != other.start) {
return false;
}
if (runtime_info != other.runtime_info) {
return false;
}
if (fetch_shader_data != other.fetch_shader_data) {
return false;
}
for (u32 i = 0; i < vs_attribs.size(); i++) {
if (vs_attribs[i] != other.vs_attribs[i]) {
return false;
}
}
u32 binding{};
for (u32 i = 0; i < buffers.size(); i++) {
if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
return false;
}
}
for (u32 i = 0; i < images.size(); i++) {
if (other.bitset[binding++] && images[i] != other.images[i]) {
return false;
}
}
for (u32 i = 0; i < fmasks.size(); i++) {
if (other.bitset[binding++] && fmasks[i] != other.fmasks[i]) {
return false;
}
}
for (u32 i = 0; i < samplers.size(); i++) {
if (samplers[i] != other.samplers[i]) {
return false;
}
}
return true;
}
};
} // namespace Shader