shader_recompiler: Specialize on vertex attribute number types.

This commit is contained in:
squidbus 2024-12-03 08:12:27 -08:00
parent 063dc4afe3
commit 028df5dfef
15 changed files with 110 additions and 66 deletions

View File

@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/ir/passes/srt.h"
#include "video_core/amdgpu/types.h"
@ -155,18 +156,12 @@ void EmitContext::DefineInterfaces() {
}
const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
switch (fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::SnormNz:
case AmdGpu::NumberFormat::Sscaled:
case AmdGpu::NumberFormat::Uscaled:
case AmdGpu::NumberFormat::Srgb:
switch (GetNumberClass(fmt)) {
case AmdGpu::NumberClass::Float:
return ctx.F32;
case AmdGpu::NumberFormat::Sint:
case AmdGpu::NumberClass::Sint:
return ctx.S32;
case AmdGpu::NumberFormat::Uint:
case AmdGpu::NumberClass::Uint:
return ctx.U32;
default:
break;
@ -176,18 +171,12 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
u32 num_components, bool output) {
switch (fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::SnormNz:
case AmdGpu::NumberFormat::Sscaled:
case AmdGpu::NumberFormat::Uscaled:
case AmdGpu::NumberFormat::Srgb:
switch (GetNumberClass(fmt)) {
case AmdGpu::NumberClass::Float:
return {id, output ? output_f32 : input_f32, F32[1], num_components, false};
case AmdGpu::NumberFormat::Uint:
case AmdGpu::NumberClass::Uint:
return {id, output ? output_u32 : input_u32, U32[1], num_components, true};
case AmdGpu::NumberFormat::Sint:
case AmdGpu::NumberClass::Sint:
return {id, output ? output_s32 : input_s32, S32[1], num_components, true};
default:
break;
@ -280,13 +269,13 @@ void EmitContext::DefineInputs() {
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
const auto fetch_shader = info.LoadFetchShader();
const auto fetch_shader = Gcn::ParseFetchShader(info);
if (!fetch_shader) {
break;
}
for (const auto& attrib : fetch_shader->attributes) {
ASSERT(attrib.semantic < IR::NumParams);
const auto sharp = info.GetSharp(attrib);
const auto sharp = attrib.GetSharp(info);
const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
if (attrib.UsesStepRates()) {
const u32 rate_idx =

View File

@ -34,7 +34,13 @@ namespace Shader::Gcn {
* We take the reverse way, extract the original input semantics from these instructions.
**/
FetchShaderData ParseFetchShader(const u32* code) {
std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
if (!info.has_fetch_shader) {
return std::nullopt;
}
const u32* code;
std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
FetchShaderData data{.code = code};
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;

View File

@ -6,6 +6,7 @@
#include <ranges>
#include <vector>
#include "common/types.h"
#include "shader_recompiler/info.h"
namespace Shader::Gcn {
@ -33,6 +34,10 @@ struct VertexAttribute {
return step_rate == OverStepRate0 || step_rate == OverStepRate1;
}
/// Fetches the AmdGpu::Buffer descriptor that backs this vertex attribute.
/// The descriptor is read from the given shader info via Info::ReadUdReg, addressed by this
/// attribute's own sgpr_base and dword_offset members.
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
}
bool operator==(const VertexAttribute& other) const {
return semantic == other.semantic && dest_vgpr == other.dest_vgpr &&
num_elements == other.num_elements && sgpr_base == other.sgpr_base &&
@ -59,6 +64,6 @@ struct FetchShaderData {
}
};
FetchShaderData ParseFetchShader(const u32* code);
std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);
} // namespace Shader::Gcn

View File

@ -370,7 +370,9 @@ void Translator::EmitFetch(const GcnInst& inst) {
// Read the pointer to the fetch shader assembly.
info.has_fetch_shader = true;
info.fetch_shader_sgpr_base = inst.src[0].code;
const auto fetch_data = info.LoadFetchShader();
const auto fetch_data = ParseFetchShader(info);
ASSERT(fetch_data.has_value());
if (Config::dumpShaders()) {
using namespace Common::FS;

View File

@ -9,7 +9,6 @@
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/types.h"
#include "frontend/fetch_shader.h"
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/frontend/copy_shader.h"
#include "shader_recompiler/ir/attribute.h"
@ -231,22 +230,6 @@ struct Info {
bnd.user_data += ud_mask.NumRegs();
}
/// Computes the {vertex offset, instance offset} pair to use for a draw.
///
/// The vertex offset starts out as regs.index_offset. Only when that value is zero and the fetch
/// shader names a vertex-offset SGPR (anything other than -1) is it replaced by that user data
/// register. The instance offset is zero unless the fetch shader names an instance-offset SGPR.
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets(
    const AmdGpu::Liverpool::Regs& regs,
    const std::optional<Gcn::FetchShaderData>& fetch_shader) const {
    u32 first_vertex = regs.index_offset;
    u32 first_instance = 0;
    if (!fetch_shader) {
        // Without a fetch shader nothing can override the register-provided values.
        return {first_vertex, first_instance};
    }

    const auto& fetch = *fetch_shader;
    const bool has_vertex_sgpr = fetch.vertex_offset_sgpr != -1;
    const bool has_instance_sgpr = fetch.instance_offset_sgpr != -1;
    if (has_vertex_sgpr && first_vertex == 0) {
        first_vertex = user_data[fetch.vertex_offset_sgpr];
    }
    if (has_instance_sgpr) {
        first_instance = user_data[fetch.instance_offset_sgpr];
    }
    return {first_vertex, first_instance};
}
void RefreshFlatBuf() {
flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
ASSERT(user_data.size() <= NumUserDataRegs);
@ -256,20 +239,6 @@ struct Info {
srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
}
}
[[nodiscard]] std::optional<Gcn::FetchShaderData> LoadFetchShader() const {
if (!has_fetch_shader) {
return std::nullopt;
}
const u32* code;
std::memcpy(&code, &user_data[fetch_shader_sgpr_base], sizeof(code));
return Gcn::ParseFetchShader(code);
}
/// Fetches the AmdGpu::Buffer descriptor for the given vertex attribute.
/// Forwards the attribute's sgpr_base and dword_offset to ReadUdReg.
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(
const Gcn::VertexAttribute& attrib) const noexcept {
return ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
}
};
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {

View File

@ -22,6 +22,7 @@ struct Profile {
bool support_fp32_denorm_preserve{};
bool support_fp32_denorm_flush{};
bool support_explicit_workgroup_layout{};
bool support_legacy_vertex_attributes{};
bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{};
bool needs_manual_interpolation{};

View File

@ -13,6 +13,12 @@
namespace Shader {
// Specialization data recorded for one vertex shader input attribute.
struct VsAttribSpecialization {
// Number class of the attribute's buffer format; value-initialized when not filled in.
AmdGpu::NumberClass num_class{};
// Defaulted member-wise three-way comparison (this also provides a defaulted operator==).
auto operator<=>(const VsAttribSpecialization&) const = default;
};
struct BufferSpecialization {
u16 stride : 14;
u16 is_storage : 1;
@ -52,6 +58,7 @@ struct StageSpecialization {
const Shader::Info* info;
RuntimeInfo runtime_info;
Gcn::FetchShaderData fetch_shader_data{};
boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
std::bitset<MaxStageResources> bitset{};
boost::container::small_vector<BufferSpecialization, 16> buffers;
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
@ -59,11 +66,18 @@ struct StageSpecialization {
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
Backend::Bindings start{};
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
Backend::Bindings start_)
explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
const Profile& profile_, Backend::Bindings start_)
: info{&info_}, runtime_info{runtime_info_}, start{start_} {
if (const auto fetch_shader = info_.LoadFetchShader()) {
if (const auto fetch_shader = Gcn::ParseFetchShader(info_)) {
fetch_shader_data = *fetch_shader;
if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) {
// Specialize shader on VS input number types to follow spec.
ForEachSharp(vs_attribs, fetch_shader_data.attributes,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
});
}
}
u32 binding{};
if (info->has_readconst) {
@ -90,6 +104,17 @@ struct StageSpecialization {
});
}
/// Appends one default-constructed entry to spec_list for every descriptor in desc_list and
/// lets func fill it in from the descriptor's sharp.
///
/// The entry is appended before the sharp is inspected, so spec_list grows by exactly one
/// element per descriptor; descriptors whose sharp tests false keep the default entry and
/// func is not called for them.
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
    for (const auto& descriptor : desc_list) {
        auto& entry = spec_list.emplace_back();
        if (const auto sharp = descriptor.GetSharp(*info); sharp) {
            func(entry, descriptor, sharp);
        }
    }
}
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
for (const auto& desc : desc_list) {
auto& spec = spec_list.emplace_back();
@ -113,6 +138,11 @@ struct StageSpecialization {
if (fetch_shader_data != other.fetch_shader_data) {
return false;
}
for (u32 i = 0; i < vs_attribs.size(); i++) {
if (vs_attribs[i] != other.vs_attribs[i]) {
return false;
}
}
u32 binding{};
if (info->has_readconst != other.info->has_readconst) {
return false;

View File

@ -10,7 +10,24 @@
namespace AmdGpu {
[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
// Coarse classification of a NumberFormat, as returned by GetNumberClass.
// NOTE(review): this is an unscoped enum, so Float/Sint/Uint are also visible directly in the
// enclosing namespace and convert implicitly to integers.
enum NumberClass {
Float,
Sint,
Uint,
};
/// Maps a number format onto its number class: Sint and Uint map to the class of the same
/// name, and every other format is classified as Float.
[[nodiscard]] constexpr NumberClass GetNumberClass(const NumberFormat nfmt) {
    if (nfmt == NumberFormat::Sint) {
        return NumberClass::Sint;
    }
    if (nfmt == NumberFormat::Uint) {
        return NumberClass::Uint;
    }
    return NumberClass::Float;
}
[[nodiscard]] constexpr bool IsInteger(const NumberFormat nfmt) {
return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
}

View File

@ -158,7 +158,7 @@ bool BufferCache::BindVertexBuffers(
continue;
}
const auto& buffer = vs_info.GetSharp(attrib);
const auto& buffer = attrib.GetSharp(vs_info);
if (buffer.GetSize() == 0) {
continue;
}

View File

@ -59,7 +59,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
continue;
}
const auto buffer = vs_info.GetSharp(attrib);
const auto buffer = attrib.GetSharp(vs_info);
if (buffer.GetSize() == 0) {
continue;
}

View File

@ -4,6 +4,7 @@
#include <xxhash.h>
#include "common/types.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/renderer_vulkan/vk_pipeline_common.h"

View File

@ -265,6 +265,7 @@ bool Instance::CreateDevice() {
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
// with extensions.
@ -403,6 +404,9 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR{
.fragmentShaderBarycentric = true,
},
vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
.legacyVertexAttributes = true,
},
#ifdef __APPLE__
feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
#endif
@ -445,6 +449,9 @@ bool Instance::CreateDevice() {
if (!fragment_shader_barycentric) {
device_chain.unlink<vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>();
}
if (!legacy_vertex_attributes) {
device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
}
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
if (device_result != vk::Result::eSuccess) {

View File

@ -148,10 +148,16 @@ public:
return fragment_shader_barycentric;
}
/// Returns true when VK_EXT_primitive_topology_list_restart is supported.
/// Reports the list_restart flag, which CreateDevice assigns from the result of requesting
/// that extension.
bool IsListRestartSupported() const {
return list_restart;
}
/// Returns true when VK_EXT_legacy_vertex_attributes is supported.
/// Reports the legacy_vertex_attributes flag, which CreateDevice assigns from the result of
/// requesting that extension.
bool IsLegacyVertexAttributesSupported() const {
return legacy_vertex_attributes;
}
/// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const {
return features.geometryShader;
@ -320,6 +326,7 @@ private:
bool null_descriptor{};
bool maintenance5{};
bool list_restart{};
bool legacy_vertex_attributes{};
u64 min_imported_host_pointer_alignment{};
u32 subgroup_size{};
bool tooling_info{};

View File

@ -169,6 +169,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
.support_explicit_workgroup_layout = true,
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
};
@ -352,7 +353,7 @@ bool PipelineCache::RefreshGraphicsKey() {
if (attrib.UsesStepRates()) {
continue;
}
const auto& buffer = vs_info->GetSharp(attrib);
const auto& buffer = attrib.GetSharp(*vs_info);
if (buffer.GetSize() == 0) {
continue;
}
@ -436,7 +437,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
Program* program = program_pool.Create(stage, params);
auto start = binding;
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
const auto spec = Shader::StageSpecialization(program->info, runtime_info, start);
const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start);
program->AddPermut(module, std::move(spec));
it_pgm.value() = program;
return std::make_tuple(&program->info, module, spec.fetch_shader_data,
@ -446,7 +447,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
Program* program = it_pgm->second;
auto& info = program->info;
info.RefreshFlatBuf();
const auto spec = Shader::StageSpecialization(info, runtime_info, binding);
const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
size_t perm_idx = program->modules.size();
vk::ShaderModule module{};

View File

@ -194,7 +194,16 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
BeginRendering(*pipeline, state);
UpdateDynamicState(*pipeline);
const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(regs, fetch_shader);
u32 vertex_offset = regs.index_offset;
u32 instance_offset = 0;
if (fetch_shader) {
if (vertex_offset == 0 && fetch_shader->vertex_offset_sgpr != -1) {
vertex_offset = vs_info.user_data[fetch_shader->vertex_offset_sgpr];
}
if (fetch_shader->instance_offset_sgpr != -1) {
instance_offset = vs_info.user_data[fetch_shader->instance_offset_sgpr];
}
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());