shader_recompiler: Specialize on vertex attribute number types.
This commit is contained in:
parent 063dc4afe3
commit 028df5dfef
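Vertex shaders are now specialized on each attribute's number class (Float, Sint, Uint) rather than on its raw NumberFormat, and the specialization is skipped when the device supports VK_EXT_legacy_vertex_attributes. Fetch-shader parsing also moves behind an std::optional-returning ParseFetchShader(const Shader::Info&), which reads the shader pointer out of the user-data registers on demand; the Info::LoadFetchShader, Info::GetSharp, and Info::GetDrawOffsets helpers are dissolved into their call sites. A minimal standalone sketch of the format-to-class collapse (editor's illustration, not part of the commit; the NumberFormat list here is an abbreviated stand-in) showing why three SPIR-V input types per attribute suffice:

#include <cstdio>

// Mirrors the mapping added to namespace AmdGpu below: every NumberFormat
// collapses to one of three classes, so at most three input-type
// permutations (F32/S32/U32) are needed per vertex attribute.
enum class NumberFormat { Unorm, Snorm, Uint, Sint, Float, Srgb }; // abbreviated
enum class NumberClass { Float, Sint, Uint };

constexpr NumberClass GetNumberClass(NumberFormat nfmt) {
    switch (nfmt) {
    case NumberFormat::Sint:
        return NumberClass::Sint;
    case NumberFormat::Uint:
        return NumberClass::Uint;
    default:
        return NumberClass::Float; // Unorm, Snorm, Srgb, ... are all read as float
    }
}

int main() {
    static_assert(GetNumberClass(NumberFormat::Unorm) == NumberClass::Float);
    static_assert(GetNumberClass(NumberFormat::Srgb) == NumberClass::Float);
    static_assert(GetNumberClass(NumberFormat::Sint) == NumberClass::Sint);
    std::puts("Unorm and Srgb attributes share the Float permutation");
}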
@@ -4,6 +4,7 @@
 #include "common/assert.h"
 #include "common/div_ceil.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/ir/passes/srt.h"
 #include "video_core/amdgpu/types.h"

@@ -155,18 +156,12 @@ void EmitContext::DefineInterfaces() {
 }

 const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
-    switch (fmt) {
-    case AmdGpu::NumberFormat::Float:
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::SnormNz:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Srgb:
+    switch (GetNumberClass(fmt)) {
+    case AmdGpu::NumberClass::Float:
         return ctx.F32;
-    case AmdGpu::NumberFormat::Sint:
+    case AmdGpu::NumberClass::Sint:
         return ctx.S32;
-    case AmdGpu::NumberFormat::Uint:
+    case AmdGpu::NumberClass::Uint:
         return ctx.U32;
     default:
         break;
@@ -176,18 +171,12 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {

 EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
                                                           u32 num_components, bool output) {
-    switch (fmt) {
-    case AmdGpu::NumberFormat::Float:
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::SnormNz:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Srgb:
+    switch (GetNumberClass(fmt)) {
+    case AmdGpu::NumberClass::Float:
         return {id, output ? output_f32 : input_f32, F32[1], num_components, false};
-    case AmdGpu::NumberFormat::Uint:
+    case AmdGpu::NumberClass::Uint:
         return {id, output ? output_u32 : input_u32, U32[1], num_components, true};
-    case AmdGpu::NumberFormat::Sint:
+    case AmdGpu::NumberClass::Sint:
         return {id, output ? output_s32 : input_s32, S32[1], num_components, true};
     default:
         break;
@@ -280,13 +269,13 @@ void EmitContext::DefineInputs() {
         base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
         instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);

-        const auto fetch_shader = info.LoadFetchShader();
+        const auto fetch_shader = Gcn::ParseFetchShader(info);
         if (!fetch_shader) {
             break;
         }
         for (const auto& attrib : fetch_shader->attributes) {
             ASSERT(attrib.semantic < IR::NumParams);
-            const auto sharp = info.GetSharp(attrib);
+            const auto sharp = attrib.GetSharp(info);
             const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
             if (attrib.UsesStepRates()) {
                 const u32 rate_idx =
@@ -34,7 +34,13 @@ namespace Shader::Gcn {
  * We take the reverse way, extract the original input semantics from these instructions.
  **/

-FetchShaderData ParseFetchShader(const u32* code) {
+std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
+    if (!info.has_fetch_shader) {
+        return std::nullopt;
+    }
+    const u32* code;
+    std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
+
     FetchShaderData data{.code = code};
     GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
     GcnDecodeContext decoder;
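The memcpy above is the usual strict-aliasing-safe way to reassemble the 64-bit fetch-shader address from two consecutive 32-bit user-data registers. A self-contained sketch of the idiom (editor's illustration; the register index and shader bytes are made-up values):

#include <array>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
    // Pretend guest state: the address of the fetch shader code is written
    // across user_data[2] and user_data[3] (sgpr_base == 2 is arbitrary here).
    static const std::uint32_t fetch_code[] = {0xDEADBEEFu};
    std::array<std::uint32_t, 16> user_data{};
    const auto addr = reinterpret_cast<std::uintptr_t>(&fetch_code[0]);
    std::memcpy(&user_data[2], &addr, sizeof(addr));

    // The idiom from ParseFetchShader: memcpy the dwords back into a pointer.
    const std::uint32_t* code = nullptr;
    std::memcpy(&code, &user_data[2], sizeof(code));
    std::printf("first dword: 0x%08X\n", code[0]);
}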
@@ -6,6 +6,7 @@
 #include <ranges>
 #include <vector>
 #include "common/types.h"
+#include "shader_recompiler/info.h"

 namespace Shader::Gcn {

@@ -33,6 +34,10 @@ struct VertexAttribute {
         return step_rate == OverStepRate0 || step_rate == OverStepRate1;
     }

+    [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
+        return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
+    }
+
     bool operator==(const VertexAttribute& other) const {
         return semantic == other.semantic && dest_vgpr == other.dest_vgpr &&
                num_elements == other.num_elements && sgpr_base == other.sgpr_base &&
@@ -59,6 +64,6 @@ struct FetchShaderData {
     }
 };

-FetchShaderData ParseFetchShader(const u32* code);
+std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);

 } // namespace Shader::Gcn

@@ -370,7 +370,9 @@ void Translator::EmitFetch(const GcnInst& inst) {
     // Read the pointer to the fetch shader assembly.
     info.has_fetch_shader = true;
     info.fetch_shader_sgpr_base = inst.src[0].code;
-    const auto fetch_data = info.LoadFetchShader();
+
+    const auto fetch_data = ParseFetchShader(info);
+    ASSERT(fetch_data.has_value());

     if (Config::dumpShaders()) {
         using namespace Common::FS;
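With this, EmitFetch only records where the fetch shader lives (has_fetch_shader and fetch_shader_sgpr_base); the actual parsing happens on demand wherever ParseFetchShader(info) is called, instead of the result being captured once through LoadFetchShader.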
@@ -9,7 +9,6 @@
 #include <boost/container/static_vector.hpp>
 #include "common/assert.h"
 #include "common/types.h"
-#include "frontend/fetch_shader.h"
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/frontend/copy_shader.h"
 #include "shader_recompiler/ir/attribute.h"
@@ -231,22 +230,6 @@ struct Info {
         bnd.user_data += ud_mask.NumRegs();
     }

-    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets(
-        const AmdGpu::Liverpool::Regs& regs,
-        const std::optional<Gcn::FetchShaderData>& fetch_shader) const {
-        u32 vertex_offset = regs.index_offset;
-        u32 instance_offset = 0;
-        if (fetch_shader) {
-            if (vertex_offset == 0 && fetch_shader->vertex_offset_sgpr != -1) {
-                vertex_offset = user_data[fetch_shader->vertex_offset_sgpr];
-            }
-            if (fetch_shader->instance_offset_sgpr != -1) {
-                instance_offset = user_data[fetch_shader->instance_offset_sgpr];
-            }
-        }
-        return {vertex_offset, instance_offset};
-    }
-
     void RefreshFlatBuf() {
         flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
         ASSERT(user_data.size() <= NumUserDataRegs);
@@ -256,20 +239,6 @@ struct Info {
             srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
         }
     }
-
-    [[nodiscard]] std::optional<Gcn::FetchShaderData> LoadFetchShader() const {
-        if (!has_fetch_shader) {
-            return std::nullopt;
-        }
-        const u32* code;
-        std::memcpy(&code, &user_data[fetch_shader_sgpr_base], sizeof(code));
-        return Gcn::ParseFetchShader(code);
-    }
-
-    [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(
-        const Gcn::VertexAttribute& attrib) const noexcept {
-        return ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
-    }
 };

 constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
@@ -22,6 +22,7 @@ struct Profile {
     bool support_fp32_denorm_preserve{};
     bool support_fp32_denorm_flush{};
     bool support_explicit_workgroup_layout{};
+    bool support_legacy_vertex_attributes{};
     bool has_broken_spirv_clamp{};
     bool lower_left_origin_mode{};
     bool needs_manual_interpolation{};
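support_legacy_vertex_attributes mirrors the device capability into the recompiler-facing Profile; the StageSpecialization constructor below checks this flag to decide whether per-attribute number classes must become part of the specialization key.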
@@ -13,6 +13,12 @@

 namespace Shader {

+struct VsAttribSpecialization {
+    AmdGpu::NumberClass num_class{};
+
+    auto operator<=>(const VsAttribSpecialization&) const = default;
+};
+
 struct BufferSpecialization {
     u16 stride : 14;
     u16 is_storage : 1;
@@ -52,6 +58,7 @@ struct StageSpecialization {
     const Shader::Info* info;
     RuntimeInfo runtime_info;
     Gcn::FetchShaderData fetch_shader_data{};
+    boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
     std::bitset<MaxStageResources> bitset{};
     boost::container::small_vector<BufferSpecialization, 16> buffers;
     boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
@@ -59,11 +66,18 @@ struct StageSpecialization {
     boost::container::small_vector<FMaskSpecialization, 8> fmasks;
     Backend::Bindings start{};

-    explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
-                                 Backend::Bindings start_)
+    explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
+                                 const Profile& profile_, Backend::Bindings start_)
         : info{&info_}, runtime_info{runtime_info_}, start{start_} {
-        if (const auto fetch_shader = info_.LoadFetchShader()) {
+        if (const auto fetch_shader = Gcn::ParseFetchShader(info_)) {
             fetch_shader_data = *fetch_shader;
+            if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) {
+                // Specialize shader on VS input number types to follow spec.
+                ForEachSharp(vs_attribs, fetch_shader_data.attributes,
+                             [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
+                                 spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
+                             });
+            }
         }
         u32 binding{};
         if (info->has_readconst) {
@@ -90,6 +104,17 @@ struct StageSpecialization {
             });
     }

+    void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
+        for (const auto& desc : desc_list) {
+            auto& spec = spec_list.emplace_back();
+            const auto sharp = desc.GetSharp(*info);
+            if (!sharp) {
+                continue;
+            }
+            func(spec, desc, sharp);
+        }
+    }
+
     void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
         for (const auto& desc : desc_list) {
             auto& spec = spec_list.emplace_back();
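The new overload relies on C++20 auto parameters (abbreviated function templates) and skips invalid sharps. A toy standalone version of the pattern (editor's sketch; Spec and Desc are hypothetical stand-ins for the real specialization and descriptor types, and the validity check is omitted):

#include <cstdio>
#include <vector>

struct Spec { int num_class{}; };
struct Desc { int fmt{}; };

// 'auto' parameters make this a function template; the callback receives the
// freshly emplaced spec entry alongside the descriptor it was derived from.
void ForEach(auto& spec_list, const auto& desc_list, auto&& func) {
    for (const auto& desc : desc_list) {
        auto& spec = spec_list.emplace_back(); // one spec entry per descriptor
        func(spec, desc);
    }
}

int main() {
    std::vector<Desc> descs{{1}, {2}};
    std::vector<Spec> specs;
    ForEach(specs, descs, [](auto& spec, const auto& desc) { spec.num_class = desc.fmt; });
    std::printf("%zu specs, first class %d\n", specs.size(), specs[0].num_class);
}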
@@ -113,6 +138,11 @@ struct StageSpecialization {
         if (fetch_shader_data != other.fetch_shader_data) {
             return false;
         }
+        for (u32 i = 0; i < vs_attribs.size(); i++) {
+            if (vs_attribs[i] != other.vs_attribs[i]) {
+                return false;
+            }
+        }
         u32 binding{};
         if (info->has_readconst != other.info->has_readconst) {
             return false;
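A short illustration (editor's sketch with hypothetical values) of why the defaulted operator<=> on VsAttribSpecialization suffices for the loop above: two permutations of the same shader stop comparing equal as soon as any attribute's number class differs, which is what forces a separately compiled module.

#include <compare>
#include <cstdio>
#include <vector>

enum class NumberClass { Float, Sint, Uint };

struct VsAttribSpecialization {
    NumberClass num_class{};
    auto operator<=>(const VsAttribSpecialization&) const = default; // also gives ==
};

int main() {
    std::vector<VsAttribSpecialization> a{{NumberClass::Float}, {NumberClass::Uint}};
    std::vector<VsAttribSpecialization> b{{NumberClass::Sint}, {NumberClass::Uint}};
    // Same shader bytecode, different attribute classes -> different permutation.
    std::printf("same permutation: %s\n", a == b ? "yes" : "no");
}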
@@ -10,7 +10,24 @@

 namespace AmdGpu {

-[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
+enum NumberClass {
+    Float,
+    Sint,
+    Uint,
+};
+
+[[nodiscard]] constexpr NumberClass GetNumberClass(const NumberFormat nfmt) {
+    switch (nfmt) {
+    case NumberFormat::Sint:
+        return Sint;
+    case NumberFormat::Uint:
+        return Uint;
+    default:
+        return Float;
+    }
+}
+
+[[nodiscard]] constexpr bool IsInteger(const NumberFormat nfmt) {
     return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
 }

@@ -158,7 +158,7 @@ bool BufferCache::BindVertexBuffers(
             continue;
         }

-        const auto& buffer = vs_info.GetSharp(attrib);
+        const auto& buffer = attrib.GetSharp(vs_info);
         if (buffer.GetSize() == 0) {
             continue;
         }
@@ -59,7 +59,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
             continue;
         }

-        const auto buffer = vs_info.GetSharp(attrib);
+        const auto buffer = attrib.GetSharp(vs_info);
         if (buffer.GetSize() == 0) {
             continue;
         }
@@ -4,6 +4,7 @@
 #include <xxhash.h>

 #include "common/types.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "video_core/renderer_vulkan/liverpool_to_vk.h"
 #include "video_core/renderer_vulkan/vk_common.h"
 #include "video_core/renderer_vulkan/vk_pipeline_common.h"
@@ -265,6 +265,7 @@ bool Instance::CreateDevice() {
     const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
     list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
     maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
+    legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);

     // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
     // with extensions.
@@ -403,6 +404,9 @@ bool Instance::CreateDevice() {
         vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR{
             .fragmentShaderBarycentric = true,
         },
+        vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
+            .legacyVertexAttributes = true,
+        },
 #ifdef __APPLE__
         feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
 #endif
@@ -445,6 +449,9 @@ bool Instance::CreateDevice() {
     if (!fragment_shader_barycentric) {
         device_chain.unlink<vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>();
     }
+    if (!legacy_vertex_attributes) {
+        device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
+    }

     auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
     if (device_result != vk::Result::eSuccess) {
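Note the three pieces an optional Vulkan feature needs here: the extension name is requested through add_extension, the EXT feature struct is chained into device creation with its feature enabled, and the struct is unlinked again when the extension is unavailable so device creation still succeeds on drivers that lack it.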
@@ -148,10 +148,16 @@ public:
         return fragment_shader_barycentric;
     }

     /// Returns true when VK_EXT_primitive_topology_list_restart is supported.
     bool IsListRestartSupported() const {
         return list_restart;
     }

+    /// Returns true when VK_EXT_legacy_vertex_attributes is supported.
+    bool IsLegacyVertexAttributesSupported() const {
+        return legacy_vertex_attributes;
+    }
+
     /// Returns true when geometry shaders are supported by the device
     bool IsGeometryStageSupported() const {
         return features.geometryShader;
@@ -320,6 +326,7 @@ private:
     bool null_descriptor{};
     bool maintenance5{};
     bool list_restart{};
+    bool legacy_vertex_attributes{};
     u64 min_imported_host_pointer_alignment{};
     u32 subgroup_size{};
     bool tooling_info{};
@@ -169,6 +169,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
         .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
         .support_explicit_workgroup_layout = true,
+        .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
         .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
                                       instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
     };
@@ -352,7 +353,7 @@ bool PipelineCache::RefreshGraphicsKey() {
         if (attrib.UsesStepRates()) {
             continue;
         }
-        const auto& buffer = vs_info->GetSharp(attrib);
+        const auto& buffer = attrib.GetSharp(*vs_info);
         if (buffer.GetSize() == 0) {
             continue;
         }
@@ -436,7 +437,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
     Program* program = program_pool.Create(stage, params);
     auto start = binding;
     const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
-    const auto spec = Shader::StageSpecialization(program->info, runtime_info, start);
+    const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start);
     program->AddPermut(module, std::move(spec));
     it_pgm.value() = program;
     return std::make_tuple(&program->info, module, spec.fetch_shader_data,
@@ -446,7 +447,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
     Program* program = it_pgm->second;
     auto& info = program->info;
     info.RefreshFlatBuf();
-    const auto spec = Shader::StageSpecialization(info, runtime_info, binding);
+    const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
     size_t perm_idx = program->modules.size();
     vk::ShaderModule module{};
@@ -194,7 +194,16 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
     BeginRendering(*pipeline, state);
     UpdateDynamicState(*pipeline);

-    const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(regs, fetch_shader);
+    u32 vertex_offset = regs.index_offset;
+    u32 instance_offset = 0;
+    if (fetch_shader) {
+        if (vertex_offset == 0 && fetch_shader->vertex_offset_sgpr != -1) {
+            vertex_offset = vs_info.user_data[fetch_shader->vertex_offset_sgpr];
+        }
+        if (fetch_shader->instance_offset_sgpr != -1) {
+            instance_offset = vs_info.user_data[fetch_shader->instance_offset_sgpr];
+        }
+    }

     const auto cmdbuf = scheduler.CommandBuffer();
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
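With Info::GetDrawOffsets gone, the offset resolution now lives inline in Rasterizer::Draw. A compilable restatement of just that logic (editor's sketch; the register and fetch-shader state are simplified stand-ins for the real Liverpool structures):

#include <cstdint>
#include <cstdio>

struct FetchShaderData {
    std::int32_t vertex_offset_sgpr = -1;   // -1: fetch shader consumes no offset
    std::int32_t instance_offset_sgpr = -1;
};

int main() {
    std::uint32_t user_data[16] = {};
    user_data[4] = 100; // pretend the guest wrote a vertex offset into SGPR 4
    const std::uint32_t reg_index_offset = 0; // stand-in for regs.index_offset

    FetchShaderData fetch{.vertex_offset_sgpr = 4, .instance_offset_sgpr = -1};

    // Same resolution order as Rasterizer::Draw: the register value wins,
    // otherwise the offset is read from the fetch shader's user-data SGPRs.
    std::uint32_t vertex_offset = reg_index_offset;
    std::uint32_t instance_offset = 0;
    if (vertex_offset == 0 && fetch.vertex_offset_sgpr != -1) {
        vertex_offset = user_data[fetch.vertex_offset_sgpr];
    }
    if (fetch.instance_offset_sgpr != -1) {
        instance_offset = user_data[fetch.instance_offset_sgpr];
    }
    std::printf("vertex_offset=%u instance_offset=%u\n", vertex_offset, instance_offset);
}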