shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates (#3238)

* shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates

* flatten_extended_userdata: Remove special step rate buffer handling

* Address review comments

* spirv_emit_context: Name all instance rate attribs properly

* spirv: Merge ReadConstBuffer again

The template function has only one user now.

* attribute: Add missing attributes

* translate: Reimplement step rate instance id

* Resolve validation warnings

* shader_recompiler: Separate vertex inputs from LS stage, clean up tessellation
This commit is contained in:
TheTurtle
2025-07-14 00:32:02 +03:00
committed by GitHub
parent b403e1be33
commit 399a725343
22 changed files with 208 additions and 274 deletions

View File

@@ -198,10 +198,13 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
}
void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
const auto& regs = liverpool->regs;
Vulkan::VertexInputs<vk::VertexInputAttributeDescription2EXT> attributes;
Vulkan::VertexInputs<vk::VertexInputBindingDescription2EXT> bindings;
Vulkan::VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT> divisors;
Vulkan::VertexInputs<AmdGpu::Buffer> guest_buffers;
pipeline.GetVertexInputs(attributes, bindings, guest_buffers);
pipeline.GetVertexInputs(attributes, bindings, divisors, guest_buffers,
regs.vgt_instance_step_rate_0, regs.vgt_instance_step_rate_1);
if (instance.IsVertexInputDynamicState()) {
// Update current vertex inputs.

View File

@@ -72,12 +72,21 @@ GraphicsPipeline::GraphicsPipeline(
VertexInputs<vk::VertexInputAttributeDescription> vertex_attributes;
VertexInputs<vk::VertexInputBindingDescription> vertex_bindings;
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT> divisors;
VertexInputs<AmdGpu::Buffer> guest_buffers;
if (!instance.IsVertexInputDynamicState()) {
GetVertexInputs(vertex_attributes, vertex_bindings, guest_buffers);
const auto& vs_info = runtime_infos[u32(Shader::LogicalStage::Vertex)].vs_info;
GetVertexInputs(vertex_attributes, vertex_bindings, divisors, guest_buffers,
vs_info.step_rate_0, vs_info.step_rate_1);
}
const vk::PipelineVertexInputDivisorStateCreateInfo divisor_state = {
.vertexBindingDivisorCount = static_cast<u32>(divisors.size()),
.pVertexBindingDivisors = divisors.data(),
};
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.pNext = divisors.empty() ? nullptr : &divisor_state,
.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
.pVertexBindingDescriptions = vertex_bindings.data(),
.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
@@ -304,19 +313,17 @@ GraphicsPipeline::GraphicsPipeline(
GraphicsPipeline::~GraphicsPipeline() = default;
template <typename Attribute, typename Binding>
void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
VertexInputs<Binding>& bindings,
VertexInputs<AmdGpu::Buffer>& guest_buffers) const {
void GraphicsPipeline::GetVertexInputs(
VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const {
using InstanceIdType = Shader::Gcn::VertexAttribute::InstanceIdType;
if (!fetch_shader || fetch_shader->attributes.empty()) {
return;
}
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
for (const auto& attrib : fetch_shader->attributes) {
if (attrib.UsesStepRates()) {
// Skip attribute binding as the data will be pulled by shader.
continue;
}
const auto step_rate = attrib.GetStepRate();
const auto& buffer = attrib.GetSharp(vs_info);
attributes.push_back(Attribute{
.location = attrib.semantic,
@@ -327,12 +334,19 @@ void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
bindings.push_back(Binding{
.binding = attrib.semantic,
.stride = buffer.GetStride(),
.inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
? vk::VertexInputRate::eVertex
: vk::VertexInputRate::eInstance,
.inputRate = step_rate == InstanceIdType::None ? vk::VertexInputRate::eVertex
: vk::VertexInputRate::eInstance,
});
const u32 divisor = step_rate == InstanceIdType::OverStepRate0
? step_rate_0
: (step_rate == InstanceIdType::OverStepRate1 ? step_rate_1 : 1);
if constexpr (std::is_same_v<Binding, vk::VertexInputBindingDescription2EXT>) {
bindings.back().divisor = 1;
bindings.back().divisor = divisor;
} else if (step_rate != InstanceIdType::None) {
divisors.push_back(vk::VertexInputBindingDivisorDescriptionEXT{
.binding = attrib.semantic,
.divisor = divisor,
});
}
guest_buffers.emplace_back(buffer);
}
@@ -342,11 +356,13 @@ void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
template void GraphicsPipeline::GetVertexInputs(
VertexInputs<vk::VertexInputAttributeDescription>& attributes,
VertexInputs<vk::VertexInputBindingDescription>& bindings,
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const;
template void GraphicsPipeline::GetVertexInputs(
VertexInputs<vk::VertexInputAttributeDescription2EXT>& attributes,
VertexInputs<vk::VertexInputBindingDescription2EXT>& bindings,
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const;
void GraphicsPipeline::BuildDescSetLayout() {
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;

View File

@@ -81,7 +81,9 @@ public:
/// Gets the attributes and bindings for vertex inputs.
template <typename Attribute, typename Binding>
void GetVertexInputs(VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0,
u32 step_rate_1) const;
private:
void BuildDescSetLayout();

View File

@@ -248,6 +248,7 @@ bool Instance::CreateDevice() {
// Required
ASSERT(add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME));
ASSERT(add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME));
ASSERT(add_extension(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME));
// Optional
depth_range_unrestricted = add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
@@ -436,6 +437,9 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
.legacyVertexAttributes = true,
},
vk::PhysicalDeviceVertexAttributeDivisorFeatures{
.vertexAttributeInstanceRateDivisor = true,
},
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
.shaderBufferFloat32AtomicMinMax =
shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax,

View File

@@ -94,15 +94,10 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
switch (stage) {
case Stage::Local: {
BuildCommon(regs.ls_program);
if (regs.stage_enable.IsStageEnabled(static_cast<u32>(Stage::Hull))) {
info.ls_info.links_with_tcs = true;
Shader::TessellationDataConstantBuffer tess_constants;
const auto* pgm = regs.ProgramForStage(static_cast<u32>(Stage::Hull));
const auto params = Liverpool::GetParams(*pgm);
const auto& hull_info = program_cache.at(params.hash)->info;
hull_info.ReadTessConstantBuffer(tess_constants);
info.ls_info.ls_stride = tess_constants.ls_stride;
}
Shader::TessellationDataConstantBuffer tess_constants;
const auto* hull_info = infos[u32(Shader::LogicalStage::TessellationControl)];
hull_info->ReadTessConstantBuffer(tess_constants);
info.ls_info.ls_stride = tess_constants.ls_stride;
break;
}
case Stage::Hull: {
@@ -122,6 +117,8 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
case Stage::Vertex: {
BuildCommon(regs.vs_program);
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
info.vs_info.step_rate_0 = regs.vgt_instance_step_rate_0;
info.vs_info.step_rate_1 = regs.vgt_instance_step_rate_1;
info.vs_info.emulate_depth_negative_one_to_one =
!instance.IsDepthClipControlSupported() &&
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
@@ -460,10 +457,6 @@ bool PipelineCache::RefreshGraphicsKey() {
// Stride will still be handled outside the pipeline using dynamic state.
u32 vertex_binding = 0;
for (const auto& attrib : fetch_shader->attributes) {
if (attrib.UsesStepRates()) {
// Skip attribute binding as the data will be pulled by shader.
continue;
}
const auto& buffer = attrib.GetSharp(*vs_info);
ASSERT(vertex_binding < MaxVertexBufferCount);
key.vertex_buffer_formats[vertex_binding++] =

View File

@@ -20,12 +20,9 @@
namespace Vulkan {
static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
Shader::PushData push_data{};
push_data.step0 = regs.vgt_instance_step_rate_0;
push_data.step1 = regs.vgt_instance_step_rate_1;
// TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex
// is encountered and implemented in the recompiler.
Shader::PushData push_data{};
push_data.xoffset = regs.viewport_control.xoffset_enable ? regs.viewports[0].xoffset : 0.f;
push_data.xscale = regs.viewport_control.xscale_enable ? regs.viewports[0].xscale : 1.f;
push_data.yoffset = regs.viewport_control.yoffset_enable ? regs.viewports[0].yoffset : 0.f;