mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-10 05:38:49 +00:00
shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates (#3238)
* shader_recompiler: Replace buffer pulling with attribute divisor for instance step rates
* flatten_extended_userdata: Remove special step rate buffer handling
* Review comments
* spirv_emit_context: Name all instance rate attribs properly
* spirv: Merge ReadConstBuffer again (template function only has 1 user now)
* attribute: Add missing attributes
* translate: Reimplement step rate instance id
* Resolve validation warnings
* shader_recompiler: Separate vertex inputs from LS stage, cleanup tess
This commit is contained in:
@@ -198,10 +198,13 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
|
||||
}
|
||||
|
||||
void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
|
||||
const auto& regs = liverpool->regs;
|
||||
Vulkan::VertexInputs<vk::VertexInputAttributeDescription2EXT> attributes;
|
||||
Vulkan::VertexInputs<vk::VertexInputBindingDescription2EXT> bindings;
|
||||
Vulkan::VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT> divisors;
|
||||
Vulkan::VertexInputs<AmdGpu::Buffer> guest_buffers;
|
||||
pipeline.GetVertexInputs(attributes, bindings, guest_buffers);
|
||||
pipeline.GetVertexInputs(attributes, bindings, divisors, guest_buffers,
|
||||
regs.vgt_instance_step_rate_0, regs.vgt_instance_step_rate_1);
|
||||
|
||||
if (instance.IsVertexInputDynamicState()) {
|
||||
// Update current vertex inputs.
|
||||
|
||||
@@ -72,12 +72,21 @@ GraphicsPipeline::GraphicsPipeline(
|
||||
|
||||
VertexInputs<vk::VertexInputAttributeDescription> vertex_attributes;
|
||||
VertexInputs<vk::VertexInputBindingDescription> vertex_bindings;
|
||||
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT> divisors;
|
||||
VertexInputs<AmdGpu::Buffer> guest_buffers;
|
||||
if (!instance.IsVertexInputDynamicState()) {
|
||||
GetVertexInputs(vertex_attributes, vertex_bindings, guest_buffers);
|
||||
const auto& vs_info = runtime_infos[u32(Shader::LogicalStage::Vertex)].vs_info;
|
||||
GetVertexInputs(vertex_attributes, vertex_bindings, divisors, guest_buffers,
|
||||
vs_info.step_rate_0, vs_info.step_rate_1);
|
||||
}
|
||||
|
||||
const vk::PipelineVertexInputDivisorStateCreateInfo divisor_state = {
|
||||
.vertexBindingDivisorCount = static_cast<u32>(divisors.size()),
|
||||
.pVertexBindingDivisors = divisors.data(),
|
||||
};
|
||||
|
||||
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
|
||||
.pNext = divisors.empty() ? nullptr : &divisor_state,
|
||||
.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
|
||||
.pVertexBindingDescriptions = vertex_bindings.data(),
|
||||
.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
|
||||
@@ -304,19 +313,17 @@ GraphicsPipeline::GraphicsPipeline(
|
||||
GraphicsPipeline::~GraphicsPipeline() = default;
|
||||
|
||||
template <typename Attribute, typename Binding>
|
||||
void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
|
||||
VertexInputs<Binding>& bindings,
|
||||
VertexInputs<AmdGpu::Buffer>& guest_buffers) const {
|
||||
void GraphicsPipeline::GetVertexInputs(
|
||||
VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
|
||||
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
|
||||
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const {
|
||||
using InstanceIdType = Shader::Gcn::VertexAttribute::InstanceIdType;
|
||||
if (!fetch_shader || fetch_shader->attributes.empty()) {
|
||||
return;
|
||||
}
|
||||
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
|
||||
for (const auto& attrib : fetch_shader->attributes) {
|
||||
if (attrib.UsesStepRates()) {
|
||||
// Skip attribute binding as the data will be pulled by shader.
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto step_rate = attrib.GetStepRate();
|
||||
const auto& buffer = attrib.GetSharp(vs_info);
|
||||
attributes.push_back(Attribute{
|
||||
.location = attrib.semantic,
|
||||
@@ -327,12 +334,19 @@ void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
|
||||
bindings.push_back(Binding{
|
||||
.binding = attrib.semantic,
|
||||
.stride = buffer.GetStride(),
|
||||
.inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
|
||||
? vk::VertexInputRate::eVertex
|
||||
: vk::VertexInputRate::eInstance,
|
||||
.inputRate = step_rate == InstanceIdType::None ? vk::VertexInputRate::eVertex
|
||||
: vk::VertexInputRate::eInstance,
|
||||
});
|
||||
const u32 divisor = step_rate == InstanceIdType::OverStepRate0
|
||||
? step_rate_0
|
||||
: (step_rate == InstanceIdType::OverStepRate1 ? step_rate_1 : 1);
|
||||
if constexpr (std::is_same_v<Binding, vk::VertexInputBindingDescription2EXT>) {
|
||||
bindings.back().divisor = 1;
|
||||
bindings.back().divisor = divisor;
|
||||
} else if (step_rate != InstanceIdType::None) {
|
||||
divisors.push_back(vk::VertexInputBindingDivisorDescriptionEXT{
|
||||
.binding = attrib.semantic,
|
||||
.divisor = divisor,
|
||||
});
|
||||
}
|
||||
guest_buffers.emplace_back(buffer);
|
||||
}
|
||||
@@ -342,11 +356,13 @@ void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
|
||||
template void GraphicsPipeline::GetVertexInputs(
|
||||
VertexInputs<vk::VertexInputAttributeDescription>& attributes,
|
||||
VertexInputs<vk::VertexInputBindingDescription>& bindings,
|
||||
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
|
||||
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
|
||||
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const;
|
||||
template void GraphicsPipeline::GetVertexInputs(
|
||||
VertexInputs<vk::VertexInputAttributeDescription2EXT>& attributes,
|
||||
VertexInputs<vk::VertexInputBindingDescription2EXT>& bindings,
|
||||
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
|
||||
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
|
||||
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const;
|
||||
|
||||
void GraphicsPipeline::BuildDescSetLayout() {
|
||||
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
||||
|
||||
@@ -81,7 +81,9 @@ public:
|
||||
/// Gets the attributes and bindings for vertex inputs.
|
||||
template <typename Attribute, typename Binding>
|
||||
void GetVertexInputs(VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
|
||||
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
|
||||
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
|
||||
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0,
|
||||
u32 step_rate_1) const;
|
||||
|
||||
private:
|
||||
void BuildDescSetLayout();
|
||||
|
||||
@@ -248,6 +248,7 @@ bool Instance::CreateDevice() {
|
||||
// Required
|
||||
ASSERT(add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME));
|
||||
ASSERT(add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME));
|
||||
ASSERT(add_extension(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME));
|
||||
|
||||
// Optional
|
||||
depth_range_unrestricted = add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
|
||||
@@ -436,6 +437,9 @@ bool Instance::CreateDevice() {
|
||||
vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
|
||||
.legacyVertexAttributes = true,
|
||||
},
|
||||
vk::PhysicalDeviceVertexAttributeDivisorFeatures{
|
||||
.vertexAttributeInstanceRateDivisor = true,
|
||||
},
|
||||
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
|
||||
.shaderBufferFloat32AtomicMinMax =
|
||||
shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax,
|
||||
|
||||
@@ -94,15 +94,10 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
||||
switch (stage) {
|
||||
case Stage::Local: {
|
||||
BuildCommon(regs.ls_program);
|
||||
if (regs.stage_enable.IsStageEnabled(static_cast<u32>(Stage::Hull))) {
|
||||
info.ls_info.links_with_tcs = true;
|
||||
Shader::TessellationDataConstantBuffer tess_constants;
|
||||
const auto* pgm = regs.ProgramForStage(static_cast<u32>(Stage::Hull));
|
||||
const auto params = Liverpool::GetParams(*pgm);
|
||||
const auto& hull_info = program_cache.at(params.hash)->info;
|
||||
hull_info.ReadTessConstantBuffer(tess_constants);
|
||||
info.ls_info.ls_stride = tess_constants.ls_stride;
|
||||
}
|
||||
Shader::TessellationDataConstantBuffer tess_constants;
|
||||
const auto* hull_info = infos[u32(Shader::LogicalStage::TessellationControl)];
|
||||
hull_info->ReadTessConstantBuffer(tess_constants);
|
||||
info.ls_info.ls_stride = tess_constants.ls_stride;
|
||||
break;
|
||||
}
|
||||
case Stage::Hull: {
|
||||
@@ -122,6 +117,8 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
||||
case Stage::Vertex: {
|
||||
BuildCommon(regs.vs_program);
|
||||
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
||||
info.vs_info.step_rate_0 = regs.vgt_instance_step_rate_0;
|
||||
info.vs_info.step_rate_1 = regs.vgt_instance_step_rate_1;
|
||||
info.vs_info.emulate_depth_negative_one_to_one =
|
||||
!instance.IsDepthClipControlSupported() &&
|
||||
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
|
||||
@@ -460,10 +457,6 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
// Stride will still be handled outside the pipeline using dynamic state.
|
||||
u32 vertex_binding = 0;
|
||||
for (const auto& attrib : fetch_shader->attributes) {
|
||||
if (attrib.UsesStepRates()) {
|
||||
// Skip attribute binding as the data will be pulled by shader.
|
||||
continue;
|
||||
}
|
||||
const auto& buffer = attrib.GetSharp(*vs_info);
|
||||
ASSERT(vertex_binding < MaxVertexBufferCount);
|
||||
key.vertex_buffer_formats[vertex_binding++] =
|
||||
|
||||
@@ -20,12 +20,9 @@
|
||||
namespace Vulkan {
|
||||
|
||||
static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
|
||||
Shader::PushData push_data{};
|
||||
push_data.step0 = regs.vgt_instance_step_rate_0;
|
||||
push_data.step1 = regs.vgt_instance_step_rate_1;
|
||||
|
||||
// TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex
|
||||
// is encountered and implemented in the recompiler.
|
||||
Shader::PushData push_data{};
|
||||
push_data.xoffset = regs.viewport_control.xoffset_enable ? regs.viewports[0].xoffset : 0.f;
|
||||
push_data.xscale = regs.viewport_control.xscale_enable ? regs.viewports[0].xscale : 1.f;
|
||||
push_data.yoffset = regs.viewport_control.yoffset_enable ? regs.viewports[0].yoffset : 0.f;
|
||||
|
||||
Reference in New Issue
Block a user