renderer_vulkan: Parse fetch shader per-pipeline

This commit is contained in:
squidbus 2024-12-02 19:37:37 -08:00
parent 8ac7ffd859
commit eda3629114
13 changed files with 176 additions and 127 deletions

View File

@ -280,34 +280,42 @@ void EmitContext::DefineInputs() {
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
for (const auto& input : info.vs_inputs) { const auto fetch_shader = info.LoadFetchShader();
ASSERT(input.binding < IR::NumParams); if (!fetch_shader) {
const auto sharp = input.GetSharp(info); break;
}
for (const auto& attrib : fetch_shader->attributes) {
ASSERT(attrib.semantic < IR::NumParams);
const auto sharp = info.GetSharp(attrib);
const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]}; const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 || if (attrib.UsesStepRates()) {
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
const u32 rate_idx = const u32 rate_idx =
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0 attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 0
: 1; : 1;
const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt());
const auto buffer =
std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) {
return buffer.instance_attrib == attrib.semantic;
});
// Note that we pass index rather than Id // Note that we pass index rather than Id
input_params[input.binding] = SpirvAttribute{ input_params[attrib.semantic] = SpirvAttribute{
.id = rate_idx, .id = rate_idx,
.pointer_type = input_u32, .pointer_type = input_u32,
.component_type = U32[1], .component_type = U32[1],
.num_components = input.num_components, .num_components = std::min<u16>(attrib.num_elements, num_components),
.is_integer = true, .is_integer = true,
.is_loaded = false, .is_loaded = false,
.buffer_handle = input.instance_data_buf, .buffer_handle = int(buffer - info.buffers.begin()),
}; };
} else { } else {
Id id{DefineInput(type, input.binding)}; Id id{DefineInput(type, attrib.semantic)};
if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) { if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) {
Name(id, fmt::format("vs_instance_attr{}", input.binding)); Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
} else { } else {
Name(id, fmt::format("vs_in_attr{}", input.binding)); Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
} }
input_params[input.binding] = GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false); input_params[attrib.semantic] =
GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
interfaces.push_back(id); interfaces.push_back(id);
} }
} }

View File

@ -34,8 +34,8 @@ namespace Shader::Gcn {
* We take the reverse way, extract the original input semantics from these instructions. * We take the reverse way, extract the original input semantics from these instructions.
**/ **/
FetchShaderData ParseFetchShader(const u32* code, u32* out_size) { FetchShaderData ParseFetchShader(const u32* code) {
FetchShaderData data{}; FetchShaderData data{.code = code};
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max()); GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder; GcnDecodeContext decoder;
@ -49,7 +49,7 @@ FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
u32 semantic_index = 0; u32 semantic_index = 0;
while (!code_slice.atEnd()) { while (!code_slice.atEnd()) {
const auto inst = decoder.decodeInstruction(code_slice); const auto inst = decoder.decodeInstruction(code_slice);
*out_size += inst.length; data.size += inst.length;
if (inst.opcode == Opcode::S_SETPC_B64) { if (inst.opcode == Opcode::S_SETPC_B64) {
break; break;

View File

@ -3,26 +3,62 @@
#pragma once #pragma once
#include <algorithm>
#include <ranges>
#include <vector>
#include "common/types.h"
namespace Shader::Gcn { namespace Shader::Gcn {
struct VertexAttribute { struct VertexAttribute {
enum InstanceIdType : u8 {
None = 0,
OverStepRate0 = 1,
OverStepRate1 = 2,
Plain = 3,
};
u8 semantic; ///< Semantic index of the attribute u8 semantic; ///< Semantic index of the attribute
u8 dest_vgpr; ///< Destination VGPR to load first component. u8 dest_vgpr; ///< Destination VGPR to load first component.
u8 num_elements; ///< Number of components to load u8 num_elements; ///< Number of components to load
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
u8 dword_offset; ///< The dword offset of the V# that describes this attribute. u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
[[nodiscard]] InstanceIdType GetStepRate() const {
return static_cast<InstanceIdType>(instance_data);
}
[[nodiscard]] bool UsesStepRates() const {
const auto step_rate = GetStepRate();
return step_rate == OverStepRate0 || step_rate == OverStepRate1;
}
bool operator==(const VertexAttribute& other) const {
return semantic == other.semantic && dest_vgpr == other.dest_vgpr &&
num_elements == other.num_elements && sgpr_base == other.sgpr_base &&
dword_offset == other.dword_offset && instance_data == other.instance_data;
}
}; };
struct FetchShaderData { struct FetchShaderData {
const u32* code;
u32 size = 0;
std::vector<VertexAttribute> attributes; std::vector<VertexAttribute> attributes;
s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR
s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
[[nodiscard]] bool UsesStepRates() const {
return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) {
return attribute.UsesStepRates();
}) != attributes.end();
}
bool operator==(const FetchShaderData& other) const {
return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
instance_offset_sgpr == other.instance_offset_sgpr;
}
}; };
FetchShaderData ParseFetchShader(const u32* code, u32* out_size); FetchShaderData ParseFetchShader(const u32* code);
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -368,13 +368,9 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
void Translator::EmitFetch(const GcnInst& inst) { void Translator::EmitFetch(const GcnInst& inst) {
// Read the pointer to the fetch shader assembly. // Read the pointer to the fetch shader assembly.
const u32 sgpr_base = inst.src[0].code; info.has_fetch_shader = true;
const u32* code; info.fetch_shader_sgpr_base = inst.src[0].code;
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code)); const auto fetch_data = info.LoadFetchShader();
// Parse the assembly to generate a list of attributes.
u32 fetch_size{};
const auto fetch_data = ParseFetchShader(code, &fetch_size);
if (Config::dumpShaders()) { if (Config::dumpShaders()) {
using namespace Common::FS; using namespace Common::FS;
@ -384,13 +380,10 @@ void Translator::EmitFetch(const GcnInst& inst) {
} }
const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash); const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteRaw<u8>(code, fetch_size); file.WriteRaw<u8>(fetch_data->code, fetch_data->size);
} }
info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr; for (const auto& attrib : fetch_data->attributes) {
info.instance_offset_sgpr = fetch_data.instance_offset_sgpr;
for (const auto& attrib : fetch_data.attributes) {
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr}; IR::VectorReg dst_reg{attrib.dest_vgpr};
@ -420,28 +413,14 @@ void Translator::EmitFetch(const GcnInst& inst) {
// In case of programmable step rates we need to fallback to instance data pulling in // In case of programmable step rates we need to fallback to instance data pulling in
// shader, so VBs should be bound as regular data buffers // shader, so VBs should be bound as regular data buffers
s32 instance_buf_handle = -1; if (attrib.UsesStepRates()) {
const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
if (step_rate == Info::VsInput::OverStepRate0 ||
step_rate == Info::VsInput::OverStepRate1) {
info.buffers.push_back({ info.buffers.push_back({
.sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4), .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
.used_types = IR::Type::F32, .used_types = IR::Type::F32,
.is_instance_data = true, .is_instance_data = true,
.instance_attrib = attrib.semantic,
}); });
instance_buf_handle = s32(info.buffers.size() - 1);
info.uses_step_rates = true;
} }
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
info.vs_inputs.push_back({
.binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
.instance_step_rate = step_rate,
.instance_data_buf = instance_buf_handle,
});
} }
} }

View File

@ -9,6 +9,7 @@
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "common/types.h" #include "common/types.h"
#include "frontend/fetch_shader.h"
#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/frontend/copy_shader.h" #include "shader_recompiler/frontend/copy_shader.h"
#include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/attribute.h"
@ -45,6 +46,7 @@ struct BufferResource {
AmdGpu::Buffer inline_cbuf; AmdGpu::Buffer inline_cbuf;
bool is_gds_buffer{}; bool is_gds_buffer{};
bool is_instance_data{}; bool is_instance_data{};
u8 instance_attrib{};
bool is_written{}; bool is_written{};
bool IsStorage(AmdGpu::Buffer buffer) const noexcept { bool IsStorage(AmdGpu::Buffer buffer) const noexcept {
@ -112,27 +114,6 @@ static_assert(sizeof(PushData) <= 128,
* Contains general information generated by the shader recompiler for an input program. * Contains general information generated by the shader recompiler for an input program.
*/ */
struct Info { struct Info {
struct VsInput {
enum InstanceIdType : u8 {
None = 0,
OverStepRate0 = 1,
OverStepRate1 = 2,
Plain = 3,
};
u16 binding;
u16 num_components;
u8 sgpr_base;
u8 dword_offset;
InstanceIdType instance_step_rate;
s32 instance_data_buf;
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept {
return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
}
};
boost::container::static_vector<VsInput, 32> vs_inputs{};
struct AttributeFlags { struct AttributeFlags {
bool Get(IR::Attribute attrib, u32 comp = 0) const { bool Get(IR::Attribute attrib, u32 comp = 0) const {
return flags[Index(attrib)] & (1 << comp); return flags[Index(attrib)] & (1 << comp);
@ -179,9 +160,6 @@ struct Info {
CopyShaderData gs_copy_data; CopyShaderData gs_copy_data;
s8 vertex_offset_sgpr = -1;
s8 instance_offset_sgpr = -1;
BufferResourceList buffers; BufferResourceList buffers;
TextureBufferResourceList texture_buffers; TextureBufferResourceList texture_buffers;
ImageResourceList images; ImageResourceList images;
@ -208,10 +186,11 @@ struct Info {
bool uses_shared{}; bool uses_shared{};
bool uses_fp16{}; bool uses_fp16{};
bool uses_fp64{}; bool uses_fp64{};
bool uses_step_rates{};
bool translation_failed{}; // indicates that shader has unsupported instructions bool translation_failed{}; // indicates that shader has unsupported instructions
bool has_readconst{}; bool has_readconst{};
u8 mrt_mask{0u}; u8 mrt_mask{0u};
bool has_fetch_shader{false};
u32 fetch_shader_sgpr_base{0u};
explicit Info(Stage stage_, ShaderParams params) explicit Info(Stage stage_, ShaderParams params)
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
@ -252,14 +231,18 @@ struct Info {
bnd.user_data += ud_mask.NumRegs(); bnd.user_data += ud_mask.NumRegs();
} }
/// Computes the {vertex offset, instance offset} pair for a draw.
/// The vertex offset starts from `regs.index_offset`; the instance offset starts at 0.
/// When a fetch shader is supplied, its offset SGPRs (if not -1) are read from `user_data`:
/// the vertex SGPR only when the register-provided offset is 0, the instance SGPR always.
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets(
    const AmdGpu::Liverpool::Regs& regs,
    const std::optional<Gcn::FetchShaderData>& fetch_shader) const {
    u32 vertex_offset = regs.index_offset;
    u32 instance_offset = 0;
    if (!fetch_shader.has_value()) {
        // No fetch shader: nothing can override the defaults.
        return {vertex_offset, instance_offset};
    }

    const auto vertex_sgpr = fetch_shader->vertex_offset_sgpr;
    if (vertex_sgpr != -1 && vertex_offset == 0) {
        vertex_offset = user_data[vertex_sgpr];
    }

    const auto instance_sgpr = fetch_shader->instance_offset_sgpr;
    if (instance_sgpr != -1) {
        instance_offset = user_data[instance_sgpr];
    }
    return {vertex_offset, instance_offset};
}
@ -273,6 +256,20 @@ struct Info {
srt_info.walker_func(user_data.data(), flattened_ud_buf.data()); srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
} }
} }
[[nodiscard]] std::optional<Gcn::FetchShaderData> LoadFetchShader() const {
if (!has_fetch_shader) {
return std::nullopt;
}
const u32* code;
std::memcpy(&code, &user_data[fetch_shader_sgpr_base], sizeof(code));
return Gcn::ParseFetchShader(code);
}
/// Reads the buffer descriptor (V#) for a fetch shader attribute via ReadUdReg,
/// using the attribute's `sgpr_base` and `dword_offset`.
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(
const Gcn::VertexAttribute& attrib) const noexcept {
return ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
}
}; };
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {

View File

@ -6,6 +6,7 @@
#include <bitset> #include <bitset>
#include "common/types.h" #include "common/types.h"
#include "frontend/fetch_shader.h"
#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "shader_recompiler/ir/passes/srt.h" #include "shader_recompiler/ir/passes/srt.h"
@ -50,6 +51,7 @@ struct StageSpecialization {
const Shader::Info* info; const Shader::Info* info;
RuntimeInfo runtime_info; RuntimeInfo runtime_info;
Gcn::FetchShaderData fetch_shader_data{};
std::bitset<MaxStageResources> bitset{}; std::bitset<MaxStageResources> bitset{};
boost::container::small_vector<BufferSpecialization, 16> buffers; boost::container::small_vector<BufferSpecialization, 16> buffers;
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers; boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
@ -60,6 +62,9 @@ struct StageSpecialization {
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
Backend::Bindings start_) Backend::Bindings start_)
: info{&info_}, runtime_info{runtime_info_}, start{start_} { : info{&info_}, runtime_info{runtime_info_}, start{start_} {
if (const auto fetch_shader = info_.LoadFetchShader()) {
fetch_shader_data = *fetch_shader;
}
u32 binding{}; u32 binding{};
if (info->has_readconst) { if (info->has_readconst) {
binding++; binding++;
@ -105,6 +110,9 @@ struct StageSpecialization {
if (runtime_info != other.runtime_info) { if (runtime_info != other.runtime_info) {
return false; return false;
} }
if (fetch_shader_data != other.fetch_shader_data) {
return false;
}
u32 binding{}; u32 binding{};
if (info->has_readconst != other.info->has_readconst) { if (info->has_readconst != other.info->has_readconst) {
return false; return false;

View File

@ -5,6 +5,7 @@
#include "common/alignment.h" #include "common/alignment.h"
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
@ -107,7 +108,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
} }
} }
bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { bool BufferCache::BindVertexBuffers(
const Shader::Info& vs_info, const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader) {
boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes; boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes;
boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings; boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings;
SCOPE_EXIT { SCOPE_EXIT {
@ -126,7 +128,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
} }
}; };
if (vs_info.vs_inputs.empty()) { if (!fetch_shader || fetch_shader->attributes.empty()) {
return false; return false;
} }
@ -150,30 +152,29 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
// Calculate buffers memory overlaps // Calculate buffers memory overlaps
bool has_step_rate = false; bool has_step_rate = false;
boost::container::static_vector<BufferRange, NumVertexBuffers> ranges{}; boost::container::static_vector<BufferRange, NumVertexBuffers> ranges{};
for (const auto& input : vs_info.vs_inputs) { for (const auto& attrib : fetch_shader->attributes) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || if (attrib.UsesStepRates()) {
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
has_step_rate = true; has_step_rate = true;
continue; continue;
} }
const auto& buffer = input.GetSharp(vs_info); const auto& buffer = vs_info.GetSharp(attrib);
if (buffer.GetSize() == 0) { if (buffer.GetSize() == 0) {
continue; continue;
} }
guest_buffers.emplace_back(buffer); guest_buffers.emplace_back(buffer);
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize()); ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
attributes.push_back({ attributes.push_back({
.location = input.binding, .location = attrib.semantic,
.binding = input.binding, .binding = attrib.semantic,
.format = .format =
Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
.offset = 0, .offset = 0,
}); });
bindings.push_back({ bindings.push_back({
.binding = input.binding, .binding = attrib.semantic,
.stride = buffer.GetStride(), .stride = buffer.GetStride(),
.inputRate = input.instance_step_rate == Shader::Info::VsInput::None .inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
? vk::VertexInputRate::eVertex ? vk::VertexInputRate::eVertex
: vk::VertexInputRate::eInstance, : vk::VertexInputRate::eInstance,
.divisor = 1, .divisor = 1,

View File

@ -20,8 +20,11 @@ struct Liverpool;
} }
namespace Shader { namespace Shader {
struct Info; namespace Gcn {
struct FetchShaderData;
} }
struct Info;
} // namespace Shader
namespace VideoCore { namespace VideoCore {
@ -76,7 +79,8 @@ public:
void InvalidateMemory(VAddr device_addr, u64 size); void InvalidateMemory(VAddr device_addr, u64 size);
/// Binds host vertex buffers for the current draw. /// Binds host vertex buffers for the current draw.
bool BindVertexBuffers(const Shader::Info& vs_info); bool BindVertexBuffers(const Shader::Info& vs_info,
const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader);
/// Bind host index buffer for the current draw. /// Bind host index buffer for the current draw.
u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); u32 BindIndexBuffer(bool& is_indexed, u32 index_offset);

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm> #include <algorithm>
#include <utility>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
@ -10,6 +11,8 @@
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache.h"
@ -20,8 +23,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_, DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
vk::PipelineCache pipeline_cache, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos, std::span<const Shader::Info*, MaxShaderStages> infos,
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader_,
std::span<const vk::ShaderModule> modules) std::span<const vk::ShaderModule> modules)
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_} { : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_},
fetch_shader{std::move(fetch_shader_)} {
const vk::Device device = instance.GetDevice(); const vk::Device device = instance.GetDevice();
std::ranges::copy(infos, stages.begin()); std::ranges::copy(infos, stages.begin());
BuildDescSetLayout(); BuildDescSetLayout();
@ -46,31 +51,31 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings; boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes; boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
if (!instance.IsVertexInputDynamicState()) { if (fetch_shader && !instance.IsVertexInputDynamicState()) {
const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; const auto& vs_info = GetStage(Shader::Stage::Vertex);
for (const auto& input : vs_info->vs_inputs) { for (const auto& attrib : fetch_shader->attributes) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || if (attrib.UsesStepRates()) {
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
// Skip attribute binding as the data will be pulled by shader // Skip attribute binding as the data will be pulled by shader
continue; continue;
} }
const auto buffer = input.GetSharp(*vs_info); const auto buffer = vs_info.GetSharp(attrib);
if (buffer.GetSize() == 0) { if (buffer.GetSize() == 0) {
continue; continue;
} }
vertex_attributes.push_back({ vertex_attributes.push_back({
.location = input.binding, .location = attrib.semantic,
.binding = input.binding, .binding = attrib.semantic,
.format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), .format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
.offset = 0, .offset = 0,
}); });
vertex_bindings.push_back({ vertex_bindings.push_back({
.binding = input.binding, .binding = attrib.semantic,
.stride = buffer.GetStride(), .stride = buffer.GetStride(),
.inputRate = input.instance_step_rate == Shader::Info::VsInput::None .inputRate =
? vk::VertexInputRate::eVertex attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
: vk::VertexInputRate::eInstance, ? vk::VertexInputRate::eVertex
: vk::VertexInputRate::eInstance,
}); });
} }
} }

View File

@ -59,9 +59,14 @@ public:
GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> stages, std::span<const Shader::Info*, MaxShaderStages> stages,
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader,
std::span<const vk::ShaderModule> modules); std::span<const vk::ShaderModule> modules);
~GraphicsPipeline(); ~GraphicsPipeline();
/// Returns the fetch shader data stored with this pipeline (empty optional if none).
/// NOTE(review): the stored type is std::optional<const FetchShaderData>, while consumers
/// such as BufferCache::BindVertexBuffers take const std::optional<FetchShaderData>&.
/// Passing this result there goes through optional's converting constructor, which
/// presumably copies the attribute vector on each call — confirm and consider aligning the types.
const std::optional<const Shader::Gcn::FetchShaderData>& GetFetchShader() const noexcept {
return fetch_shader;
}
bool IsEmbeddedVs() const noexcept { bool IsEmbeddedVs() const noexcept {
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash;
@ -94,6 +99,7 @@ private:
private: private:
GraphicsPipelineKey key; GraphicsPipelineKey key;
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader{};
}; };
} // namespace Vulkan } // namespace Vulkan

View File

@ -187,7 +187,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
if (is_new) { if (is_new) {
it.value() = graphics_pipeline_pool.Create(instance, scheduler, desc_heap, graphics_key, it.value() = graphics_pipeline_pool.Create(instance, scheduler, desc_heap, graphics_key,
*pipeline_cache, infos, modules); *pipeline_cache, infos, fetch_shader, modules);
} }
return it->second; return it->second;
} }
@ -304,8 +304,8 @@ bool PipelineCache::RefreshGraphicsKey() {
} }
auto params = Liverpool::GetParams(*pgm); auto params = Liverpool::GetParams(*pgm);
std::tie(infos[stage_out_idx], modules[stage_out_idx], key.stage_hashes[stage_out_idx]) = std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader,
GetProgram(stage_in, params, binding); key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding);
return true; return true;
}; };
@ -341,15 +341,14 @@ bool PipelineCache::RefreshGraphicsKey() {
} }
} }
const auto* vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)]; const auto vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
if (vs_info && !instance.IsVertexInputDynamicState()) { if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
u32 vertex_binding = 0; u32 vertex_binding = 0;
for (const auto& input : vs_info->vs_inputs) { for (const auto& attrib : fetch_shader->attributes) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || if (attrib.UsesStepRates()) {
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
continue; continue;
} }
const auto& buffer = input.GetSharp(*vs_info); const auto& buffer = vs_info->GetSharp(attrib);
if (buffer.GetSize() == 0) { if (buffer.GetSize() == 0) {
continue; continue;
} }
@ -393,7 +392,7 @@ bool PipelineCache::RefreshComputeKey() {
Shader::Backend::Bindings binding{}; Shader::Backend::Bindings binding{};
const auto* cs_pgm = &liverpool->regs.cs_program; const auto* cs_pgm = &liverpool->regs.cs_program;
const auto cs_params = Liverpool::GetParams(*cs_pgm); const auto cs_params = Liverpool::GetParams(*cs_pgm);
std::tie(infos[0], modules[0], compute_key) = std::tie(infos[0], modules[0], fetch_shader, compute_key) =
GetProgram(Shader::Stage::Compute, cs_params, binding); GetProgram(Shader::Stage::Compute, cs_params, binding);
return true; return true;
} }
@ -424,8 +423,9 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
return module; return module;
} }
std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram( std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>, u64>
Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding) { PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
Shader::Backend::Bindings& binding) {
const auto runtime_info = BuildRuntimeInfo(stage); const auto runtime_info = BuildRuntimeInfo(stage);
auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
if (new_program) { if (new_program) {
@ -435,7 +435,8 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
const auto spec = Shader::StageSpecialization(program->info, runtime_info, start); const auto spec = Shader::StageSpecialization(program->info, runtime_info, start);
program->AddPermut(module, std::move(spec)); program->AddPermut(module, std::move(spec));
it_pgm.value() = program; it_pgm.value() = program;
return std::make_tuple(&program->info, module, HashCombine(params.hash, 0)); return std::make_tuple(&program->info, module, spec.fetch_shader_data,
HashCombine(params.hash, 0));
} }
Program* program = it_pgm->second; Program* program = it_pgm->second;
@ -455,7 +456,8 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
module = it->module; module = it->module;
perm_idx = std::distance(program->modules.begin(), it); perm_idx = std::distance(program->modules.begin(), it);
} }
return std::make_tuple(&info, module, HashCombine(params.hash, perm_idx)); return std::make_tuple(&info, module, spec.fetch_shader_data,
HashCombine(params.hash, perm_idx));
} }
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,

View File

@ -47,8 +47,10 @@ public:
const ComputePipeline* GetComputePipeline(); const ComputePipeline* GetComputePipeline();
std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram( std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>,
Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding); u64>
GetProgram(Shader::Stage stage, Shader::ShaderParams params,
Shader::Backend::Bindings& binding);
private: private:
bool RefreshGraphicsKey(); bool RefreshGraphicsKey();
@ -80,6 +82,7 @@ private:
tsl::robin_map<GraphicsPipelineKey, GraphicsPipeline*> graphics_pipelines; tsl::robin_map<GraphicsPipelineKey, GraphicsPipeline*> graphics_pipelines;
std::array<const Shader::Info*, MaxShaderStages> infos{}; std::array<const Shader::Info*, MaxShaderStages> infos{};
std::array<vk::ShaderModule, MaxShaderStages> modules{}; std::array<vk::ShaderModule, MaxShaderStages> modules{};
std::optional<Shader::Gcn::FetchShaderData> fetch_shader{};
GraphicsPipelineKey graphics_key{}; GraphicsPipelineKey graphics_key{};
u64 compute_key{}; u64 compute_key{};
}; };

View File

@ -187,13 +187,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
} }
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
buffer_cache.BindVertexBuffers(vs_info); const auto& fetch_shader = pipeline->GetFetchShader();
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset);
BeginRendering(*pipeline, state); BeginRendering(*pipeline, state);
UpdateDynamicState(*pipeline); UpdateDynamicState(*pipeline);
const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(regs); const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(regs, fetch_shader);
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
@ -243,7 +244,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
} }
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
buffer_cache.BindVertexBuffers(vs_info); const auto& fetch_shader = pipeline->GetFetchShader();
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
buffer_cache.BindIndexBuffer(is_indexed, 0); buffer_cache.BindIndexBuffer(is_indexed, 0);
const auto& [buffer, base] = const auto& [buffer, base] =
@ -397,10 +399,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
if (!stage) { if (!stage) {
continue; continue;
} }
if (stage->uses_step_rates) { push_data.step0 = regs.vgt_instance_step_rate_0;
push_data.step0 = regs.vgt_instance_step_rate_0; push_data.step1 = regs.vgt_instance_step_rate_1;
push_data.step1 = regs.vgt_instance_step_rate_1;
}
stage->PushUd(binding, push_data); stage->PushUd(binding, push_data);
BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers); BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers);