Initial support for Geometry shaders (#1244)

* video_core: initial GS support

* Fix component mapping; add handling for the missing primitive type
This commit is contained in:
psucien
2024-10-06 00:26:50 +02:00
committed by GitHub
parent 5bb45dc7ba
commit 927bb0c175
40 changed files with 944 additions and 268 deletions

View File

@@ -61,34 +61,34 @@ vk::CompareOp CompareOp(Liverpool::CompareFunc func) {
}
}
vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) {
vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) {
switch (type) {
case Liverpool::PrimitiveType::PointList:
case AmdGpu::PrimitiveType::PointList:
return vk::PrimitiveTopology::ePointList;
case Liverpool::PrimitiveType::LineList:
case AmdGpu::PrimitiveType::LineList:
return vk::PrimitiveTopology::eLineList;
case Liverpool::PrimitiveType::LineStrip:
case AmdGpu::PrimitiveType::LineStrip:
return vk::PrimitiveTopology::eLineStrip;
case Liverpool::PrimitiveType::TriangleList:
case AmdGpu::PrimitiveType::TriangleList:
return vk::PrimitiveTopology::eTriangleList;
case Liverpool::PrimitiveType::TriangleFan:
case AmdGpu::PrimitiveType::TriangleFan:
return vk::PrimitiveTopology::eTriangleFan;
case Liverpool::PrimitiveType::TriangleStrip:
case AmdGpu::PrimitiveType::TriangleStrip:
return vk::PrimitiveTopology::eTriangleStrip;
case Liverpool::PrimitiveType::AdjLineList:
case AmdGpu::PrimitiveType::AdjLineList:
return vk::PrimitiveTopology::eLineListWithAdjacency;
case Liverpool::PrimitiveType::AdjLineStrip:
case AmdGpu::PrimitiveType::AdjLineStrip:
return vk::PrimitiveTopology::eLineStripWithAdjacency;
case Liverpool::PrimitiveType::AdjTriangleList:
case AmdGpu::PrimitiveType::AdjTriangleList:
return vk::PrimitiveTopology::eTriangleListWithAdjacency;
case Liverpool::PrimitiveType::AdjTriangleStrip:
case AmdGpu::PrimitiveType::AdjTriangleStrip:
return vk::PrimitiveTopology::eTriangleStripWithAdjacency;
case Liverpool::PrimitiveType::PatchPrimitive:
case AmdGpu::PrimitiveType::PatchPrimitive:
return vk::PrimitiveTopology::ePatchList;
case Liverpool::PrimitiveType::QuadList:
case AmdGpu::PrimitiveType::QuadList:
// Needs to generate index buffer on the fly.
return vk::PrimitiveTopology::eTriangleList;
case Liverpool::PrimitiveType::RectList:
case AmdGpu::PrimitiveType::RectList:
return vk::PrimitiveTopology::eTriangleStrip;
default:
UNREACHABLE();

View File

@@ -18,7 +18,7 @@ vk::StencilOp StencilOp(Liverpool::StencilFunc op);
vk::CompareOp CompareOp(Liverpool::CompareFunc func);
vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type);
vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type);
vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode);

View File

@@ -16,6 +16,10 @@
namespace Vulkan {
static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex |
vk::ShaderStageFlagBits::eGeometry |
vk::ShaderStageFlagBits::eFragment;
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
vk::PipelineCache pipeline_cache,
@@ -27,7 +31,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
BuildDescSetLayout();
const vk::PushConstantRange push_constants = {
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
.stageFlags = gp_stage_flags,
.offset = 0,
.size = sizeof(Shader::PushData),
};
@@ -83,7 +87,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.pVertexAttributeDescriptions = vertex_attributes.data(),
};
if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) {
if (key.prim_type == AmdGpu::PrimitiveType::RectList && !IsEmbeddedVs()) {
LOG_WARNING(Render_Vulkan,
"Rectangle List primitive type is only supported for embedded VS");
}
@@ -196,9 +200,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
},
};
auto stage = u32(Shader::Stage::Vertex);
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages>
shader_stages;
auto stage = u32(Shader::Stage::Vertex);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eVertex,
@@ -206,6 +210,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.pName = "main",
});
}
stage = u32(Shader::Stage::Geometry);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eGeometry,
.module = modules[stage],
.pName = "main",
});
}
stage = u32(Shader::Stage::Fragment);
if (infos[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
@@ -322,7 +334,7 @@ void GraphicsPipeline::BuildDescSetLayout() {
.descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
.stageFlags = gp_stage_flags,
});
}
for (const auto& tex_buffer : stage->texture_buffers) {
@@ -331,7 +343,7 @@ void GraphicsPipeline::BuildDescSetLayout() {
.descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
: vk::DescriptorType::eUniformTexelBuffer,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
.stageFlags = gp_stage_flags,
});
}
for (const auto& image : stage->images) {
@@ -340,7 +352,7 @@ void GraphicsPipeline::BuildDescSetLayout() {
.descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
.stageFlags = gp_stage_flags,
});
}
for (const auto& sampler : stage->samplers) {
@@ -348,7 +360,7 @@ void GraphicsPipeline::BuildDescSetLayout() {
.binding = binding++,
.descriptorType = vk::DescriptorType::eSampler,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
.stageFlags = gp_stage_flags,
});
}
}
@@ -518,9 +530,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
desc_set, {});
}
}
cmdbuf.pushConstants(*pipeline_layout,
vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, 0U,
sizeof(push_data), &push_data);
cmdbuf.pushConstants(*pipeline_layout, gp_stage_flags, 0U, sizeof(push_data), &push_data);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle());
}

View File

@@ -36,7 +36,7 @@ struct GraphicsPipelineKey {
u32 num_samples;
u32 mrt_mask;
Liverpool::StencilControl stencil;
Liverpool::PrimitiveType prim_type;
AmdGpu::PrimitiveType prim_type;
u32 enable_primitive_restart;
u32 primitive_restart_index;
Liverpool::PolygonMode polygon_mode;
@@ -86,13 +86,13 @@ public:
}
[[nodiscard]] bool IsPrimitiveListTopology() const {
return key.prim_type == Liverpool::PrimitiveType::PointList ||
key.prim_type == Liverpool::PrimitiveType::LineList ||
key.prim_type == Liverpool::PrimitiveType::TriangleList ||
key.prim_type == Liverpool::PrimitiveType::AdjLineList ||
key.prim_type == Liverpool::PrimitiveType::AdjTriangleList ||
key.prim_type == Liverpool::PrimitiveType::RectList ||
key.prim_type == Liverpool::PrimitiveType::QuadList;
return key.prim_type == AmdGpu::PrimitiveType::PointList ||
key.prim_type == AmdGpu::PrimitiveType::LineList ||
key.prim_type == AmdGpu::PrimitiveType::TriangleList ||
key.prim_type == AmdGpu::PrimitiveType::AdjLineList ||
key.prim_type == AmdGpu::PrimitiveType::AdjTriangleList ||
key.prim_type == AmdGpu::PrimitiveType::RectList ||
key.prim_type == AmdGpu::PrimitiveType::QuadList;
}
private:

View File

@@ -322,6 +322,7 @@ bool Instance::CreateDevice() {
.geometryShader = features.geometryShader,
.logicOp = features.logicOp,
.depthBiasClamp = features.depthBiasClamp,
.fillModeNonSolid = features.fillModeNonSolid,
.multiViewport = features.multiViewport,
.samplerAnisotropy = features.samplerAnisotropy,
.vertexPipelineStoresAndAtomics = features.vertexPipelineStoresAndAtomics,

View File

@@ -147,6 +147,16 @@ public:
return list_restart;
}
/// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const {
return features.geometryShader;
}
/// Returns true when tessellation is supported by the device
bool IsTessellationSupported() const {
return features.tessellationShader;
}
/// Returns the vendor ID of the physical device
u32 GetVendorID() const {
return properties.vendorID;

View File

@@ -7,7 +7,10 @@
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/frontend/copy_shader.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -82,6 +85,13 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
auto info = Shader::RuntimeInfo{stage};
const auto& regs = liverpool->regs;
switch (stage) {
case Shader::Stage::Export: {
info.num_user_data = regs.es_program.settings.num_user_regs;
info.num_input_vgprs = regs.es_program.settings.vgpr_comp_cnt;
info.num_allocated_vgprs = regs.es_program.settings.num_vgprs * 4;
info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
break;
}
case Shader::Stage::Vertex: {
info.num_user_data = regs.vs_program.settings.num_user_regs;
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
@@ -92,6 +102,29 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
break;
}
case Shader::Stage::Geometry: {
info.num_user_data = regs.gs_program.settings.num_user_regs;
info.num_input_vgprs = regs.gs_program.settings.vgpr_comp_cnt;
info.num_allocated_vgprs = regs.gs_program.settings.num_vgprs * 4;
info.gs_info.output_vertices = regs.vgt_gs_max_vert_out;
info.gs_info.num_invocations =
regs.vgt_gs_instance_cnt.IsEnabled() ? regs.vgt_gs_instance_cnt.count : 1;
info.gs_info.in_primitive = regs.primitive_type;
for (u32 stream_id = 0; stream_id < Shader::GsMaxOutputStreams; ++stream_id) {
info.gs_info.out_primitive[stream_id] =
regs.vgt_gs_out_prim_type.GetPrimitiveType(stream_id);
}
info.gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize;
info.gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0];
// Extract semantics offsets from a copy shader
const auto vc_stage = Shader::Stage::Vertex;
const auto* pgm_vc = regs.ProgramForStage(static_cast<u32>(vc_stage));
const auto params_vc = Liverpool::GetParams(*pgm_vc);
DumpShader(params_vc.code, params_vc.hash, Shader::Stage::Vertex, 0, "copy.bin");
info.gs_info.copy_data = Shader::ParseCopyShader(params_vc.code);
break;
}
case Shader::Stage::Fragment: {
info.num_user_data = regs.ps_program.settings.num_user_regs;
info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4;
@@ -149,7 +182,7 @@ PipelineCache::~PipelineCache() = default;
const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
const auto& regs = liverpool->regs;
// Tessellation is unsupported so skip the draw to avoid locking up the driver.
if (regs.primitive_type == Liverpool::PrimitiveType::PatchPrimitive) {
if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) {
return nullptr;
}
// There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an
@@ -163,7 +196,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped");
return nullptr;
}
if (regs.primitive_type == Liverpool::PrimitiveType::None) {
if (regs.primitive_type == AmdGpu::PrimitiveType::None) {
LOG_TRACE(Render_Vulkan, "Primitive type 'None' skipped");
return nullptr;
}
@@ -190,15 +223,6 @@ const ComputePipeline* PipelineCache::GetComputePipeline() {
return it->second;
}
bool ShouldSkipShader(u64 shader_hash, const char* shader_type) {
static constexpr std::array<u64, 0> skip_hashes = {};
if (std::ranges::contains(skip_hashes, shader_hash)) {
LOG_WARNING(Render_Vulkan, "Skipped {} shader hash {:#x}.", shader_type, shader_hash);
return true;
}
return false;
}
bool PipelineCache::RefreshGraphicsKey() {
std::memset(&graphics_key, 0, sizeof(GraphicsPipelineKey));
@@ -275,46 +299,66 @@ bool PipelineCache::RefreshGraphicsKey() {
}
Shader::Backend::Bindings binding{};
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!regs.stage_enable.IsStageEnabled(i)) {
key.stage_hashes[i] = 0;
infos[i] = nullptr;
continue;
const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool {
const auto stage_in_idx = static_cast<u32>(stage_in);
const auto stage_out_idx = static_cast<u32>(stage_out);
if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) {
key.stage_hashes[stage_out_idx] = 0;
infos[stage_out_idx] = nullptr;
return false;
}
auto* pgm = regs.ProgramForStage(i);
const auto* pgm = regs.ProgramForStage(stage_in_idx);
if (!pgm || !pgm->Address<u32*>()) {
key.stage_hashes[i] = 0;
infos[i] = nullptr;
continue;
key.stage_hashes[stage_out_idx] = 0;
infos[stage_out_idx] = nullptr;
return false;
}
const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
if (!bininfo->Valid()) {
LOG_WARNING(Render_Vulkan, "Invalid binary info structure!");
key.stage_hashes[i] = 0;
infos[i] = nullptr;
continue;
}
if (ShouldSkipShader(bininfo->shader_hash, "graphics")) {
return false;
}
const auto stage = Shader::StageFromIndex(i);
const auto params = Liverpool::GetParams(*pgm);
if (stage != Shader::Stage::Vertex && stage != Shader::Stage::Fragment) {
key.stage_hashes[stage_out_idx] = 0;
infos[stage_out_idx] = nullptr;
return false;
}
static bool TessMissingLogged = false;
if (auto* pgm = regs.ProgramForStage(3);
regs.stage_enable.IsStageEnabled(3) && pgm->Address() != 0) {
if (!TessMissingLogged) {
LOG_WARNING(Render_Vulkan, "Tess pipeline compilation skipped");
TessMissingLogged = true;
}
auto params = Liverpool::GetParams(*pgm);
std::tie(infos[stage_out_idx], modules[stage_out_idx], key.stage_hashes[stage_out_idx]) =
GetProgram(stage_in, params, binding);
return true;
};
const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); };
const auto& IsGsFeaturesSupported = [&]() -> bool {
// These checks are temporary until all functionality is implemented.
return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw;
};
TryBindStage(Shader::Stage::Fragment);
const auto* fs_info = infos[static_cast<u32>(Shader::Stage::Fragment)];
key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;
switch (regs.stage_enable.raw) {
case Liverpool::ShaderStageEnable::VgtStages::EsGs: {
if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) {
break;
}
if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) {
return false;
}
std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding);
if (!TryBindStage(Shader::Stage::Geometry)) {
return false;
}
break;
}
default: {
TryBindStage(Shader::Stage::Vertex);
infos[static_cast<u32>(Shader::Stage::Geometry)] = nullptr;
break;
}
}
const auto* vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
@@ -336,9 +380,6 @@ bool PipelineCache::RefreshGraphicsKey() {
}
}
const auto* fs_info = infos[static_cast<u32>(Shader::Stage::Fragment)];
key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;
// Second pass to fill remain CB pipeline key data
for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
auto const& col_buf = regs.color_buffers[cb];
@@ -364,9 +405,6 @@ bool PipelineCache::RefreshComputeKey() {
Shader::Backend::Bindings binding{};
const auto* cs_pgm = &liverpool->regs.cs_program;
const auto cs_params = Liverpool::GetParams(*cs_pgm);
if (ShouldSkipShader(cs_params.hash, "compute")) {
return false;
}
std::tie(infos[0], modules[0], compute_key) =
GetProgram(Shader::Stage::Compute, cs_params, binding);
return true;
@@ -378,15 +416,11 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
Shader::Backend::Bindings& binding) {
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
perm_idx != 0 ? "(permutation)" : "");
if (Config::dumpShaders()) {
DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin");
}
DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin");
const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile);
const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding);
if (Config::dumpShaders()) {
DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv");
}
DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv");
const auto module = CompileSPV(spv, instance.GetDevice());
const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx);
@@ -429,6 +463,10 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
size_t perm_idx, std::string_view ext) {
if (!Config::dumpShaders()) {
return;
}
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {

View File

@@ -70,9 +70,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset),
instance_offset);
} else {
const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList
? 4
: regs.num_indices;
const u32 num_vertices =
regs.primitive_type == AmdGpu::PrimitiveType::RectList ? 4 : regs.num_indices;
cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), vertex_offset,
instance_offset);
}
@@ -88,7 +87,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si
return;
}
ASSERT_MSG(regs.primitive_type != AmdGpu::Liverpool::PrimitiveType::RectList,
ASSERT_MSG(regs.primitive_type != AmdGpu::PrimitiveType::RectList,
"Unsupported primitive type for indirect draw");
try {