renderer_vulkan: Implement rectlist emulation with tessellation (#1857)

* renderer_vulkan: Implement rectlist emulation with tessellation
* clang format
* renderer_vulkan: Use tessellation for quad primitive as well
* vk_rasterizer: Handle viewport enable flags
* review
* shader_recompiler: Fix quad/rect list FS passthrough semantics.
* spirv: Bump to 1.5
* remove pragma

---------

Co-authored-by: squidbus <175574877+squidbus@users.noreply.github.com>
2025-12-10 05:38:49 +00:00 · 2024-12-24 13:28:47 +02:00
parent c2e9c877dd
commit 092d42e981
15 changed files with 426 additions and 123 deletions
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -238,32 +238,14 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
    // Emulate QuadList and Polygon primitive types with CPU made index buffer.
    const auto& regs = liverpool->regs;
    if (!is_indexed) {
-        bool needs_index_buffer = false;
-        if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList ||
-            regs.primitive_type == AmdGpu::PrimitiveType::Polygon) {
-            needs_index_buffer = true;
-        }
-
-        if (!needs_index_buffer) {
+        if (regs.primitive_type != AmdGpu::PrimitiveType::Polygon) {
            return regs.num_indices;
        }

        // Emit indices.
        const u32 index_size = 3 * regs.num_indices;
        const auto [data, offset] = stream_buffer.Map(index_size);
-
-        switch (regs.primitive_type) {
-        case AmdGpu::PrimitiveType::QuadList:
-            Vulkan::LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices);
-            break;
-        case AmdGpu::PrimitiveType::Polygon:
-            Vulkan::LiverpoolToVK::EmitPolygonToTriangleListIndices(data, regs.num_indices);
-            break;
-        default:
-            UNREACHABLE();
-            break;
-        }
-
+        Vulkan::LiverpoolToVK::EmitPolygonToTriangleListIndices(data, regs.num_indices);
        stream_buffer.Commit();

        // Bind index buffer.
@@ -282,31 +264,6 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
    VAddr index_address = regs.index_base_address.Address<VAddr>();
    index_address += index_offset * index_size;

-    if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) {
-        // Convert indices.
-        const u32 new_index_size = regs.num_indices * index_size * 6 / 4;
-        const auto [data, offset] = stream_buffer.Map(new_index_size);
-        const auto index_ptr = reinterpret_cast<u8*>(index_address);
-        switch (index_type) {
-        case vk::IndexType::eUint16:
-            Vulkan::LiverpoolToVK::ConvertQuadToTriangleListIndices<u16>(data, index_ptr,
-                                                                         regs.num_indices);
-            break;
-        case vk::IndexType::eUint32:
-            Vulkan::LiverpoolToVK::ConvertQuadToTriangleListIndices<u32>(data, index_ptr,
-                                                                         regs.num_indices);
-            break;
-        default:
-            UNREACHABLE_MSG("Unsupported QuadList index type {}", vk::to_string(index_type));
-            break;
-        }
-        stream_buffer.Commit();
-
-        // Bind index buffer.
-        const auto cmdbuf = scheduler.CommandBuffer();
-        cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, index_type);
-        return new_index_size / index_size;
-    }
    if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) {
        UNREACHABLE();
    }
--- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@@ -116,12 +116,12 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) {
        return vk::PrimitiveTopology::eTriangleStripWithAdjacency;
    case AmdGpu::PrimitiveType::PatchPrimitive:
        return vk::PrimitiveTopology::ePatchList;
-    case AmdGpu::PrimitiveType::QuadList:
    case AmdGpu::PrimitiveType::Polygon:
        // Needs to generate index buffer on the fly.
        return vk::PrimitiveTopology::eTriangleList;
+    case AmdGpu::PrimitiveType::QuadList:
    case AmdGpu::PrimitiveType::RectList:
-        return vk::PrimitiveTopology::eTriangleStrip;
+        return vk::PrimitiveTopology::ePatchList;
    default:
        UNREACHABLE();
        return vk::PrimitiveTopology::eTriangleList;
--- a/src/video_core/renderer_vulkan/liverpool_to_vk.h
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h
@@ -70,34 +70,6 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color

 vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags);

-static constexpr u16 NumVerticesPerQuad = 4;
-
-inline void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
-    u16* out_data = reinterpret_cast<u16*>(out_ptr);
-    for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
-        *out_data++ = i;
-        *out_data++ = i + 1;
-        *out_data++ = i + 2;
-        *out_data++ = i;
-        *out_data++ = i + 2;
-        *out_data++ = i + 3;
-    }
-}
-
-template <typename T>
-void ConvertQuadToTriangleListIndices(u8* out_ptr, const u8* in_ptr, u32 num_vertices) {
-    T* out_data = reinterpret_cast<T*>(out_ptr);
-    const T* in_data = reinterpret_cast<const T*>(in_ptr);
-    for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
-        *out_data++ = in_data[i];
-        *out_data++ = in_data[i + 1];
-        *out_data++ = in_data[i + 2];
-        *out_data++ = in_data[i];
-        *out_data++ = in_data[i + 2];
-        *out_data++ = in_data[i + 3];
-    }
-}
-
 inline void EmitPolygonToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
    u16* out_data = reinterpret_cast<u16*>(out_ptr);
    for (u16 i = 1; i < num_vertices - 1; i++) {
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -7,25 +7,30 @@
 #include <boost/container/static_vector.hpp>

 #include "common/assert.h"
+#include "common/io_file.h"
 #include "common/scope_exit.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/runtime_info.h"
 #include "video_core/amdgpu/resource.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
-
-#include "shader_recompiler/frontend/fetch_shader.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/texture_cache/texture_cache.h"

 namespace Vulkan {

-GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
-                                   DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
-                                   vk::PipelineCache pipeline_cache,
-                                   std::span<const Shader::Info*, MaxShaderStages> infos,
-                                   std::optional<const Shader::Gcn::FetchShaderData> fetch_shader_,
-                                   std::span<const vk::ShaderModule> modules)
+using Shader::Backend::SPIRV::AuxShaderType;
+
+GraphicsPipeline::GraphicsPipeline(
+    const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_,
+    const GraphicsPipelineKey& key_, vk::PipelineCache pipeline_cache,
+    std::span<const Shader::Info*, MaxShaderStages> infos,
+    std::span<const Shader::RuntimeInfo, MaxShaderStages> runtime_infos,
+    std::optional<const Shader::Gcn::FetchShaderData> fetch_shader_,
+    std::span<const vk::ShaderModule> modules)
    : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_},
      fetch_shader{std::move(fetch_shader_)} {
    const vk::Device device = instance.GetDevice();
@@ -88,11 +93,6 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
        .pVertexAttributeDescriptions = vertex_attributes.data(),
    };

-    if (key.prim_type == AmdGpu::PrimitiveType::RectList && !IsEmbeddedVs()) {
-        LOG_WARNING(Render_Vulkan,
-                    "Rectangle List primitive type is only supported for embedded VS");
-    }
-
    auto prim_restart = key.enable_primitive_restart != 0;
    if (prim_restart && IsPrimitiveListTopology() && !instance.IsListRestartSupported()) {
        LOG_WARNING(Render_Vulkan,
@@ -106,9 +106,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
    ASSERT_MSG(!prim_restart || key.primitive_restart_index == 0xFFFF ||
                   key.primitive_restart_index == 0xFFFFFFFF,
               "Primitive restart index other than -1 is not supported yet");
-
+    const bool is_rect_list = key.prim_type == AmdGpu::PrimitiveType::RectList;
+    const bool is_quad_list = key.prim_type == AmdGpu::PrimitiveType::QuadList;
+    const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info;
    const vk::PipelineTessellationStateCreateInfo tessellation_state = {
-        .patchControlPoints = key.patch_control_points,
+        .patchControlPoints = is_rect_list ? 3U : (is_quad_list ? 4U : key.patch_control_points),
    };

    const vk::PipelineRasterizationStateCreateInfo raster_state = {
@@ -232,6 +234,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
            .module = modules[stage],
            .pName = "main",
        });
+    } else if (is_rect_list || is_quad_list) {
+        const auto type = is_quad_list ? AuxShaderType::QuadListTCS : AuxShaderType::RectListTCS;
+        auto tcs = Shader::Backend::SPIRV::EmitAuxilaryTessShader(type, fs_info);
+        shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
+            .stage = vk::ShaderStageFlagBits::eTessellationControl,
+            .module = CompileSPV(tcs, instance.GetDevice()),
+            .pName = "main",
+        });
    }
    stage = u32(Shader::LogicalStage::TessellationEval);
    if (infos[stage]) {
@@ -240,6 +250,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
            .module = modules[stage],
            .pName = "main",
        });
+    } else if (is_rect_list || is_quad_list) {
+        auto tes =
+            Shader::Backend::SPIRV::EmitAuxilaryTessShader(AuxShaderType::PassthroughTES, fs_info);
+        shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
+            .stage = vk::ShaderStageFlagBits::eTessellationEvaluation,
+            .module = CompileSPV(tes, instance.GetDevice()),
+            .pName = "main",
+        });
    }
    stage = u32(Shader::LogicalStage::Fragment);
    if (infos[stage]) {
@@ -322,8 +340,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
        .pStages = shader_stages.data(),
        .pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
        .pInputAssemblyState = &input_assembly,
-        .pTessellationState =
-            stages[u32(Shader::LogicalStage::TessellationControl)] ? &tessellation_state : nullptr,
+        .pTessellationState = &tessellation_state,
        .pViewportState = &viewport_info,
        .pRasterizationState = &raster_state,
        .pMultisampleState = &multisampling,
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -18,7 +18,7 @@ class TextureCache;

 namespace Vulkan {

-static constexpr u32 MaxShaderStages = 5;
+static constexpr u32 MaxShaderStages = static_cast<u32>(Shader::LogicalStage::NumLogicalStages);
 static constexpr u32 MaxVertexBufferCount = 32;

 class Instance;
@@ -64,6 +64,7 @@ public:
    GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
                     const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
                     std::span<const Shader::Info*, MaxShaderStages> stages,
+                     std::span<const Shader::RuntimeInfo, MaxShaderStages> runtime_infos,
                     std::optional<const Shader::Gcn::FetchShaderData> fetch_shader,
                     std::span<const vk::ShaderModule> modules);
    ~GraphicsPipeline();
@@ -72,11 +73,6 @@ public:
        return fetch_shader;
    }

-    bool IsEmbeddedVs() const noexcept {
-        static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
-        return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash;
-    }
-
    auto GetWriteMasks() const {
        return key.write_masks;
    }
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -80,8 +80,8 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
                   : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
 }

-Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
-    auto info = Shader::RuntimeInfo{stage};
+const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
+    auto& info = runtime_infos[u32(l_stage)];
    const auto& regs = liverpool->regs;
    const auto BuildCommon = [&](const auto& program) {
        info.num_user_data = program.settings.num_user_regs;
@@ -90,6 +90,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_
        info.fp_denorm_mode32 = program.settings.fp_denorm_mode32;
        info.fp_round_mode32 = program.settings.fp_round_mode32;
    };
+    info.Initialize(stage);
    switch (stage) {
    case Stage::Local: {
        BuildCommon(regs.ls_program);
@@ -220,9 +221,9 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
    }
    const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
    if (is_new) {
-        it.value() =
-            std::make_unique<GraphicsPipeline>(instance, scheduler, desc_heap, graphics_key,
-                                               *pipeline_cache, infos, fetch_shader, modules);
+        it.value() = std::make_unique<GraphicsPipeline>(instance, scheduler, desc_heap,
+                                                        graphics_key, *pipeline_cache, infos,
+                                                        runtime_infos, fetch_shader, modules);
        if (Config::collectShadersForDebug()) {
            for (auto stage = 0; stage < MaxShaderStages; ++stage) {
                if (infos[stage]) {
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -76,7 +76,7 @@ private:
    vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
                                   std::span<const u32> code, size_t perm_idx,
                                   Shader::Backend::Bindings& binding);
-    Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);
+    const Shader::RuntimeInfo& BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);

 private:
    const Instance& instance;
@@ -90,6 +90,7 @@ private:
    tsl::robin_map<size_t, std::unique_ptr<Program>> program_cache;
    tsl::robin_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_pipelines;
    tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
+    std::array<Shader::RuntimeInfo, MaxShaderStages> runtime_infos{};
    std::array<const Shader::Info*, MaxShaderStages> infos{};
    std::array<vk::ShaderModule, MaxShaderStages> modules{};
    std::optional<Shader::Gcn::FetchShaderData> fetch_shader{};
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -245,7 +245,6 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
    }

    auto state = PrepareRenderState(pipeline->GetMrtMask());
-
    if (!BindResources(pipeline)) {
        return;
    }
@@ -267,10 +266,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
        cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset),
                           instance_offset);
    } else {
-        const u32 num_vertices =
-            regs.primitive_type == AmdGpu::PrimitiveType::RectList ? 4 : regs.num_indices;
-        cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), vertex_offset,
-                    instance_offset);
+        cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), vertex_offset, instance_offset);
    }

    ResetBindings();
@@ -285,18 +281,14 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
    }

    const auto& regs = liverpool->regs;
-    if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList ||
-        regs.primitive_type == AmdGpu::PrimitiveType::Polygon) {
-        // We use a generated index buffer to convert quad lists and polygons to triangles. Since it
+    if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) {
+        // We use a generated index buffer to convert polygons to triangles. Since it
        // changes type of the draw, arguments are not valid for this case. We need to run a
        // conversion pass to repack the indirect arguments buffer first.
        LOG_WARNING(Render_Vulkan, "Primitive type is not supported for indirect draw");
        return;
    }

-    ASSERT_MSG(regs.primitive_type != AmdGpu::PrimitiveType::RectList,
-               "Unsupported primitive type for indirect draw");
-
    const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
    if (!pipeline) {
        return;
@@ -1009,19 +1001,26 @@ void Rasterizer::UpdateViewportScissorState() {
                regs.clipper_control.clip_space == AmdGpu::Liverpool::ClipSpace::MinusWToW
            ? 1.0f
            : 0.0f;
+    const auto vp_ctl = regs.viewport_control;
    for (u32 i = 0; i < Liverpool::NumViewports; i++) {
        const auto& vp = regs.viewports[i];
        const auto& vp_d = regs.viewport_depths[i];
        if (vp.xscale == 0) {
            continue;
        }
+        const auto xoffset = vp_ctl.xoffset_enable ? vp.xoffset : 0.f;
+        const auto xscale = vp_ctl.xscale_enable ? vp.xscale : 1.f;
+        const auto yoffset = vp_ctl.yoffset_enable ? vp.yoffset : 0.f;
+        const auto yscale = vp_ctl.yscale_enable ? vp.yscale : 1.f;
+        const auto zoffset = vp_ctl.zoffset_enable ? vp.zoffset : 0.f;
+        const auto zscale = vp_ctl.zscale_enable ? vp.zscale : 1.f;
        viewports.push_back({
-            .x = vp.xoffset - vp.xscale,
-            .y = vp.yoffset - vp.yscale,
-            .width = vp.xscale * 2.0f,
-            .height = vp.yscale * 2.0f,
-            .minDepth = vp.zoffset - vp.zscale * reduce_z,
-            .maxDepth = vp.zscale + vp.zoffset,
+            .x = xoffset - xscale,
+            .y = yoffset - yscale,
+            .width = xscale * 2.0f,
+            .height = yscale * 2.0f,
+            .minDepth = zoffset - zscale * reduce_z,
+            .maxDepth = zscale + zoffset,
        });
    }

--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -126,6 +126,10 @@ EShLanguage ToEshShaderStage(vk::ShaderStageFlagBits stage) {
        return EShLanguage::EShLangVertex;
    case vk::ShaderStageFlagBits::eGeometry:
        return EShLanguage::EShLangGeometry;
+    case vk::ShaderStageFlagBits::eTessellationControl:
+        return EShLanguage::EShLangTessControl;
+    case vk::ShaderStageFlagBits::eTessellationEvaluation:
+        return EShLanguage::EShLangTessEvaluation;
    case vk::ShaderStageFlagBits::eFragment:
        return EShLanguage::EShLangFragment;
    case vk::ShaderStageFlagBits::eCompute: