renderer_vulkan: Implement rectlist emulation with tessellation

2025-08-05 08:52:36 +00:00 · 2024-12-22 23:33:55 +02:00 · 2024-12-22 23:33:55 +02:00 · 3485a30337
commit 3485a30337
parent 14dc136832
14 changed files with 372 additions and 25 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -630,6 +630,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
                      src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
                      src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
                      src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
+                      src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp
+                      src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.h
                      src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
                      src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
                      src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
--- a/src/common/io_file.h
+++ b/src/common/io_file.h
@ -207,7 +207,7 @@ public:
        return WriteSpan(string);
    }

-    static size_t WriteBytes(const std::filesystem::path path, std::span<const u8> data) {
+    static size_t WriteBytes(const std::filesystem::path path, const auto& data) {
        IOFile out(path, FileAccessMode::Write);
        return out.Write(data);
    }
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@ -1,5 +1,6 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
+
 #include <span>
 #include <type_traits>
 #include <utility>
--- a/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp
@ -0,0 +1,307 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <sirit/sirit.h>
+#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
+
+namespace Shader::Backend::SPIRV {
+
+using Sirit::Id;
+
+struct QuadRectListEmitter : public Sirit::Module { // Emits the auxiliary tessellation shaders
+    explicit QuadRectListEmitter(size_t num_attribs_) // num_attribs_: vec4 attributes to forward
+        : num_attribs{num_attribs_}, inputs{num_attribs}, outputs{num_attribs} {
+        void_id = TypeVoid();
+        bool_id = TypeBool();
+        float_id = TypeFloat(32);
+        uint_id = TypeUInt(32U);
+        int_id = TypeInt(32U, true);
+        bvec2_id = TypeVector(bool_id, 2);
+        vec2_id = TypeVector(float_id, 2);
+        vec3_id = TypeVector(float_id, 3);
+        vec4_id = TypeVector(float_id, 4);
+
+        float_one = Constant(float_id, 1.0f);
+        float_min_one = Constant(float_id, -1.0f);
+        int_zero = Constant(int_id, 0);
+        // gl_PerVertex block type: Position, PointSize, ClipDistance[1], CullDistance[1].
+        const Id float_arr{TypeArray(float_id, Constant(uint_id, 1U))};
+        gl_per_vertex_type = TypeStruct(vec4_id, float_id, float_arr, float_arr);
+        Decorate(gl_per_vertex_type, spv::Decoration::Block);
+        MemberDecorate(gl_per_vertex_type, 0U, spv::Decoration::BuiltIn,
+                       static_cast<u32>(spv::BuiltIn::Position));
+        MemberDecorate(gl_per_vertex_type, 1U, spv::Decoration::BuiltIn,
+                       static_cast<u32>(spv::BuiltIn::PointSize));
+        MemberDecorate(gl_per_vertex_type, 2U, spv::Decoration::BuiltIn,
+                       static_cast<u32>(spv::BuiltIn::ClipDistance));
+        MemberDecorate(gl_per_vertex_type, 3U, spv::Decoration::BuiltIn,
+                       static_cast<u32>(spv::BuiltIn::CullDistance));
+    }
+
+    /// Emits tessellation control shader for interpolating the 4th vertex of rectangle primitive
+    void EmitRectListTCS() {
+        DefineEntry(spv::ExecutionModel::TessellationControl);
+
+        // Write 1.0 to all four outer and both inner tessellation levels (passthrough factors)
+        const Id output_float_id{TypePointer(spv::StorageClass::Output, float_id)};
+        for (int i = 0; i < 4; i++) {
+            const Id ptr{OpAccessChain(output_float_id, gl_tess_level_outer, Int(i))};
+            OpStore(ptr, float_one);
+        }
+        for (int i = 0; i < 2; i++) {
+            const Id ptr{OpAccessChain(output_float_id, gl_tess_level_inner, Int(i))};
+            OpStore(ptr, float_one);
+        }
+
+        const Id input_vec4{TypePointer(spv::StorageClass::Input, vec4_id)};
+        const Id output_vec4{TypePointer(spv::StorageClass::Output, vec4_id)};
+
+        // Everything below computes the weights used to interpolate the 4th vertex of the rect.
+        // Load gl_in[0..2].gl_Position (member 0 of the per-vertex block)
+        std::array<Id, 3> pos;
+        for (int i = 0; i < 3; i++) {
+            pos[i] = OpLoad(vec4_id, OpAccessChain(input_vec4, gl_in, Int(i), int_zero));
+        }
+
+        std::array<Id, 3> point_coord_equal;
+        for (int i = 0; i < 3; i++) {
+            // point_coord_equal[i] = equal(gl_in[i].gl_Position.xy, gl_in[(i + 1) % 3].gl_Position.xy);
+            const Id pos_l_xy{OpVectorShuffle(vec2_id, pos[i], pos[i], 0, 1)};
+            const Id pos_r_xy{OpVectorShuffle(vec2_id, pos[(i + 1) % 3], pos[(i + 1) % 3], 0, 1)};
+            point_coord_equal[i] = OpFOrdEqual(bvec2_id, pos_l_xy, pos_r_xy);
+        }
+        // Vertex i is flagged when it shares x with one neighbour and y with the other (weight -1).
+        std::array<Id, 3> bary_coord;
+        std::array<Id, 3> is_edge_vertex;
+        for (int i = 0; i < 3; i++) {
+            // bool xy_equal = point_coord_equal[i].x && point_coord_equal[(i + 2) % 3].y;
+            const Id xy_equal{
+                OpLogicalAnd(bool_id, OpCompositeExtract(bool_id, point_coord_equal[i], 0),
+                             OpCompositeExtract(bool_id, point_coord_equal[(i + 2) % 3], 1))};
+            // bool yx_equal = point_coord_equal[i].y && point_coord_equal[(i + 2) % 3].x;
+            const Id yx_equal{
+                OpLogicalAnd(bool_id, OpCompositeExtract(bool_id, point_coord_equal[i], 1),
+                             OpCompositeExtract(bool_id, point_coord_equal[(i + 2) % 3], 0))};
+            // bary_coord[i] = (xy_equal || yx_equal) ? -1.f : 1.f;
+            is_edge_vertex[i] = OpLogicalOr(bool_id, xy_equal, yx_equal);
+            bary_coord[i] = OpSelect(float_id, is_edge_vertex[i], float_min_one, float_one);
+        }
+
+        const auto interpolate = [&](Id v0, Id v1, Id v2) { // weighted sum of three vec4 values
+            // return v0 * bary_coord.x + v1 * bary_coord.y + v2 * bary_coord.z;
+            const Id p0{OpVectorTimesScalar(vec4_id, v0, bary_coord[0])};
+            const Id p1{OpVectorTimesScalar(vec4_id, v1, bary_coord[1])};
+            const Id p2{OpVectorTimesScalar(vec4_id, v2, bary_coord[2])};
+            return OpFAdd(vec4_id, p0, OpFAdd(vec4_id, p1, p2));
+        };
+
+        // int vertex_index_id = is_edge_vertex[1] ? 1 : (is_edge_vertex[2] ? 2 : 0);
+        Id vertex_index{OpSelect(int_id, is_edge_vertex[2], Int(2), Int(0))};
+        vertex_index = OpSelect(int_id, is_edge_vertex[1], Int(1), vertex_index);
+
+        // int index = (vertex_index_id + gl_InvocationID) % 3;
+        const Id invocation_id{OpLoad(int_id, gl_invocation_id)};
+        const Id invocation_3{OpIEqual(bool_id, invocation_id, Int(3))}; // gl_InvocationID == 3
+        const Id index{OpSMod(int_id, OpIAdd(int_id, vertex_index, invocation_id), Int(3))};
+
+        // gl_out[gl_InvocationID].gl_Position = gl_InvocationID == 3 ? pos3 : gl_in[index].gl_Position;
+        const Id pos3{interpolate(pos[0], pos[1], pos[2])}; // position of the generated 4th vertex
+        const Id in_ptr{OpAccessChain(input_vec4, gl_in, index, Int(0))};
+        const Id position{OpSelect(vec4_id, invocation_3, pos3, OpLoad(vec4_id, in_ptr))};
+        OpStore(OpAccessChain(output_vec4, gl_out, invocation_id, Int(0)), position);
+
+        // Forward every attribute the same way as the position (rotated copy or interpolated)
+        for (int i = 0; i < inputs.size(); i++) {
+            // vec4 in_paramN3 = interpolate(bary_coord, in_paramN[0], in_paramN[1], in_paramN[2]);
+            const Id v0{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], Int(0)))};
+            const Id v1{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], Int(1)))};
+            const Id v2{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], Int(2)))};
+            const Id in_param3{interpolate(v0, v1, v2)};
+            // out_paramN[gl_InvocationID] = gl_InvocationID == 3 ? in_paramN3 : in_paramN[index];
+            const Id in_param{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], index))};
+            const Id out_param{OpSelect(vec4_id, invocation_3, in_param3, in_param)};
+            OpStore(OpAccessChain(output_vec4, outputs[i], invocation_id), out_param);
+        }
+
+        OpReturn();
+        OpFunctionEnd();
+    }
+
+    /// Emits a passthrough quad tessellation control shader that outputs 4 control points.
+    void EmitPassthroughTCS() {
+        DefineEntry(spv::ExecutionModel::TessellationControl);
+        // NOTE(review): unlike EmitRectListTCS, no tessellation levels are stored here - confirm.
+        const Id input_vec4{TypePointer(spv::StorageClass::Input, vec4_id)};
+        const Id output_vec4{TypePointer(spv::StorageClass::Output, vec4_id)};
+        const Id invocation_id{OpLoad(int_id, gl_invocation_id)};
+
+        // gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
+        const Id in_position{OpLoad(vec4_id, OpAccessChain(input_vec4, gl_in, invocation_id, Int(0)))};
+        OpStore(OpAccessChain(output_vec4, gl_out, invocation_id, Int(0)), in_position);
+
+        for (int i = 0; i < num_attribs; i++) {
+            // out_paramN[gl_InvocationID] = in_paramN[gl_InvocationID];
+            const Id in_param{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], invocation_id))};
+            OpStore(OpAccessChain(output_vec4, outputs[i], invocation_id), in_param);
+        }
+
+        OpReturn();
+        OpFunctionEnd();
+    }
+
+    /// Emits a quad tessellation evaluation shader forwarding the control point at gl_TessCoord.
+    void EmitPassthroughTES() {
+        DefineEntry(spv::ExecutionModel::TessellationEvaluation);
+
+        // const int index = int(gl_TessCoord.y) * 2 + int(gl_TessCoord.x);
+        const Id input_float{TypePointer(spv::StorageClass::Input, float_id)};
+        const Id tess_coord_x{OpLoad(float_id, OpAccessChain(input_float, gl_tess_coord, Int(0)))};
+        const Id tess_coord_y{OpLoad(float_id, OpAccessChain(input_float, gl_tess_coord, Int(1)))};
+        const Id index{OpIAdd(int_id, OpIMul(int_id, OpConvertFToS(int_id, tess_coord_y), Int(2)),
+                              OpConvertFToS(int_id, tess_coord_x))};
+
+        // gl_Position = gl_in[index].gl_Position;
+        const Id input_vec4{TypePointer(spv::StorageClass::Input, vec4_id)};
+        const Id output_vec4{TypePointer(spv::StorageClass::Output, vec4_id)};
+        const Id position{OpLoad(vec4_id, OpAccessChain(input_vec4, gl_in, index, Int(0)))};
+        OpStore(OpAccessChain(output_vec4, gl_per_vertex, Int(0)), position);
+
+        // out_paramN = in_paramN[index];
+        for (int i = 0; i < num_attribs; i++) {
+            const Id param{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], index))};
+            OpStore(outputs[i], param);
+        }
+
+        OpReturn();
+        OpFunctionEnd();
+    }
+
+private:
+    Id Int(s32 value) { // Signed 32-bit integer constant
+        return Constant(int_id, value);
+    }
+
+    Id AddInput(Id type) { // Declares an Input global and records it for the entry point
+        const Id input{AddGlobalVariable(TypePointer(spv::StorageClass::Input, type),
+                                         spv::StorageClass::Input)};
+        interfaces.push_back(input);
+        return input;
+    }
+
+    Id AddOutput(Id type) { // Declares an Output global and records it for the entry point
+        const Id output{AddGlobalVariable(TypePointer(spv::StorageClass::Output, type),
+                                          spv::StorageClass::Output)};
+        interfaces.push_back(output);
+        return output;
+    }
+    // Adds capabilities, execution modes and I/O variables, then opens the "main" entry point.
+    void DefineEntry(spv::ExecutionModel model) {
+        AddCapability(spv::Capability::Shader);
+        AddCapability(spv::Capability::Tessellation);
+        const Id void_function{TypeFunction(void_id)};
+        main = OpFunction(void_id, spv::FunctionControlMask::MaskNone, void_function);
+        if (model == spv::ExecutionModel::TessellationControl) {
+            AddExecutionMode(main, spv::ExecutionMode::OutputVertices, 4U);
+        } else {
+            AddExecutionMode(main, spv::ExecutionMode::Quads);
+            AddExecutionMode(main, spv::ExecutionMode::SpacingEqual);
+            AddExecutionMode(main, spv::ExecutionMode::VertexOrderCw);
+        }
+        DefineInputs(model);
+        DefineOutputs(model);
+        AddEntryPoint(model, main, "main", interfaces);
+        AddLabel(OpLabel());
+    }
+    // Declares outputs: gl_out[4] and tess levels for TCS, gl_PerVertex otherwise, plus attributes.
+    void DefineOutputs(spv::ExecutionModel model) {
+        if (model == spv::ExecutionModel::TessellationControl) {
+            const Id gl_per_vertex_array{TypeArray(gl_per_vertex_type, Constant(uint_id, 4U))};
+            gl_out = AddOutput(gl_per_vertex_array);
+
+            const Id arr2_id{TypeArray(float_id, Constant(uint_id, 2U))};
+            gl_tess_level_inner = AddOutput(arr2_id);
+            Decorate(gl_tess_level_inner, spv::Decoration::BuiltIn, spv::BuiltIn::TessLevelInner);
+            Decorate(gl_tess_level_inner, spv::Decoration::Patch);
+
+            const Id arr4_id{TypeArray(float_id, Constant(uint_id, 4U))};
+            gl_tess_level_outer = AddOutput(arr4_id);
+            Decorate(gl_tess_level_outer, spv::Decoration::BuiltIn, spv::BuiltIn::TessLevelOuter);
+            Decorate(gl_tess_level_outer, spv::Decoration::Patch);
+        } else {
+            gl_per_vertex = AddOutput(gl_per_vertex_type);
+        }
+        for (int i = 0; i < num_attribs; i++) { // attribute i is bound to Location i
+            outputs[i] = AddOutput(model == spv::ExecutionModel::TessellationControl
+                                       ? TypeArray(vec4_id, Int(4))
+                                       : vec4_id);
+            Decorate(outputs[i], spv::Decoration::Location, i);
+        }
+    }
+    // Declares inputs: gl_TessCoord (TES) or gl_InvocationID (TCS), gl_in[32] and attribute arrays.
+    void DefineInputs(spv::ExecutionModel model) {
+        if (model == spv::ExecutionModel::TessellationEvaluation) {
+            gl_tess_coord = AddInput(vec3_id);
+            Decorate(gl_tess_coord, spv::Decoration::BuiltIn, spv::BuiltIn::TessCoord);
+        } else {
+            gl_invocation_id = AddInput(int_id);
+            Decorate(gl_invocation_id, spv::Decoration::BuiltIn, spv::BuiltIn::InvocationId);
+        }
+        const Id gl_per_vertex_array{TypeArray(gl_per_vertex_type, Constant(uint_id, 32U))};
+        gl_in = AddInput(gl_per_vertex_array);
+        const Id float_arr{TypeArray(vec4_id, Int(32))}; // vec4[32], despite the name
+        for (int i = 0; i < num_attribs; i++) {
+            inputs[i] = AddInput(float_arr);
+            Decorate(inputs[i], spv::Decoration::Location, i);
+        }
+    }
+
+private:
+    size_t num_attribs; // Number of vec4 attributes forwarded by the emitted shader
+    Id main;
+    Id void_id;
+    Id bool_id;
+    Id float_id;
+    Id uint_id;
+    Id int_id;
+    Id bvec2_id;
+    Id vec2_id;
+    Id vec3_id;
+    Id vec4_id;
+    Id float_one;
+    Id float_min_one;
+    Id int_zero;
+    Id gl_per_vertex_type;
+    Id gl_in;
+    union { // DefineOutputs sets gl_out for TCS and gl_per_vertex for TES
+        Id gl_out;
+        Id gl_per_vertex;
+    };
+    Id gl_tess_level_inner;
+    Id gl_tess_level_outer;
+    union { // DefineInputs sets gl_tess_coord for TES and gl_invocation_id for TCS
+        Id gl_tess_coord;
+        Id gl_invocation_id;
+    };
+    std::vector<Id> inputs;
+    std::vector<Id> outputs;
+    std::vector<Id> interfaces; // Variables passed to AddEntryPoint as the interface list
+};
+
+/// Generates one of the auxiliary tessellation shaders used for rect-list emulation.
+/// `type` selects the shader to emit; `num_attribs` is the number of vec4 attributes it
+/// forwards. A `type` matching none of the known values emits no entry point.
+/// Returns whatever the emitter's Assemble() produces for the module.
+std::vector<u32> EmitAuxilaryTessShader(AuxShaderType type, size_t num_attribs) {
+    QuadRectListEmitter emitter{num_attribs};
+    if (type == AuxShaderType::RectListTCS) {
+        emitter.EmitRectListTCS();
+    } else if (type == AuxShaderType::PassthoughTCS) {
+        emitter.EmitPassthroughTCS();
+    } else if (type == AuxShaderType::PassthroughTES) {
+        emitter.EmitPassthroughTES();
+    }
+    return emitter.Assemble();
+}
+
+
+} // namespace Shader::Backend::SPIRV
--- a/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.h
@ -0,0 +1,19 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <vector>
+#include "common/types.h"
+
+namespace Shader::Backend::SPIRV {
+
+enum class AuxShaderType : u32 { // Selects the shader EmitAuxilaryTessShader generates
+    RectListTCS = 0,    // Control shader that synthesizes the 4th rectangle vertex
+    PassthoughTCS = 1,  // Control shader that copies its control points unchanged
+    PassthroughTES = 2, // Evaluation shader that forwards the selected control point
+};
+
+[[nodiscard]] std::vector<u32> EmitAuxilaryTessShader(AuxShaderType type, size_t num_attribs);
+
+} // namespace Shader::Backend::SPIRV
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@ -227,7 +227,7 @@ struct RuntimeInfo {
        ComputeRuntimeInfo cs_info;
    };

-    RuntimeInfo(Stage stage_) {
+    void Initialize(Stage stage_) {
        memset(this, 0, sizeof(*this));
        stage = stage_;
    }
--- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@ -121,7 +121,7 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) {
        // Needs to generate index buffer on the fly.
        return vk::PrimitiveTopology::eTriangleList;
    case AmdGpu::PrimitiveType::RectList:
-        return vk::PrimitiveTopology::eTriangleStrip;
+        return vk::PrimitiveTopology::ePatchList;
    default:
        UNREACHABLE();
        return vk::PrimitiveTopology::eTriangleList;
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@ -8,22 +8,27 @@

 #include "common/assert.h"
 #include "common/scope_exit.h"
+#include "common/io_file.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/runtime_info.h"
 #include "video_core/amdgpu/resource.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
-
-#include "shader_recompiler/frontend/fetch_shader.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/texture_cache/texture_cache.h"

 namespace Vulkan {

+using Shader::Backend::SPIRV::AuxShaderType;
+
 GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
                                   DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
                                   vk::PipelineCache pipeline_cache,
                                   std::span<const Shader::Info*, MaxShaderStages> infos,
+                                   std::span<const Shader::RuntimeInfo, MaxShaderStages> runtime_infos,
                                   std::optional<const Shader::Gcn::FetchShaderData> fetch_shader_,
                                   std::span<const vk::ShaderModule> modules)
    : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_},
@ -88,11 +93,6 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
        .pVertexAttributeDescriptions = vertex_attributes.data(),
    };

-    if (key.prim_type == AmdGpu::PrimitiveType::RectList && !IsEmbeddedVs()) {
-        LOG_WARNING(Render_Vulkan,
-                    "Rectangle List primitive type is only supported for embedded VS");
-    }
-
    auto prim_restart = key.enable_primitive_restart != 0;
    if (prim_restart && IsPrimitiveListTopology() && !instance.IsListRestartSupported()) {
        LOG_WARNING(Render_Vulkan,
@ -106,9 +106,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
    ASSERT_MSG(!prim_restart || key.primitive_restart_index == 0xFFFF ||
                   key.primitive_restart_index == 0xFFFFFFFF,
               "Primitive restart index other than -1 is not supported yet");
-
+    const bool is_rect_list = key.prim_type == AmdGpu::PrimitiveType::RectList;
+    const size_t num_fs_inputs =
+        runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info.num_inputs;
    const vk::PipelineTessellationStateCreateInfo tessellation_state = {
-        .patchControlPoints = key.patch_control_points,
+        .patchControlPoints = is_rect_list ? 3U : key.patch_control_points,
    };

    const vk::PipelineRasterizationStateCreateInfo raster_state = {
@ -232,6 +234,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
            .module = modules[stage],
            .pName = "main",
        });
+    } else if (is_rect_list) {
+        auto tcs = Shader::Backend::SPIRV::EmitAuxilaryTessShader(AuxShaderType::RectListTCS,
+                                                                  num_fs_inputs);
+        shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
+            .stage = vk::ShaderStageFlagBits::eTessellationControl,
+            .module = CompileSPV(tcs, instance.GetDevice()),
+            .pName = "main",
+        });
    }
    stage = u32(Shader::LogicalStage::TessellationEval);
    if (infos[stage]) {
@ -240,6 +250,13 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
            .module = modules[stage],
            .pName = "main",
        });
+    } else if (is_rect_list) {
+        auto tes = Shader::Backend::SPIRV::EmitAuxilaryTessShader(AuxShaderType::PassthroughTES, num_fs_inputs);
+        shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
+            .stage = vk::ShaderStageFlagBits::eTessellationEvaluation,
+            .module = CompileSPV(tes, instance.GetDevice()),
+            .pName = "main",
+        });
    }
    stage = u32(Shader::LogicalStage::Fragment);
    if (infos[stage]) {
@ -322,8 +339,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
        .pStages = shader_stages.data(),
        .pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
        .pInputAssemblyState = &input_assembly,
-        .pTessellationState =
-            stages[u32(Shader::LogicalStage::TessellationControl)] ? &tessellation_state : nullptr,
+        .pTessellationState = &tessellation_state,
        .pViewportState = &viewport_info,
        .pRasterizationState = &raster_state,
        .pMultisampleState = &multisampling,
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@ -64,6 +64,7 @@ public:
    GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
                     const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
                     std::span<const Shader::Info*, MaxShaderStages> stages,
+                     std::span<const Shader::RuntimeInfo, MaxShaderStages> runtime_infos,
                     std::optional<const Shader::Gcn::FetchShaderData> fetch_shader,
                     std::span<const vk::ShaderModule> modules);
    ~GraphicsPipeline();
@ -72,11 +73,6 @@ public:
        return fetch_shader;
    }

-    bool IsEmbeddedVs() const noexcept {
-        static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
-        return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash;
-    }
-
    auto GetWriteMasks() const {
        return key.write_masks;
    }
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@ -80,8 +80,8 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
                   : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
 }

-Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
-    auto info = Shader::RuntimeInfo{stage};
+const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
+    auto& info = runtime_infos[u32(l_stage)];
    const auto& regs = liverpool->regs;
    const auto BuildCommon = [&](const auto& program) {
        info.num_user_data = program.settings.num_user_regs;
@ -90,6 +90,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_
        info.fp_denorm_mode32 = program.settings.fp_denorm_mode32;
        info.fp_round_mode32 = program.settings.fp_round_mode32;
    };
+    info.Initialize(stage);
    switch (stage) {
    case Stage::Local: {
        BuildCommon(regs.ls_program);
@ -222,7 +223,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
    if (is_new) {
        it.value() =
            std::make_unique<GraphicsPipeline>(instance, scheduler, desc_heap, graphics_key,
-                                               *pipeline_cache, infos, fetch_shader, modules);
+                                               *pipeline_cache, infos, runtime_infos, fetch_shader, modules);
        if (Config::collectShadersForDebug()) {
            for (auto stage = 0; stage < MaxShaderStages; ++stage) {
                if (infos[stage]) {
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@ -76,7 +76,7 @@ private:
    vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
                                   std::span<const u32> code, size_t perm_idx,
                                   Shader::Backend::Bindings& binding);
-    Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);
+    const Shader::RuntimeInfo& BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);

 private:
    const Instance& instance;
@ -90,6 +90,7 @@ private:
    tsl::robin_map<size_t, std::unique_ptr<Program>> program_cache;
    tsl::robin_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_pipelines;
    tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
+    std::array<Shader::RuntimeInfo, MaxShaderStages> runtime_infos{};
    std::array<const Shader::Info*, MaxShaderStages> infos{};
    std::array<vk::ShaderModule, MaxShaderStages> modules{};
    std::optional<Shader::Gcn::FetchShaderData> fetch_shader{};
--- a/src/video_core/renderer_vulkan/vk_platform.cpp
+++ b/src/video_core/renderer_vulkan/vk_platform.cpp
@ -1,6 +1,6 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
-
+#pragma clang optimize off // NOTE(review): turns off clang optimization for the code below; looks like a debugging leftover - confirm before merging
 // Include the vulkan platform specific header
 #if defined(ANDROID)
 #define VK_USE_PLATFORM_ANDROID_KHR
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@ -238,7 +238,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
                           instance_offset);
    } else {
        const u32 num_vertices =
-            regs.primitive_type == AmdGpu::PrimitiveType::RectList ? 4 : regs.num_indices;
+            regs.primitive_type == AmdGpu::PrimitiveType::RectList ? 3 : regs.num_indices;
        cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), vertex_offset,
                    instance_offset);
    }
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@ -126,6 +126,10 @@ EShLanguage ToEshShaderStage(vk::ShaderStageFlagBits stage) {
        return EShLanguage::EShLangVertex;
    case vk::ShaderStageFlagBits::eGeometry:
        return EShLanguage::EShLangGeometry;
+    case vk::ShaderStageFlagBits::eTessellationControl:
+        return EShLanguage::EShLangTessControl;
+    case vk::ShaderStageFlagBits::eTessellationEvaluation:
+        return EShLanguage::EShLangTessEvaluation;
    case vk::ShaderStageFlagBits::eFragment:
        return EShLanguage::EShLangFragment;
    case vk::ShaderStageFlagBits::eCompute: