video_core: Add basic vertex, index buffer handling and pipeline caching

This commit is contained in:
raphaelthegreat 2024-05-24 23:50:56 +03:00
parent 0eaa7d5859
commit f480d091ce
27 changed files with 506 additions and 174 deletions

2
.gitmodules vendored
View File

@ -50,7 +50,7 @@
[submodule "externals/toml11"] [submodule "externals/toml11"]
path = externals/toml11 path = externals/toml11
url = https://github.com/ToruNiina/toml11.git url = https://github.com/ToruNiina/toml11.git
[submodule "externals/xxHash"] [submodule "externals/xxhash"]
path = externals/xxHash path = externals/xxHash
url = https://github.com/Cyan4973/xxHash.git url = https://github.com/Cyan4973/xxHash.git
[submodule "externals/zydis"] [submodule "externals/zydis"]

View File

@ -74,8 +74,8 @@ add_subdirectory(magic_enum EXCLUDE_FROM_ALL)
add_subdirectory(toml11 EXCLUDE_FROM_ALL) add_subdirectory(toml11 EXCLUDE_FROM_ALL)
# xxHash # xxHash
add_library(xxhash INTERFACE) add_library(xxhash xxhash/xxhash.h xxhash/xxhash.c)
target_include_directories(xxhash INTERFACE xxhash) target_include_directories(xxhash PUBLIC xxhash)
# Zydis # Zydis
option(ZYDIS_BUILD_TOOLS "" OFF) option(ZYDIS_BUILD_TOOLS "" OFF)
@ -92,4 +92,4 @@ endif()
add_subdirectory(sirit EXCLUDE_FROM_ALL) add_subdirectory(sirit EXCLUDE_FROM_ALL)
if (WIN32) if (WIN32)
target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument") target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument")
endif() endif()

View File

@ -7,6 +7,7 @@
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "core/libraries/error_codes.h" #include "core/libraries/error_codes.h"
#include "core/memory.h" #include "core/memory.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Core { namespace Core {
@ -61,6 +62,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
new_vma.prot = prot; new_vma.prot = prot;
new_vma.name = name; new_vma.name = name;
new_vma.type = type; new_vma.type = type;
if (type == VMAType::Direct) {
MapVulkanMemory(mapped_addr, size);
}
}; };
// When virtual addr is zero let the address space manager pick the address. // When virtual addr is zero let the address space manager pick the address.
@ -103,6 +108,10 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr, ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr,
"Attempting to unmap partially mapped range"); "Attempting to unmap partially mapped range");
if (it->second.type == VMAType::Direct) {
UnmapVulkanMemory(virtual_addr, size);
}
// Mark region as free and attempt to coalesce it with neighbours. // Mark region as free and attempt to coalesce it with neighbours.
auto& vma = it->second; auto& vma = it->second;
vma.type = VMAType::Free; vma.type = VMAType::Free;
@ -114,6 +123,13 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
impl.Unmap(virtual_addr, size); impl.Unmap(virtual_addr, size);
} }
std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) {
    // Locate the mapping that contains addr: the entry with the greatest base
    // address that is <= addr.
    auto it = mapped_memories.upper_bound(addr);
    // Guard BEFORE decrementing: if addr precedes every mapping (or the map is
    // empty), upper_bound returns begin() and std::prev(begin()) is undefined
    // behavior. The original asserted only after the decrement.
    ASSERT_MSG(it != mapped_memories.begin(), "Address is not backed by any vulkan mapping");
    it = std::prev(it);
    ASSERT(it->first <= addr);
    // Return the Vulkan buffer backing the mapping and the byte offset of addr
    // within that buffer.
    return std::make_pair(*it->second.buffer, addr - it->first);
}
VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
auto vma_handle = FindVMA(virtual_addr); auto vma_handle = FindVMA(virtual_addr);
ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map");
@ -171,4 +187,81 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) {
return iter; return iter;
} }
void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
    // Imports the guest memory range [addr, addr + size) into Vulkan as external
    // host memory and wraps it in a buffer, so the GPU can consume guest data
    // (vertex/index/uniform) without explicit copies.
    const vk::Device device = instance->GetDevice();
    const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties();
    void* host_pointer = reinterpret_cast<void*>(addr);
    const auto host_mem_props = device.getMemoryHostPointerPropertiesEXT(
        vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, host_pointer);
    ASSERT(host_mem_props.memoryTypeBits != 0);

    int mapped_memory_type = -1;
    auto find_mem_type_with_flag = [&](const vk::MemoryPropertyFlags flags) {
        u32 host_mem_types = host_mem_props.memoryTypeBits;
        while (host_mem_types != 0) {
            // Pick the lowest set bit as the candidate memory type.
            mapped_memory_type = std::countr_zero(host_mem_types);
            // Use an unsigned literal: (1 << 31) on a signed int is UB.
            host_mem_types -= (1u << mapped_memory_type);
            if ((memory_props.memoryTypes[mapped_memory_type].propertyFlags & flags) == flags) {
                return;
            }
        }
        mapped_memory_type = -1;
    };

    // First try to find a memory that is both coherent and cached
    find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent |
                            vk::MemoryPropertyFlagBits::eHostCached);
    if (mapped_memory_type == -1) {
        // Then only coherent (lower performance)
        find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent);
    }
    if (mapped_memory_type == -1) {
        // Fall back to the first importable type; rendering may be incorrect.
        LOG_CRITICAL(Render_Vulkan, "No coherent memory available for memory mapping");
        mapped_memory_type = std::countr_zero(host_mem_props.memoryTypeBits);
    }

    const vk::StructureChain alloc_info = {
        vk::MemoryAllocateInfo{
            .allocationSize = size,
            .memoryTypeIndex = static_cast<uint32_t>(mapped_memory_type),
        },
        vk::ImportMemoryHostPointerInfoEXT{
            .handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
            .pHostPointer = host_pointer,
        },
    };

    const auto [it, new_memory] = mapped_memories.try_emplace(addr);
    ASSERT_MSG(new_memory, "Attempting to remap already mapped vulkan memory");

    auto& memory = it->second;
    memory.backing = device.allocateMemoryUnique(alloc_info.get());
    // Record the mapped size; UnmapVulkanMemory asserts buffer_size == size and
    // the original code never assigned it (try_emplace value-initializes it to 0,
    // so the unmap assert would always fire).
    memory.buffer_size = size;

    constexpr vk::BufferUsageFlags MapFlags =
        vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer |
        vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
        vk::BufferUsageFlagBits::eUniformBuffer;
    const vk::StructureChain buffer_info = {
        vk::BufferCreateInfo{
            .size = size,
            .usage = MapFlags,
            .sharingMode = vk::SharingMode::eExclusive,
        },
        vk::ExternalMemoryBufferCreateInfoKHR{
            .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
        }};
    memory.buffer = device.createBufferUnique(buffer_info.get());
    device.bindBufferMemory(*memory.buffer, *memory.backing, 0);
}
void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) {
    // Releases the Vulkan objects created for this guest mapping. Erasing the
    // entry destroys the vk::UniqueBuffer and vk::UniqueDeviceMemory it holds.
    const auto it = mapped_memories.find(addr);
    // The range must match an existing mapping exactly; partial unmaps are not
    // supported. NOTE(review): this compares MappedMemory::buffer_size against
    // the requested size — confirm buffer_size is assigned at mapping time.
    ASSERT(it != mapped_memories.end() && it->second.buffer_size == size);
    mapped_memories.erase(it);
}
} // namespace Core } // namespace Core

View File

@ -3,6 +3,7 @@
#pragma once #pragma once
#include <functional>
#include <string_view> #include <string_view>
#include <vector> #include <vector>
#include <boost/icl/split_interval_map.hpp> #include <boost/icl/split_interval_map.hpp>
@ -10,6 +11,11 @@
#include "common/singleton.h" #include "common/singleton.h"
#include "common/types.h" #include "common/types.h"
#include "core/address_space.h" #include "core/address_space.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
}
namespace Core { namespace Core {
@ -86,6 +92,10 @@ public:
explicit MemoryManager(); explicit MemoryManager();
~MemoryManager(); ~MemoryManager();
void SetInstance(const Vulkan::Instance* instance_) {
instance = instance_;
}
PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment, PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
int memory_type); int memory_type);
@ -97,11 +107,9 @@ public:
void UnmapMemory(VAddr virtual_addr, size_t size); void UnmapMemory(VAddr virtual_addr, size_t size);
private: std::pair<vk::Buffer, size_t> GetVulkanBuffer(VAddr addr);
bool HasOverlap(VAddr addr, size_t size) const {
return vma_map.find(addr) != vma_map.end();
}
private:
VMAHandle FindVMA(VAddr target) { VMAHandle FindVMA(VAddr target) {
// Return first the VMA with base >= target. // Return first the VMA with base >= target.
const auto it = vma_map.lower_bound(target); const auto it = vma_map.lower_bound(target);
@ -117,10 +125,22 @@ private:
VMAHandle MergeAdjacent(VMAHandle iter); VMAHandle MergeAdjacent(VMAHandle iter);
void MapVulkanMemory(VAddr addr, size_t size);
void UnmapVulkanMemory(VAddr addr, size_t size);
private: private:
AddressSpace impl; AddressSpace impl;
std::vector<DirectMemoryArea> allocations; std::vector<DirectMemoryArea> allocations;
VMAMap vma_map; VMAMap vma_map;
struct MappedMemory {
vk::UniqueBuffer buffer;
vk::UniqueDeviceMemory backing;
size_t buffer_size;
};
std::map<VAddr, MappedMemory> mapped_memories;
const Vulkan::Instance* instance{};
}; };
using Memory = Common::Singleton<MemoryManager>; using Memory = Common::Singleton<MemoryManager>;

View File

@ -175,12 +175,14 @@ void EmitContext::DefineInputs(const IR::Program& program) {
const Id id{DefineInput(type, input.binding)}; const Id id{DefineInput(type, input.binding)};
Name(id, fmt::format("vs_in_attr{}", input.binding)); Name(id, fmt::format("vs_in_attr{}", input.binding));
input_params[input.binding] = GetAttributeInfo(input.fmt, id); input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id);
} }
break; break;
case Stage::Fragment: case Stage::Fragment:
for (const auto& input : info.ps_inputs) { for (const auto& input : info.ps_inputs) {
if (input.is_default) { if (input.is_default) {
input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value), input_f32, F32[1]}; input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value),
input_f32, F32[1]};
continue; continue;
} }
const IR::Attribute param{IR::Attribute::Param0 + input.param_index}; const IR::Attribute param{IR::Attribute::Param0 + input.param_index};
@ -192,6 +194,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
} }
Name(id, fmt::format("fs_in_attr{}", input.semantic)); Name(id, fmt::format("fs_in_attr{}", input.semantic));
input_params[input.semantic] = {id, input_f32, F32[1], num_components}; input_params[input.semantic] = {id, input_f32, F32[1], num_components};
interfaces.push_back(id);
} }
default: default:
break; break;
@ -212,6 +215,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
const Id id{DefineOutput(F32[num_components], i)}; const Id id{DefineOutput(F32[num_components], i)};
Name(id, fmt::format("out_attr{}", i)); Name(id, fmt::format("out_attr{}", i));
output_params[i] = {id, output_f32, F32[1], num_components}; output_params[i] = {id, output_f32, F32[1], num_components};
interfaces.push_back(id);
} }
break; break;
case Stage::Fragment: case Stage::Fragment:

View File

@ -40,7 +40,7 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
struct VsharpLoad { struct VsharpLoad {
u32 dword_offset{}; u32 dword_offset{};
s32 base_sgpr{}; s32 base_sgpr{};
s32 dst_sgpr{-1}; s32 dst_reg{-1};
}; };
boost::container::static_vector<VsharpLoad, 16> loads; boost::container::static_vector<VsharpLoad, 16> loads;
@ -57,11 +57,13 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
} }
if (inst.inst_class == InstClass::VectorMemBufFmt) { if (inst.inst_class == InstClass::VectorMemBufFmt) {
// SRSRC is in units of 4 SPGRs while SBASE is in pairs of SGPRs
const u32 base_sgpr = inst.src[2].code * 4;
// Find the load instruction that loaded the V# to the SPGR. // Find the load instruction that loaded the V# to the SPGR.
// This is so we can determine its index in the vertex table. // This is so we can determine its index in the vertex table.
const auto it = std::ranges::find_if(loads, [&](VsharpLoad& load) { const auto it = std::ranges::find_if(
return load.dst_sgpr == inst.src[2].code * 4; loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; });
});
auto& attrib = attributes.emplace_back(); auto& attrib = attributes.emplace_back();
attrib.semantic = semantic_index++; attrib.semantic = semantic_index++;
@ -71,7 +73,7 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
attrib.dword_offset = it->dword_offset; attrib.dword_offset = it->dword_offset;
// Mark load as used. // Mark load as used.
it->dst_sgpr = -1; it->dst_reg = -1;
} }
} }

View File

@ -9,11 +9,11 @@
namespace Shader::Gcn { namespace Shader::Gcn {
struct VertexAttribute { struct VertexAttribute {
u8 semantic; ///< Semantic index of the attribute u8 semantic; ///< Semantic index of the attribute
u8 dest_vgpr; ///< Destination VGPR to load first component u8 dest_vgpr; ///< Destination VGPR to load first component.
u8 num_elements; ///< Number of components to load u8 num_elements; ///< Number of components to load
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
u8 dword_offset; ///< The dword offset of the V# that describes this attribute. u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
}; };
std::vector<VertexAttribute> ParseFetchShader(const u32* code); std::vector<VertexAttribute> ParseFetchShader(const u32* code);

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h" #include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
@ -103,20 +103,21 @@ void Translator::EmitFetch(const GcnInst& inst) {
// Parse the assembly to generate a list of attributes. // Parse the assembly to generate a list of attributes.
const auto attribs = ParseFetchShader(code); const auto attribs = ParseFetchShader(code);
for (const auto& attrib : attribs) { for (const auto& attrib : attribs) {
IR::VectorReg dst_reg{attrib.dest_vgpr};
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr};
for (u32 i = 0; i < attrib.num_elements; i++) { for (u32 i = 0; i < attrib.num_elements; i++) {
ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i)); ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i));
} }
// Read the V# of the attribute to figure out component number and type. // Read the V# of the attribute to figure out component number and type.
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
attrib.dword_offset);
const u32 num_components = AmdGpu::NumComponents(buffer.data_format); const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
info.vs_inputs.push_back({ info.vs_inputs.push_back({
.fmt = buffer.num_format, .fmt = buffer.num_format,
.binding = attrib.semantic, .binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components), .num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
}); });
} }
} }

View File

@ -12,8 +12,6 @@
namespace Shader::IR { namespace Shader::IR {
struct Program { struct Program {
explicit Program(const Info&& info_) : info{info_} {}
AbstractSyntaxList syntax_list; AbstractSyntaxList syntax_list;
BlockList blocks; BlockList blocks;
BlockList post_order_blocks; BlockList post_order_blocks;

View File

@ -44,7 +44,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
file.close(); file.close();
// Decode and save instructions // Decode and save instructions
IR::Program program{std::move(info)}; IR::Program program;
program.ins_list.reserve(token.size()); program.ins_list.reserve(token.size());
while (!slice.atEnd()) { while (!slice.atEnd()) {
program.ins_list.emplace_back(decoder.decodeInstruction(slice)); program.ins_list.emplace_back(decoder.decodeInstruction(slice));
@ -55,6 +55,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Gcn::CFG cfg{gcn_block_pool, program.ins_list}; Gcn::CFG cfg{gcn_block_pool, program.ins_list};
// Structurize control flow graph and create program. // Structurize control flow graph and create program.
program.info = std::move(info);
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info); program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info);
program.blocks = GenerateBlocks(program.syntax_list); program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());

View File

@ -4,8 +4,8 @@
#pragma once #pragma once
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/object_pool.h"
#include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/program.h"
#include "shader_recompiler/object_pool.h"
namespace Shader { namespace Shader {
@ -30,7 +30,6 @@ struct BinaryInfo {
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, ObjectPool<IR::Block>& block_pool,
std::span<const u32> code, std::span<const u32> code, const Info&& info);
const Info&& info);
} // namespace Shader } // namespace Shader

View File

@ -40,12 +40,12 @@ enum class TextureType : u32 {
constexpr u32 NUM_TEXTURE_TYPES = 7; constexpr u32 NUM_TEXTURE_TYPES = 7;
struct Info { struct Info {
explicit Info(std::span<const u32, 16> user_data_) : user_data{user_data_} {}
struct VsInput { struct VsInput {
AmdGpu::NumberFormat fmt; AmdGpu::NumberFormat fmt;
u16 binding; u16 binding;
u16 num_components; u16 num_components;
u8 sgpr_base;
u8 dword_offset;
}; };
boost::container::static_vector<VsInput, 32> vs_inputs{}; boost::container::static_vector<VsInput, 32> vs_inputs{};
@ -60,29 +60,33 @@ struct Info {
struct AttributeFlags { struct AttributeFlags {
bool Get(IR::Attribute attrib, u32 comp = 0) const { bool Get(IR::Attribute attrib, u32 comp = 0) const {
return flags[static_cast<size_t>(attrib)] & (1 << comp); return flags[Index(attrib)] & (1 << comp);
} }
bool GetAny(IR::Attribute attrib) const { bool GetAny(IR::Attribute attrib) const {
return flags[static_cast<size_t>(attrib)]; return flags[Index(attrib)];
} }
void Set(IR::Attribute attrib, u32 comp = 0) { void Set(IR::Attribute attrib, u32 comp = 0) {
flags[static_cast<size_t>(attrib)] |= (1 << comp); flags[Index(attrib)] |= (1 << comp);
} }
u32 NumComponents(IR::Attribute attrib) const { u32 NumComponents(IR::Attribute attrib) const {
const u8 mask = flags[static_cast<size_t>(attrib)]; const u8 mask = flags[Index(attrib)];
ASSERT(mask != 0b1011 || mask != 0b1101); ASSERT(mask != 0b1011 || mask != 0b1101);
return std::popcount(mask); return std::popcount(mask);
} }
static size_t Index(IR::Attribute attrib) {
return static_cast<size_t>(attrib);
}
std::array<u8, IR::NumAttributes> flags; std::array<u8, IR::NumAttributes> flags;
}; };
AttributeFlags loads{}; AttributeFlags loads{};
AttributeFlags stores{}; AttributeFlags stores{};
std::span<const u32, 16> user_data; std::span<const u32> user_data;
Stage stage; Stage stage;
template <typename T> template <typename T>

View File

@ -114,7 +114,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
regs.num_indices = draw_index->index_count; regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator; regs.draw_initiator = draw_index->draw_initiator;
if (rasterizer) { if (rasterizer) {
rasterizer->DrawIndex(); rasterizer->Draw(true);
} }
break; break;
} }
@ -122,7 +122,9 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header); const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
regs.num_indices = draw_index->index_count; regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator; regs.draw_initiator = draw_index->draw_initiator;
rasterizer->DrawIndex(); if (rasterizer) {
rasterizer->Draw(false);
}
break; break;
} }
case PM4ItOpcode::DispatchDirect: { case PM4ItOpcode::DispatchDirect: {

View File

@ -180,25 +180,6 @@ struct Liverpool {
BitField<31, 1, u32> disable_color_writes_on_depth_pass; BitField<31, 1, u32> disable_color_writes_on_depth_pass;
}; };
union DepthSize {
u32 raw;
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
u32 Pitch() const {
return (pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (height_tile_max + 1) << 3;
}
};
union DepthSlice {
u32 raw;
BitField<0, 22, u32> slice_tile_max;
};
enum class StencilFunc : u32 { enum class StencilFunc : u32 {
Keep = 0, Keep = 0,
Zero = 1, Zero = 1,
@ -236,9 +217,45 @@ struct Liverpool {
BitField<24, 8, u32> stencil_op_val; BitField<24, 8, u32> stencil_op_val;
}; };
union StencilInfo { struct DepthBuffer {
u32 raw; enum class ZFormat : u32 {
BitField<0, 1, u32> format; Invald = 0,
Z16 = 1,
Z32Float = 2,
};
enum class StencilFormat : u32 {
Invalid = 0,
Stencil8 = 1,
};
union {
BitField<0, 2, ZFormat> format;
BitField<2, 2, u32> num_samples;
BitField<13, 3, u32> tile_split;
} z_info;
union {
BitField<0, 1, StencilFormat> format;
} stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
union {
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
} depth_size;
union {
BitField<0, 22, u32> tile_max;
} depth_slice;
u32 Pitch() const {
return (depth_size.pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (depth_size.height_tile_max + 1) << 3;
}
}; };
enum class ClipSpace : u32 { enum class ClipSpace : u32 {
@ -505,6 +522,12 @@ struct Liverpool {
u64 CmaskAddress() const { u64 CmaskAddress() const {
return u64(cmask_base_address) << 8; return u64(cmask_base_address) << 8;
} }
NumberFormat NumFormat() const {
    // There is a small difference between T# and CB number types, account for it.
    if (info.number_type == AmdGpu::NumberFormat::Uscaled) {
        return AmdGpu::NumberFormat::Srgb;
    }
    return info.number_type;
}
}; };
enum class PrimitiveType : u32 { enum class PrimitiveType : u32 {
@ -539,14 +562,8 @@ struct Liverpool {
u32 stencil_clear; u32 stencil_clear;
u32 depth_clear; u32 depth_clear;
Scissor screen_scissor; Scissor screen_scissor;
INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2); INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
StencilInfo stencil_info; DepthBuffer depth_buffer;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
DepthSize depth_size;
DepthSlice depth_slice;
INSERT_PADDING_WORDS(0xA08E - 0xA018); INSERT_PADDING_WORDS(0xA08E - 0xA018);
ColorBufferMask color_target_mask; ColorBufferMask color_target_mask;
ColorBufferMask color_shader_mask; ColorBufferMask color_shader_mask;
@ -595,6 +612,17 @@ struct Liverpool {
VgtNumInstances num_instances; VgtNumInstances num_instances;
}; };
std::array<u32, NumRegs> reg_array{}; std::array<u32, NumRegs> reg_array{};
const ShaderProgram* ProgramForStage(u32 index) const {
    // Only the vertex (0) and pixel (4) hardware stages are wired up; any
    // other stage index has no register block yet.
    if (index == 0) {
        return &vs_program;
    }
    if (index == 4) {
        return &ps_program;
    }
    return nullptr;
}
}; };
Regs regs{}; Regs regs{};
@ -635,7 +663,7 @@ static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48); static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017); static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);

View File

@ -76,4 +76,3 @@ struct fmt::formatter<AmdGpu::NumberFormat> {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt)); return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt));
} }
}; };

View File

@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include "common/assert.h" #include "common/assert.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h"
@ -114,19 +114,41 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) {
} }
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && num_format == AmdGpu::NumberFormat::Float) { if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR32G32B32A32Sfloat; return vk::Format::eR32G32B32A32Sfloat;
} }
if (data_format == AmdGpu::DataFormat::Format32_32_32 && num_format == AmdGpu::NumberFormat::Uint) { if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32G32B32Uint; return vk::Format::eR32G32B32Uint;
} }
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Unorm) { if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8B8A8Unorm; return vk::Format::eR8G8B8A8Unorm;
} }
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Srgb) { if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eR8G8B8A8Srgb; return vk::Format::eR8G8B8A8Srgb;
} }
UNREACHABLE(); UNREACHABLE();
} }
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
                       Liverpool::DepthBuffer::StencilFormat stencil_format) {
    // Translation of guest depth/stencil formats to Vulkan is not implemented
    // yet: any call aborts, signalling a game uses an unsupported depth buffer.
    UNREACHABLE();
}
void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
    // Expands a quad list into a 16-bit triangle-list index stream: each quad
    // (v, v+1, v+2, v+3) emits the two triangles (v, v+1, v+2) and (v+2, v, v+3).
    // out_ptr must have room for (num_vertices / 4) * 6 u16 indices.
    static constexpr u16 NumVerticesPerQuad = 4;
    u16* out_data = reinterpret_cast<u16*>(out_ptr);
    // Iterate with a u32 counter: the original u16 counter wraps around and
    // loops forever when num_vertices >= 0x10000. The emitted indices remain
    // 16-bit, matching the index buffer format.
    for (u32 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
        const u16 v = static_cast<u16>(i);
        *out_data++ = v;
        *out_data++ = v + 1;
        *out_data++ = v + 2;
        *out_data++ = v + 2;
        *out_data++ = v;
        *out_data++ = v + 3;
    }
}
} // namespace Vulkan::LiverpoolToVK } // namespace Vulkan::LiverpoolToVK

View File

@ -23,4 +23,9 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode);
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format);
void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices);
} // namespace Vulkan::LiverpoolToVK } // namespace Vulkan::LiverpoolToVK

View File

@ -4,22 +4,58 @@
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "core/memory.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan { namespace Vulkan {
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& key_, GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_, const PipelineKey& key_, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules) std::array<vk::ShaderModule, MaxShaderStages> modules)
: instance{instance_}, pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, key{key_} { : instance{instance_}, scheduler{scheduler_}, key{key_} {
const vk::Device device = instance.GetDevice(); const vk::Device device = instance.GetDevice();
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!infos[i]) {
continue;
}
stages[i] = *infos[i];
}
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
.pSetLayouts = nullptr,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
boost::container::static_vector<vk::VertexInputBindingDescription, 32> bindings;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
const auto& vs_info = stages[0];
for (const auto& input : vs_info.vs_inputs) {
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
attributes.push_back({
.location = input.binding,
.binding = input.binding,
.format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format),
.offset = 0,
});
bindings.push_back({
.binding = input.binding,
.stride = u32(buffer.stride),
.inputRate = vk::VertexInputRate::eVertex,
});
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = 0U, .vertexBindingDescriptionCount = static_cast<u32>(bindings.size()),
.pVertexBindingDescriptions = nullptr, .pVertexBindingDescriptions = bindings.data(),
.vertexAttributeDescriptionCount = 0U, .vertexAttributeDescriptionCount = static_cast<u32>(attributes.size()),
.pVertexAttributeDescriptions = nullptr, .pVertexAttributeDescriptions = attributes.data(),
}; };
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
@ -126,11 +162,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pName = "main", .pName = "main",
}; };
const vk::Format color_format = vk::Format::eR8G8B8A8Srgb; const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined);
const u32 num_color_formats = std::distance(key.color_formats.begin(), it);
const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = { const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
.colorAttachmentCount = 1, .colorAttachmentCount = num_color_formats,
.pColorAttachmentFormats = &color_format, .pColorAttachmentFormats = key.color_formats.data(),
.depthAttachmentFormat = vk::Format::eUndefined, .depthAttachmentFormat = key.depth.depth_enable ? key.depth_format : vk::Format::eUndefined,
.stencilAttachmentFormat = vk::Format::eUndefined, .stencilAttachmentFormat = vk::Format::eUndefined,
}; };
@ -146,7 +183,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pDepthStencilState = &depth_info, .pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending, .pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info, .pDynamicState = &dynamic_info,
.layout = pipeline_layout, .layout = *pipeline_layout,
}; };
auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info); auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info);
@ -159,4 +196,20 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
GraphicsPipeline::~GraphicsPipeline() = default; GraphicsPipeline::~GraphicsPipeline() = default;
void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
std::array<vk::Buffer, MaxVertexBufferCount> buffers;
std::array<vk::DeviceSize, MaxVertexBufferCount> offsets;
const auto& vs_info = stages[0];
const size_t num_buffers = vs_info.vs_inputs.size();
for (u32 i = 0; i < num_buffers; ++i) {
const auto& input = vs_info.vs_inputs[i];
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address);
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data());
}
} // namespace Vulkan } // namespace Vulkan

View File

@ -1,19 +1,31 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <xxhash.h>
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
namespace Core {
class MemoryManager;
}
namespace Vulkan { namespace Vulkan {
static constexpr u32 MaxVertexBufferCount = 32;
static constexpr u32 MaxShaderStages = 5; static constexpr u32 MaxShaderStages = 5;
class Instance; class Instance;
class Scheduler;
using Liverpool = AmdGpu::Liverpool; using Liverpool = AmdGpu::Liverpool;
struct PipelineKey { struct PipelineKey {
std::array<size_t, MaxShaderStages> stage_hashes;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
vk::Format depth_format;
Liverpool::DepthControl depth; Liverpool::DepthControl depth;
Liverpool::StencilControl stencil; Liverpool::StencilControl stencil;
Liverpool::StencilRefMask stencil_ref_front; Liverpool::StencilRefMask stencil_ref_front;
@ -21,26 +33,41 @@ struct PipelineKey {
Liverpool::PrimitiveType prim_type; Liverpool::PrimitiveType prim_type;
Liverpool::PolygonMode polygon_mode; Liverpool::PolygonMode polygon_mode;
Liverpool::CullMode cull_mode; Liverpool::CullMode cull_mode;
bool operator==(const PipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(PipelineKey)) == 0;
}
}; };
static_assert(std::has_unique_object_representations_v<PipelineKey>); static_assert(std::has_unique_object_representations_v<PipelineKey>);
class GraphicsPipeline { class GraphicsPipeline {
public: public:
explicit GraphicsPipeline(const Instance& instance, const PipelineKey& key, explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
vk::PipelineCache pipeline_cache, vk::PipelineLayout layout, const PipelineKey& key, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules); std::array<vk::ShaderModule, MaxShaderStages> modules);
~GraphicsPipeline(); ~GraphicsPipeline();
void BindResources(Core::MemoryManager* memory) const;
[[nodiscard]] vk::Pipeline Handle() const noexcept { [[nodiscard]] vk::Pipeline Handle() const noexcept {
return *pipeline; return *pipeline;
} }
private: private:
const Instance& instance; const Instance& instance;
Scheduler& scheduler;
vk::UniquePipeline pipeline; vk::UniquePipeline pipeline;
vk::PipelineLayout pipeline_layout; vk::UniquePipelineLayout pipeline_layout;
vk::PipelineCache pipeline_cache; std::array<Shader::Info, MaxShaderStages> stages;
PipelineKey key; PipelineKey key;
}; };
} // namespace Vulkan } // namespace Vulkan
template <>
struct std::hash<Vulkan::PipelineKey> {
std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept {
return XXH3_64bits(&key, sizeof(key));
}
};

View File

@ -271,11 +271,11 @@ void Instance::CollectDeviceParameters() {
const std::string api_version = GetReadableVersion(properties.apiVersion); const std::string api_version = GetReadableVersion(properties.apiVersion);
const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", ")); const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", "));
LOG_INFO(Render_Vulkan, "GPU_Vendor", vendor_name); LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name);
LOG_INFO(Render_Vulkan, "GPU_Model", model_name); LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver", driver_name); LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version", api_version); LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions", extensions); LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions);
} }
void Instance::CollectToolingInfo() { void Instance::CollectToolingInfo() {

View File

@ -2,10 +2,10 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <fstream> #include <fstream>
#include "common/scope_exit.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/recompiler.h" #include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
@ -14,8 +14,9 @@
namespace Vulkan { namespace Vulkan {
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data, Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
AmdGpu::Liverpool::Regs& regs) { const AmdGpu::Liverpool::Regs& regs) {
Shader::Info info{user_data}; Shader::Info info{};
info.user_data = user_data;
info.stage = stage; info.stage = stage;
switch (stage) { switch (stage) {
case Shader::Stage::Fragment: { case Shader::Stage::Fragment: {
@ -39,66 +40,96 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_) AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192}, : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
block_pool{512} { block_pool{512} {
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
.pSetLayouts = nullptr,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
} }
void PipelineCache::BindPipeline() { const GraphicsPipeline* PipelineCache::GetPipeline() {
SCOPE_EXIT { RefreshKey();
const auto cmdbuf = scheduler.CommandBuffer(); const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); if (is_new) {
}; it.value() = CreatePipeline();
}
const GraphicsPipeline* pipeline = it->second.get();
return pipeline;
}
if (pipeline) { void PipelineCache::RefreshKey() {
return; auto& regs = liverpool->regs;
auto& key = graphics_key;
key.depth = regs.depth_control;
key.stencil = regs.stencil_control;
key.stencil_ref_front = regs.stencil_ref_front;
key.stencil_ref_back = regs.stencil_ref_back;
key.prim_type = regs.primitive_type;
key.polygon_mode = regs.polygon_control.PolyMode();
const auto& db = regs.depth_buffer;
key.depth_format = key.depth.depth_enable
? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format)
: vk::Format::eUndefined;
for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) {
const auto& cb = regs.color_buffers[i];
key.color_formats[i] = cb.base_address
? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat())
: vk::Format::eUndefined;
} }
const auto get_program = [&](const AmdGpu::Liverpool::ShaderProgram& pgm, Shader::Stage stage) { for (u32 i = 0; i < MaxShaderStages; i++) {
const u32* token = pgm.Address<u32>(); auto* pgm = regs.ProgramForStage(i);
if (!pgm || !pgm->Address<u32>()) {
key.stage_hashes[i] = 0;
continue;
}
const u32* code = pgm->Address<u32>();
// Retrieve shader header.
Shader::BinaryInfo bininfo; Shader::BinaryInfo bininfo;
std::memcpy(&bininfo, token + (token[1] + 1) * 2, sizeof(bininfo)); std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
key.stage_hashes[i] = bininfo.shader_hash;
}
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
const auto& regs = liverpool->regs;
std::array<Shader::IR::Program, MaxShaderStages> programs;
std::array<const Shader::Info*, MaxShaderStages> infos{};
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!graphics_key.stage_hashes[i]) {
stages[i] = VK_NULL_HANDLE;
continue;
}
auto* pgm = regs.ProgramForStage(i);
const u32* code = pgm->Address<u32>();
Shader::BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32);
// Lookup if the shader already exists.
const auto it = module_map.find(bininfo.shader_hash); const auto it = module_map.find(bininfo.shader_hash);
if (it != module_map.end()) { if (it != module_map.end()) {
return *it->second; stages[i] = *it->second;
continue;
} }
// Compile and cache shader.
const auto data = std::span{token, bininfo.length / sizeof(u32)};
block_pool.ReleaseContents(); block_pool.ReleaseContents();
inst_pool.ReleaseContents(); inst_pool.ReleaseContents();
const auto info = MakeShaderInfo(stage, pgm.user_data, liverpool->regs);
auto program = Shader::TranslateProgram(inst_pool, block_pool, data, std::move(info));
const auto code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, program);
static int counter = 0; // Recompile shader to IR.
std::ofstream file(fmt::format("shader{}.spv", counter++), std::ios::out | std::ios::binary); const auto stage = Shader::Stage{i};
file.write((const char*)code.data(), code.size() * sizeof(u32)); const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
file.close(); programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords},
std::move(info));
return CompileSPV(code, instance.GetDevice()); // Compile IR to SPIR-V
}; const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]);
stages[i] = CompileSPV(spv_code, instance.GetDevice());
infos[i] = &programs[i].info;
}
// Retrieve shader stage modules. return std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, *pipeline_cache,
// TODO: Only do this when program address is changed. infos, stages);
stages[0] = get_program(liverpool->regs.vs_program, Shader::Stage::Vertex);
stages[4] = get_program(liverpool->regs.ps_program, Shader::Stage::Fragment);
// Bind pipeline.
// TODO: Read entire key based on reg state.
graphics_key.prim_type = liverpool->regs.primitive_type;
graphics_key.polygon_mode = liverpool->regs.polygon_control.PolyMode();
pipeline = std::make_unique<GraphicsPipeline>(instance, graphics_key, *pipeline_cache,
*pipeline_layout, stages);
} }
} // namespace Vulkan } // namespace Vulkan

View File

@ -8,6 +8,10 @@
#include "shader_recompiler/object_pool.h" #include "shader_recompiler/object_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
namespace Shader {
struct Info;
}
namespace Vulkan { namespace Vulkan {
class Instance; class Instance;
@ -21,7 +25,12 @@ public:
AmdGpu::Liverpool* liverpool); AmdGpu::Liverpool* liverpool);
~PipelineCache() = default; ~PipelineCache() = default;
void BindPipeline(); const GraphicsPipeline* GetPipeline();
private:
void RefreshKey();
std::unique_ptr<GraphicsPipeline> CreatePipeline();
private: private:
const Instance& instance; const Instance& instance;
@ -31,7 +40,7 @@ private:
vk::UniquePipelineLayout pipeline_layout; vk::UniquePipelineLayout pipeline_layout;
tsl::robin_map<size_t, vk::UniqueShaderModule> module_map; tsl::robin_map<size_t, vk::UniqueShaderModule> module_map;
std::array<vk::ShaderModule, MaxShaderStages> stages{}; std::array<vk::ShaderModule, MaxShaderStages> stages{};
std::unique_ptr<GraphicsPipeline> pipeline; tsl::robin_map<PipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
PipelineKey graphics_key{}; PipelineKey graphics_key{};
Shader::ObjectPool<Shader::IR::Inst> inst_pool; Shader::ObjectPool<Shader::IR::Inst> inst_pool;
Shader::ObjectPool<Shader::IR::Block> block_pool; Shader::ObjectPool<Shader::IR::Block> block_pool;

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h" #include "common/config.h"
#include "core/memory.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_rasterizer.h"
@ -18,24 +19,25 @@ static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_) VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, pipeline_cache{instance, scheduler, liverpool}, liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} { vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} {
if (!Config::nullGpu()) { if (!Config::nullGpu()) {
liverpool->BindRasterizer(this); liverpool->BindRasterizer(this);
} }
memory->SetInstance(&instance);
} }
Rasterizer::~Rasterizer() = default; Rasterizer::~Rasterizer() = default;
void Rasterizer::DrawIndex() { void Rasterizer::Draw(bool is_indexed) {
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
auto& regs = liverpool->regs; const auto& regs = liverpool->regs;
const u32 num_indices = SetupIndexBuffer(is_indexed);
UpdateDynamicState(); const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline();
pipeline_cache.BindPipeline(); pipeline->BindResources(memory);
auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
const vk::RenderingAttachmentInfo color_info = { const vk::RenderingAttachmentInfo color_info = {
.imageView = *image_view.image_view, .imageView = *image_view.image_view,
@ -52,13 +54,50 @@ void Rasterizer::DrawIndex() {
.pColorAttachments = &color_info, .pColorAttachments = &color_info,
}; };
UpdateDynamicState();
cmdbuf.beginRendering(rendering_info); cmdbuf.beginRendering(rendering_info);
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), 0, vk::IndexType::eUint32); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
cmdbuf.bindVertexBuffers(0, vertex_index_buffer.Handle(), vk::DeviceSize(0)); if (is_indexed) {
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0);
}
cmdbuf.endRendering(); cmdbuf.endRendering();
} }
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
// Emulate QuadList primitive type with CPU made index buffer.
const auto& regs = liverpool->regs;
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
is_indexed = true;
// Emit indices.
const u32 index_size = 3 * regs.num_indices;
const auto [data, offset, _] = vertex_index_buffer.Map(index_size);
LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices);
vertex_index_buffer.Commit(index_size);
// Bind index buffer.
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16);
return index_size / sizeof(u16);
}
if (!is_indexed) {
return 0;
}
const VAddr index_address = regs.index_base_address.Address();
const auto [buffer, offset] = memory->GetVulkanBuffer(index_address);
const vk::IndexType index_type =
regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? vk::IndexType::eUint16
: vk::IndexType::eUint32;
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(buffer, offset, index_type);
return regs.num_indices;
}
void Rasterizer::UpdateDynamicState() { void Rasterizer::UpdateDynamicState() {
UpdateViewportScissorState(); UpdateViewportScissorState();
} }

View File

@ -3,7 +3,6 @@
#pragma once #pragma once
#include <memory>
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h"
@ -11,6 +10,10 @@ namespace AmdGpu {
struct Liverpool; struct Liverpool;
} }
namespace Core {
class MemoryManager;
}
namespace VideoCore { namespace VideoCore {
class TextureCache; class TextureCache;
} }
@ -26,20 +29,14 @@ public:
VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool); VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool);
~Rasterizer(); ~Rasterizer();
/// Performs a draw call with an index buffer. void Draw(bool is_indexed);
void DrawIndex();
/// Performs a draw call without an index buffer.
void DrawAuto();
/// Updates graphics state that is not part of the bound pipeline.
void UpdateDynamicState();
private: private:
/// Updates viewport and scissor from liverpool registers. u32 SetupIndexBuffer(bool& is_indexed);
void UpdateViewportScissorState(); void MapMemory(VAddr addr, size_t size);
/// Updates depth and stencil pipeline state from liverpool registers. void UpdateDynamicState();
void UpdateViewportScissorState();
void UpdateDepthStencilState(); void UpdateDepthStencilState();
private: private:
@ -47,6 +44,7 @@ private:
Scheduler& scheduler; Scheduler& scheduler;
VideoCore::TextureCache& texture_cache; VideoCore::TextureCache& texture_cache;
AmdGpu::Liverpool* liverpool; AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory;
PipelineCache pipeline_cache; PipelineCache pipeline_cache;
StreamBuffer vertex_index_buffer; StreamBuffer vertex_index_buffer;
}; };

View File

@ -35,7 +35,7 @@ public:
* @param size Size to reserve. * @param size Size to reserve.
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset * @returns A pair of a raw memory pointer (with offset added), and the buffer offset
*/ */
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment = 0);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Commit(u64 size); void Commit(u64 size);

View File

@ -67,12 +67,8 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
} }
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept { ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept {
// There is a small difference between T# and CB number types, account for it.
const auto number_fmt =
buffer.info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
: buffer.info.number_type;
is_tiled = true; is_tiled = true;
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, number_fmt); pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
type = vk::ImageType::e2D; type = vk::ImageType::e2D;
size.width = buffer.Pitch(); size.width = buffer.Pitch();
size.height = buffer.Height(); size.height = buffer.Height();

View File

@ -147,7 +147,8 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff
return slot_image_views[view_id]; return slot_image_views[view_id];
} }
const ImageViewId view_id = slot_image_views.insert(instance, scheduler, view_info, image.image); const ImageViewId view_id =
slot_image_views.insert(instance, scheduler, view_info, image.image);
image.image_view_infos.emplace_back(view_info); image.image_view_infos.emplace_back(view_info);
image.image_view_ids.emplace_back(view_id); image.image_view_ids.emplace_back(view_id);
return slot_image_views[view_id]; return slot_image_views[view_id];