From f480d091ce7b3569e1b63e36de947a792f760603 Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Fri, 24 May 2024 23:50:56 +0300 Subject: [PATCH] video_core: Add basic vertex, index buffer handling and pipeline caching --- .gitmodules | 2 +- externals/CMakeLists.txt | 6 +- src/core/memory.cpp | 93 ++++++++++++++ src/core/memory.h | 28 +++- .../backend/spirv/spirv_emit_context.cpp | 6 +- .../frontend/fetch_shader.cpp | 12 +- src/shader_recompiler/frontend/fetch_shader.h | 10 +- .../frontend/translate/translate.cpp | 9 +- src/shader_recompiler/ir/program.h | 2 - src/shader_recompiler/recompiler.cpp | 3 +- src/shader_recompiler/recompiler.h | 5 +- src/shader_recompiler/runtime_info.h | 18 ++- src/video_core/amdgpu/liverpool.cpp | 6 +- src/video_core/amdgpu/liverpool.h | 90 ++++++++----- src/video_core/amdgpu/pixel_format.h | 1 - .../renderer_vulkan/liverpool_to_vk.cpp | 32 ++++- .../renderer_vulkan/liverpool_to_vk.h | 5 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 77 +++++++++-- .../renderer_vulkan/vk_graphics_pipeline.h | 35 ++++- .../renderer_vulkan/vk_instance.cpp | 10 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 121 +++++++++++------- .../renderer_vulkan/vk_pipeline_cache.h | 13 +- .../renderer_vulkan/vk_rasterizer.cpp | 63 +++++++-- .../renderer_vulkan/vk_rasterizer.h | 22 ++-- .../renderer_vulkan/vk_stream_buffer.h | 2 +- src/video_core/texture_cache/image.cpp | 6 +- .../texture_cache/texture_cache.cpp | 3 +- 27 files changed, 506 insertions(+), 174 deletions(-) diff --git a/.gitmodules b/.gitmodules index 76ca5dcae..8c54fe861 100644 --- a/.gitmodules +++ b/.gitmodules @@ -50,7 +50,7 @@ [submodule "externals/toml11"] path = externals/toml11 url = https://github.com/ToruNiina/toml11.git -[submodule "externals/xxHash"] +[submodule "externals/xxhash"] path = externals/xxHash url = https://github.com/Cyan4973/xxHash.git [submodule "externals/zydis"] diff --git a/externals/CMakeLists.txt 
b/externals/CMakeLists.txt index 24bca2f1c..b2d348b7f 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -74,8 +74,8 @@ add_subdirectory(magic_enum EXCLUDE_FROM_ALL) add_subdirectory(toml11 EXCLUDE_FROM_ALL) # xxHash -add_library(xxhash INTERFACE) -target_include_directories(xxhash INTERFACE xxhash) +add_library(xxhash xxhash/xxhash.h xxhash/xxhash.c) +target_include_directories(xxhash PUBLIC xxhash) # Zydis option(ZYDIS_BUILD_TOOLS "" OFF) @@ -92,4 +92,4 @@ endif() add_subdirectory(sirit EXCLUDE_FROM_ALL) if (WIN32) target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument") -endif() \ No newline at end of file +endif() diff --git a/src/core/memory.cpp b/src/core/memory.cpp index aa5c66761..a16abbe01 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -7,6 +7,7 @@ #include "common/scope_exit.h" #include "core/libraries/error_codes.h" #include "core/memory.h" +#include "video_core/renderer_vulkan/vk_instance.h" namespace Core { @@ -61,6 +62,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M new_vma.prot = prot; new_vma.name = name; new_vma.type = type; + + if (type == VMAType::Direct) { + MapVulkanMemory(mapped_addr, size); + } }; // When virtual addr is zero let the address space manager pick the address. @@ -103,6 +108,10 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr, "Attempting to unmap partially mapped range"); + if (it->second.type == VMAType::Direct) { + UnmapVulkanMemory(virtual_addr, size); + } + // Mark region as free and attempt to coalesce it with neighbours. 
auto& vma = it->second; vma.type = VMAType::Free; @@ -114,6 +123,13 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { impl.Unmap(virtual_addr, size); } +std::pair MemoryManager::GetVulkanBuffer(VAddr addr) { + auto it = mapped_memories.upper_bound(addr); + it = std::prev(it); + ASSERT(it != mapped_memories.end() && it->first <= addr); + return std::make_pair(*it->second.buffer, addr - it->first); +} + VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { auto vma_handle = FindVMA(virtual_addr); ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); @@ -171,4 +187,81 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) { return iter; } +void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) { + const vk::Device device = instance->GetDevice(); + const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties(); + void* host_pointer = reinterpret_cast(addr); + const auto host_mem_props = device.getMemoryHostPointerPropertiesEXT( + vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, host_pointer); + ASSERT(host_mem_props.memoryTypeBits != 0); + + int mapped_memory_type = -1; + auto find_mem_type_with_flag = [&](const vk::MemoryPropertyFlags flags) { + u32 host_mem_types = host_mem_props.memoryTypeBits; + while (host_mem_types != 0) { + // Try to find a cached memory type + mapped_memory_type = std::countr_zero(host_mem_types); + host_mem_types -= (1 << mapped_memory_type); + + if ((memory_props.memoryTypes[mapped_memory_type].propertyFlags & flags) == flags) { + return; + } + } + + mapped_memory_type = -1; + }; + + // First try to find a memory that is both coherent and cached + find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent | + vk::MemoryPropertyFlagBits::eHostCached); + if (mapped_memory_type == -1) + // Then only coherent (lower performance) + find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent); + + if (mapped_memory_type == 
-1) { + LOG_CRITICAL(Render_Vulkan, "No coherent memory available for memory mapping"); + mapped_memory_type = std::countr_zero(host_mem_props.memoryTypeBits); + } + + const vk::StructureChain alloc_info = { + vk::MemoryAllocateInfo{ + .allocationSize = size, + .memoryTypeIndex = static_cast(mapped_memory_type), + }, + vk::ImportMemoryHostPointerInfoEXT{ + .handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, + .pHostPointer = host_pointer, + }, + }; + + const auto [it, new_memory] = mapped_memories.try_emplace(addr); + ASSERT_MSG(new_memory, "Attempting to remap already mapped vulkan memory"); + + auto& memory = it->second; + memory.backing = device.allocateMemoryUnique(alloc_info.get()); + + constexpr vk::BufferUsageFlags MapFlags = + vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer | + vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eUniformBuffer; + + const vk::StructureChain buffer_info = { + vk::BufferCreateInfo{ + .size = size, + .usage = MapFlags, + .sharingMode = vk::SharingMode::eExclusive, + }, + vk::ExternalMemoryBufferCreateInfoKHR{ + .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, + }}; + memory.buffer = device.createBufferUnique(buffer_info.get()); + device.bindBufferMemory(*memory.buffer, *memory.backing, 0); +} + +void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) { + const auto it = mapped_memories.find(addr); + ASSERT(it != mapped_memories.end() && it->second.buffer_size == size); + mapped_memories.erase(it); +} + } // namespace Core diff --git a/src/core/memory.h b/src/core/memory.h index a86930c81..4c0fadbfd 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -10,6 +11,11 @@ #include "common/singleton.h" #include "common/types.h" #include "core/address_space.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { 
+class Instance; +} namespace Core { @@ -86,6 +92,10 @@ public: explicit MemoryManager(); ~MemoryManager(); + void SetInstance(const Vulkan::Instance* instance_) { + instance = instance_; + } + PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment, int memory_type); @@ -97,11 +107,9 @@ public: void UnmapMemory(VAddr virtual_addr, size_t size); -private: - bool HasOverlap(VAddr addr, size_t size) const { - return vma_map.find(addr) != vma_map.end(); - } + std::pair GetVulkanBuffer(VAddr addr); +private: VMAHandle FindVMA(VAddr target) { // Return first the VMA with base >= target. const auto it = vma_map.lower_bound(target); @@ -117,10 +125,22 @@ private: VMAHandle MergeAdjacent(VMAHandle iter); + void MapVulkanMemory(VAddr addr, size_t size); + + void UnmapVulkanMemory(VAddr addr, size_t size); + private: AddressSpace impl; std::vector allocations; VMAMap vma_map; + + struct MappedMemory { + vk::UniqueBuffer buffer; + vk::UniqueDeviceMemory backing; + size_t buffer_size; + }; + std::map mapped_memories; + const Vulkan::Instance* instance{}; }; using Memory = Common::Singleton; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 376175dc9..771e46d40 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -175,12 +175,14 @@ void EmitContext::DefineInputs(const IR::Program& program) { const Id id{DefineInput(type, input.binding)}; Name(id, fmt::format("vs_in_attr{}", input.binding)); input_params[input.binding] = GetAttributeInfo(input.fmt, id); + interfaces.push_back(id); } break; case Stage::Fragment: for (const auto& input : info.ps_inputs) { if (input.is_default) { - input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value), input_f32, F32[1]}; + input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value), + input_f32, F32[1]}; 
continue; } const IR::Attribute param{IR::Attribute::Param0 + input.param_index}; @@ -192,6 +194,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { } Name(id, fmt::format("fs_in_attr{}", input.semantic)); input_params[input.semantic] = {id, input_f32, F32[1], num_components}; + interfaces.push_back(id); } default: break; @@ -212,6 +215,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) { const Id id{DefineOutput(F32[num_components], i)}; Name(id, fmt::format("out_attr{}", i)); output_params[i] = {id, output_f32, F32[1], num_components}; + interfaces.push_back(id); } break; case Stage::Fragment: diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp index b17fbc522..7f4f50e94 100644 --- a/src/shader_recompiler/frontend/fetch_shader.cpp +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -40,7 +40,7 @@ std::vector ParseFetchShader(const u32* code) { struct VsharpLoad { u32 dword_offset{}; s32 base_sgpr{}; - s32 dst_sgpr{-1}; + s32 dst_reg{-1}; }; boost::container::static_vector loads; @@ -57,11 +57,13 @@ std::vector ParseFetchShader(const u32* code) { } if (inst.inst_class == InstClass::VectorMemBufFmt) { + // SRSRC is in units of 4 SGPRs while SBASE is in pairs of SGPRs + const u32 base_sgpr = inst.src[2].code * 4; + // Find the load instruction that loaded the V# to the SGPR. // This is so we can determine its index in the vertex table. - const auto it = std::ranges::find_if(loads, [&](VsharpLoad& load) { - return load.dst_sgpr == inst.src[2].code * 4; - }); + const auto it = std::ranges::find_if( loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; }); auto& attrib = attributes.emplace_back(); attrib.semantic = semantic_index++; @@ -71,7 +73,7 @@ std::vector ParseFetchShader(const u32* code) { attrib.dword_offset = it->dword_offset; // Mark load as used.
- it->dst_sgpr = -1; + it->dst_reg = -1; } } diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index 627e19aa0..2f8eae12c 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -9,11 +9,11 @@ namespace Shader::Gcn { struct VertexAttribute { - u8 semantic; ///< Semantic index of the attribute - u8 dest_vgpr; ///< Destination VGPR to load first component - u8 num_elements; ///< Number of components to load - u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# - u8 dword_offset; ///< The dword offset of the V# that describes this attribute. + u8 semantic; ///< Semantic index of the attribute + u8 dest_vgpr; ///< Destination VGPR to load first component. + u8 num_elements; ///< Number of components to load + u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# + u8 dword_offset; ///< The dword offset of the V# that describes this attribute. }; std::vector ParseFetchShader(const u32* code); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 6dc85d168..06faf28d6 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/fetch_shader.h" +#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" @@ -103,20 +103,21 @@ void Translator::EmitFetch(const GcnInst& inst) { // Parse the assembly to generate a list of attributes. 
const auto attribs = ParseFetchShader(code); for (const auto& attrib : attribs) { - IR::VectorReg dst_reg{attrib.dest_vgpr}; const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; + IR::VectorReg dst_reg{attrib.dest_vgpr}; for (u32 i = 0; i < attrib.num_elements; i++) { ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i)); } // Read the V# of the attribute to figure out component number and type. - const auto buffer = info.ReadUd(attrib.sgpr_base, - attrib.dword_offset); + const auto buffer = info.ReadUd(attrib.sgpr_base, attrib.dword_offset); const u32 num_components = AmdGpu::NumComponents(buffer.data_format); info.vs_inputs.push_back({ .fmt = buffer.num_format, .binding = attrib.semantic, .num_components = std::min(attrib.num_elements, num_components), + .sgpr_base = attrib.sgpr_base, + .dword_offset = attrib.dword_offset, }); } } diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h index 27e33b119..eff933f28 100644 --- a/src/shader_recompiler/ir/program.h +++ b/src/shader_recompiler/ir/program.h @@ -12,8 +12,6 @@ namespace Shader::IR { struct Program { - explicit Program(const Info&& info_) : info{info_} {} - AbstractSyntaxList syntax_list; BlockList blocks; BlockList post_order_blocks; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 86173b5b8..66d19620f 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -44,7 +44,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& block_pool, - std::span code, - const Info&& info); + std::span code, const Info&& info); } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 959768af2..40c9c6b0f 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -40,12 +40,12 @@ enum class TextureType : u32 { constexpr u32 NUM_TEXTURE_TYPES = 7; 
struct Info { - explicit Info(std::span user_data_) : user_data{user_data_} {} - struct VsInput { AmdGpu::NumberFormat fmt; u16 binding; u16 num_components; + u8 sgpr_base; + u8 dword_offset; }; boost::container::static_vector vs_inputs{}; @@ -60,29 +60,33 @@ struct Info { struct AttributeFlags { bool Get(IR::Attribute attrib, u32 comp = 0) const { - return flags[static_cast(attrib)] & (1 << comp); + return flags[Index(attrib)] & (1 << comp); } bool GetAny(IR::Attribute attrib) const { - return flags[static_cast(attrib)]; + return flags[Index(attrib)]; } void Set(IR::Attribute attrib, u32 comp = 0) { - flags[static_cast(attrib)] |= (1 << comp); + flags[Index(attrib)] |= (1 << comp); } u32 NumComponents(IR::Attribute attrib) const { - const u8 mask = flags[static_cast(attrib)]; + const u8 mask = flags[Index(attrib)]; ASSERT(mask != 0b1011 || mask != 0b1101); return std::popcount(mask); } + static size_t Index(IR::Attribute attrib) { + return static_cast(attrib); + } + std::array flags; }; AttributeFlags loads{}; AttributeFlags stores{}; - std::span user_data; + std::span user_data; Stage stage; template diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index b1563a34a..d43f749b9 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -114,7 +114,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) { regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { - rasterizer->DrawIndex(); + rasterizer->Draw(true); } break; } @@ -122,7 +122,9 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) { const auto* draw_index = reinterpret_cast(header); regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; - rasterizer->DrawIndex(); + if (rasterizer) { + rasterizer->Draw(false); + } break; } case PM4ItOpcode::DispatchDirect: { diff --git a/src/video_core/amdgpu/liverpool.h 
b/src/video_core/amdgpu/liverpool.h index a4e9df376..83fd2494a 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -180,25 +180,6 @@ struct Liverpool { BitField<31, 1, u32> disable_color_writes_on_depth_pass; }; - union DepthSize { - u32 raw; - BitField<0, 11, u32> pitch_tile_max; - BitField<11, 11, u32> height_tile_max; - - u32 Pitch() const { - return (pitch_tile_max + 1) << 3; - } - - u32 Height() const { - return (height_tile_max + 1) << 3; - } - }; - - union DepthSlice { - u32 raw; - BitField<0, 22, u32> slice_tile_max; - }; - enum class StencilFunc : u32 { Keep = 0, Zero = 1, @@ -236,9 +217,45 @@ struct Liverpool { BitField<24, 8, u32> stencil_op_val; }; - union StencilInfo { - u32 raw; - BitField<0, 1, u32> format; + struct DepthBuffer { + enum class ZFormat : u32 { + Invald = 0, + Z16 = 1, + Z32Float = 2, + }; + + enum class StencilFormat : u32 { + Invalid = 0, + Stencil8 = 1, + }; + + union { + BitField<0, 2, ZFormat> format; + BitField<2, 2, u32> num_samples; + BitField<13, 3, u32> tile_split; + } z_info; + union { + BitField<0, 1, StencilFormat> format; + } stencil_info; + u32 z_read_base; + u32 stencil_read_base; + u32 z_write_base; + u32 stencil_write_base; + union { + BitField<0, 11, u32> pitch_tile_max; + BitField<11, 11, u32> height_tile_max; + } depth_size; + union { + BitField<0, 22, u32> tile_max; + } depth_slice; + + u32 Pitch() const { + return (depth_size.pitch_tile_max + 1) << 3; + } + + u32 Height() const { + return (depth_size.height_tile_max + 1) << 3; + } }; enum class ClipSpace : u32 { @@ -505,6 +522,12 @@ struct Liverpool { u64 CmaskAddress() const { return u64(cmask_base_address) << 8; } + + NumberFormat NumFormat() const { + // There is a small difference between T# and CB number types, account for it. + return info.number_type == AmdGpu::NumberFormat::Uscaled ? 
AmdGpu::NumberFormat::Srgb + : info.number_type; + } }; enum class PrimitiveType : u32 { @@ -539,14 +562,8 @@ struct Liverpool { u32 stencil_clear; u32 depth_clear; Scissor screen_scissor; - INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2); - StencilInfo stencil_info; - u32 z_read_base; - u32 stencil_read_base; - u32 z_write_base; - u32 stencil_write_base; - DepthSize depth_size; - DepthSlice depth_slice; + INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2); + DepthBuffer depth_buffer; INSERT_PADDING_WORDS(0xA08E - 0xA018); ColorBufferMask color_target_mask; ColorBufferMask color_shader_mask; @@ -595,6 +612,17 @@ struct Liverpool { VgtNumInstances num_instances; }; std::array reg_array{}; + + const ShaderProgram* ProgramForStage(u32 index) const { + switch (index) { + case 0: + return &vs_program; + case 4: + return &ps_program; + default: + return nullptr; + } + } }; Regs regs{}; @@ -635,7 +663,7 @@ static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48); static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); -static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017); +static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017); static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index fb0c27517..7555cdb33 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -76,4 +76,3 @@ struct fmt::formatter { return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt)); } }; - diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 00b28de9e..906b937e1 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ 
b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#pragma clang optimize off + #include "common/assert.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" @@ -114,19 +114,41 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) { } vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { - if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && num_format == AmdGpu::NumberFormat::Float) { + if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && + num_format == AmdGpu::NumberFormat::Float) { return vk::Format::eR32G32B32A32Sfloat; } - if (data_format == AmdGpu::DataFormat::Format32_32_32 && num_format == AmdGpu::NumberFormat::Uint) { + if (data_format == AmdGpu::DataFormat::Format32_32_32 && + num_format == AmdGpu::NumberFormat::Uint) { return vk::Format::eR32G32B32Uint; } - if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Unorm) { + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eR8G8B8A8Unorm; } - if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Srgb) { + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Srgb) { return vk::Format::eR8G8B8A8Srgb; } UNREACHABLE(); } +vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, + Liverpool::DepthBuffer::StencilFormat stencil_format) { + UNREACHABLE(); +} + +void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) { + static constexpr u16 NumVerticesPerQuad = 4; + u16* out_data = reinterpret_cast(out_ptr); + for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) { + *out_data++ = i; + *out_data++ = i + 1; + *out_data++ = i + 2; + *out_data++ = i + 2; + *out_data++ = i; + *out_data++ = i + 3; + } +} + } // namespace 
Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index c04b1cb95..38f021fdb 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -23,4 +23,9 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); +vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, + Liverpool::DepthBuffer::StencilFormat stencil_format); + +void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices); + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 4ba2b61bd..3db09efef 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -4,22 +4,58 @@ #include #include "common/assert.h" +#include "core/memory.h" +#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" namespace Vulkan { -GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& key_, - vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_, +GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_, + const PipelineKey& key_, vk::PipelineCache pipeline_cache, + std::span infos, std::array modules) - : instance{instance_}, pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, key{key_} { + : instance{instance_}, scheduler{scheduler_}, key{key_} { const vk::Device device = instance.GetDevice(); + for (u32 i = 0; i < MaxShaderStages; i++) { + if (!infos[i]) { + continue; + } + stages[i] = *infos[i]; + } + + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = 
0U, + .pSetLayouts = nullptr, + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; + pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); + + boost::container::static_vector bindings; + boost::container::static_vector attributes; + const auto& vs_info = stages[0]; + for (const auto& input : vs_info.vs_inputs) { + const auto buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); + attributes.push_back({ + .location = input.binding, + .binding = input.binding, + .format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format), + .offset = 0, + }); + bindings.push_back({ + .binding = input.binding, + .stride = u32(buffer.stride), + .inputRate = vk::VertexInputRate::eVertex, + }); + } const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { - .vertexBindingDescriptionCount = 0U, - .pVertexBindingDescriptions = nullptr, - .vertexAttributeDescriptionCount = 0U, - .pVertexAttributeDescriptions = nullptr, + .vertexBindingDescriptionCount = static_cast(bindings.size()), + .pVertexBindingDescriptions = bindings.data(), + .vertexAttributeDescriptionCount = static_cast(attributes.size()), + .pVertexAttributeDescriptions = attributes.data(), }; const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { @@ -126,11 +162,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& .pName = "main", }; - const vk::Format color_format = vk::Format::eR8G8B8A8Srgb; + const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined); + const u32 num_color_formats = std::distance(key.color_formats.begin(), it); const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = { - .colorAttachmentCount = 1, - .pColorAttachmentFormats = &color_format, - .depthAttachmentFormat = vk::Format::eUndefined, + .colorAttachmentCount = num_color_formats, + .pColorAttachmentFormats = key.color_formats.data(), + .depthAttachmentFormat = key.depth.depth_enable ? 
key.depth_format : vk::Format::eUndefined, .stencilAttachmentFormat = vk::Format::eUndefined, }; @@ -146,7 +183,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& .pDepthStencilState = &depth_info, .pColorBlendState = &color_blending, .pDynamicState = &dynamic_info, - .layout = pipeline_layout, + .layout = *pipeline_layout, }; auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info); @@ -159,4 +196,20 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& GraphicsPipeline::~GraphicsPipeline() = default; +void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const { + std::array buffers; + std::array offsets; + + const auto& vs_info = stages[0]; + const size_t num_buffers = vs_info.vs_inputs.size(); + for (u32 i = 0; i < num_buffers; ++i) { + const auto& input = vs_info.vs_inputs[i]; + const auto buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); + std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address); + } + + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data()); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index d8b7887b5..47cc5c233 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -1,19 +1,31 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/types.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_common.h" +namespace Core { +class MemoryManager; +} + namespace Vulkan { +static constexpr u32 MaxVertexBufferCount = 32; static constexpr u32 MaxShaderStages = 5; class Instance; +class Scheduler; using 
Liverpool = AmdGpu::Liverpool; struct PipelineKey { + std::array stage_hashes; + std::array color_formats; + vk::Format depth_format; + Liverpool::DepthControl depth; Liverpool::StencilControl stencil; Liverpool::StencilRefMask stencil_ref_front; @@ -21,26 +33,41 @@ struct PipelineKey { Liverpool::PrimitiveType prim_type; Liverpool::PolygonMode polygon_mode; Liverpool::CullMode cull_mode; + + bool operator==(const PipelineKey& key) const noexcept { + return std::memcmp(this, &key, sizeof(PipelineKey)) == 0; + } }; static_assert(std::has_unique_object_representations_v); class GraphicsPipeline { public: - explicit GraphicsPipeline(const Instance& instance, const PipelineKey& key, - vk::PipelineCache pipeline_cache, vk::PipelineLayout layout, + explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler, + const PipelineKey& key, vk::PipelineCache pipeline_cache, + std::span infos, std::array modules); ~GraphicsPipeline(); + void BindResources(Core::MemoryManager* memory) const; + [[nodiscard]] vk::Pipeline Handle() const noexcept { return *pipeline; } private: const Instance& instance; + Scheduler& scheduler; vk::UniquePipeline pipeline; - vk::PipelineLayout pipeline_layout; - vk::PipelineCache pipeline_cache; + vk::UniquePipelineLayout pipeline_layout; + std::array stages; PipelineKey key; }; } // namespace Vulkan + +template <> +struct std::hash { + std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept { + return XXH3_64bits(&key, sizeof(key)); + } +}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 0cde3e6e4..32dca0c5a 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -271,11 +271,11 @@ void Instance::CollectDeviceParameters() { const std::string api_version = GetReadableVersion(properties.apiVersion); const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", ")); - 
LOG_INFO(Render_Vulkan, "GPU_Vendor", vendor_name); - LOG_INFO(Render_Vulkan, "GPU_Model", model_name); - LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver", driver_name); - LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version", api_version); - LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions", extensions); + LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name); + LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name); + LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name); + LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version); + LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions); } void Instance::CollectToolingInfo() { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e0134442f..6de86c4c1 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -2,10 +2,10 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include "common/scope_exit.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -14,8 +14,9 @@ namespace Vulkan { Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_data, - AmdGpu::Liverpool::Regs& regs) { - Shader::Info info{user_data}; + const AmdGpu::Liverpool::Regs& regs) { + Shader::Info info{}; + info.user_data = user_data; info.stage = stage; switch (stage) { case Shader::Stage::Fragment: { @@ -39,66 +40,96 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} { - const vk::PipelineLayoutCreateInfo layout_info = { - 
.setLayoutCount = 0U, - .pSetLayouts = nullptr, - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); } -void PipelineCache::BindPipeline() { - SCOPE_EXIT { - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); - }; +const GraphicsPipeline* PipelineCache::GetPipeline() { + RefreshKey(); + const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); + if (is_new) { + it.value() = CreatePipeline(); + } + const GraphicsPipeline* pipeline = it->second.get(); + return pipeline; +} - if (pipeline) { - return; +void PipelineCache::RefreshKey() { + auto& regs = liverpool->regs; + auto& key = graphics_key; + + key.depth = regs.depth_control; + key.stencil = regs.stencil_control; + key.stencil_ref_front = regs.stencil_ref_front; + key.stencil_ref_back = regs.stencil_ref_back; + key.prim_type = regs.primitive_type; + key.polygon_mode = regs.polygon_control.PolyMode(); + + const auto& db = regs.depth_buffer; + key.depth_format = key.depth.depth_enable + ? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format) + : vk::Format::eUndefined; + for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) { + const auto& cb = regs.color_buffers[i]; + key.color_formats[i] = cb.base_address + ? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat()) + : vk::Format::eUndefined; } - const auto get_program = [&](const AmdGpu::Liverpool::ShaderProgram& pgm, Shader::Stage stage) { - const u32* token = pgm.Address(); + for (u32 i = 0; i < MaxShaderStages; i++) { + auto* pgm = regs.ProgramForStage(i); + if (!pgm || !pgm->Address()) { + key.stage_hashes[i] = 0; + continue; + } + const u32* code = pgm->Address(); - // Retrieve shader header. 
Shader::BinaryInfo bininfo; - std::memcpy(&bininfo, token + (token[1] + 1) * 2, sizeof(bininfo)); + std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); + key.stage_hashes[i] = bininfo.shader_hash; + } +} + +std::unique_ptr PipelineCache::CreatePipeline() { + const auto& regs = liverpool->regs; + + std::array programs; + std::array infos{}; + + for (u32 i = 0; i < MaxShaderStages; i++) { + if (!graphics_key.stage_hashes[i]) { + stages[i] = VK_NULL_HANDLE; + continue; + } + auto* pgm = regs.ProgramForStage(i); + const u32* code = pgm->Address(); + + Shader::BinaryInfo bininfo; + std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); + const u32 num_dwords = bininfo.length / sizeof(u32); - // Lookup if the shader already exists. const auto it = module_map.find(bininfo.shader_hash); if (it != module_map.end()) { - return *it->second; + stages[i] = *it->second; + continue; } - // Compile and cache shader. - const auto data = std::span{token, bininfo.length / sizeof(u32)}; block_pool.ReleaseContents(); inst_pool.ReleaseContents(); - const auto info = MakeShaderInfo(stage, pgm.user_data, liverpool->regs); - auto program = Shader::TranslateProgram(inst_pool, block_pool, data, std::move(info)); - const auto code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, program); - static int counter = 0; - std::ofstream file(fmt::format("shader{}.spv", counter++), std::ios::out | std::ios::binary); - file.write((const char*)code.data(), code.size() * sizeof(u32)); - file.close(); + // Recompile shader to IR. 
+ const auto stage = Shader::Stage{i}; + const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); + programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords}, + std::move(info)); - return CompileSPV(code, instance.GetDevice()); - }; + // Compile IR to SPIR-V + const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]); + stages[i] = CompileSPV(spv_code, instance.GetDevice()); + infos[i] = &programs[i].info; + } - // Retrieve shader stage modules. - // TODO: Only do this when program address is changed. - stages[0] = get_program(liverpool->regs.vs_program, Shader::Stage::Vertex); - stages[4] = get_program(liverpool->regs.ps_program, Shader::Stage::Fragment); - - // Bind pipeline. - // TODO: Read entire key based on reg state. - graphics_key.prim_type = liverpool->regs.primitive_type; - graphics_key.polygon_mode = liverpool->regs.polygon_control.PolyMode(); - pipeline = std::make_unique(instance, graphics_key, *pipeline_cache, - *pipeline_layout, stages); + return std::make_unique(instance, scheduler, graphics_key, *pipeline_cache, + infos, stages); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 7634f9cbc..32830eabf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -8,6 +8,10 @@ #include "shader_recompiler/object_pool.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +namespace Shader { +struct Info; +} + namespace Vulkan { class Instance; @@ -21,7 +25,12 @@ public: AmdGpu::Liverpool* liverpool); ~PipelineCache() = default; - void BindPipeline(); + const GraphicsPipeline* GetPipeline(); + +private: + void RefreshKey(); + + std::unique_ptr CreatePipeline(); private: const Instance& instance; @@ -31,7 +40,7 @@ private: vk::UniquePipelineLayout pipeline_layout; tsl::robin_map module_map; std::array stages{}; - 
std::unique_ptr pipeline; + tsl::robin_map> graphics_pipelines; PipelineKey graphics_key{}; Shader::ObjectPool inst_pool; Shader::ObjectPool block_pool; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 595dcff1c..3d301f620 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/config.h" +#include "core/memory.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -18,24 +19,25 @@ static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_) : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, - liverpool{liverpool_}, pipeline_cache{instance, scheduler, liverpool}, + liverpool{liverpool_}, memory{Core::Memory::Instance()}, + pipeline_cache{instance, scheduler, liverpool}, vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} { if (!Config::nullGpu()) { liverpool->BindRasterizer(this); } + + memory->SetInstance(&instance); } Rasterizer::~Rasterizer() = default; -void Rasterizer::DrawIndex() { +void Rasterizer::Draw(bool is_indexed) { const auto cmdbuf = scheduler.CommandBuffer(); - auto& regs = liverpool->regs; - - UpdateDynamicState(); - - pipeline_cache.BindPipeline(); - - auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]); + const auto& regs = liverpool->regs; + const u32 num_indices = SetupIndexBuffer(is_indexed); + const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]); + const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline(); + pipeline->BindResources(memory); const vk::RenderingAttachmentInfo color_info = { 
.imageView = *image_view.image_view, @@ -52,13 +54,50 @@ void Rasterizer::DrawIndex() { .pColorAttachments = &color_info, }; + UpdateDynamicState(); + cmdbuf.beginRendering(rendering_info); - cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), 0, vk::IndexType::eUint32); - cmdbuf.bindVertexBuffers(0, vertex_index_buffer.Handle(), vk::DeviceSize(0)); - cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); + if (is_indexed) { + cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); + } else { + cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); + } cmdbuf.endRendering(); } +u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) { + // Emulate QuadList primitive type with CPU made index buffer. + const auto& regs = liverpool->regs; + if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) { + ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw"); + is_indexed = true; + + // Emit indices. + const u32 index_size = 3 * regs.num_indices; + const auto [data, offset, _] = vertex_index_buffer.Map(index_size); + LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices); + vertex_index_buffer.Commit(index_size); + + // Bind index buffer. + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16); + return index_size / sizeof(u16); + } + if (!is_indexed) { + return 0; + } + + const VAddr index_address = regs.index_base_address.Address(); + const auto [buffer, offset] = memory->GetVulkanBuffer(index_address); + const vk::IndexType index_type = + regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? 
vk::IndexType::eUint16 + : vk::IndexType::eUint32; + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(buffer, offset, index_type); + return regs.num_indices; +} + void Rasterizer::UpdateDynamicState() { UpdateViewportScissorState(); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index a1e940bac..a8386c252 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -3,7 +3,6 @@ #pragma once -#include #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" @@ -11,6 +10,10 @@ namespace AmdGpu { struct Liverpool; } +namespace Core { +class MemoryManager; +} + namespace VideoCore { class TextureCache; } @@ -26,20 +29,14 @@ public: VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool); ~Rasterizer(); - /// Performs a draw call with an index buffer. - void DrawIndex(); - - /// Performs a draw call without an index buffer. - void DrawAuto(); - - /// Updates graphics state that is not part of the bound pipeline. - void UpdateDynamicState(); + void Draw(bool is_indexed); private: - /// Updates viewport and scissor from liverpool registers. - void UpdateViewportScissorState(); + u32 SetupIndexBuffer(bool& is_indexed); + void MapMemory(VAddr addr, size_t size); - /// Updates depth and stencil pipeline state from liverpool registers. 
+ void UpdateDynamicState(); + void UpdateViewportScissorState(); void UpdateDepthStencilState(); private: @@ -47,6 +44,7 @@ private: Scheduler& scheduler; VideoCore::TextureCache& texture_cache; AmdGpu::Liverpool* liverpool; + Core::MemoryManager* memory; PipelineCache pipeline_cache; StreamBuffer vertex_index_buffer; }; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index d31a1f5d3..637f03d05 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -35,7 +35,7 @@ public: * @param size Size to reserve. * @returns A pair of a raw memory pointer (with offset added), and the buffer offset */ - std::tuple Map(u64 size, u64 alignment); + std::tuple Map(u64 size, u64 alignment = 0); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Commit(u64 size); diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index e9ac4ff02..6a7bba8e2 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -67,12 +67,8 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe } ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept { - // There is a small difference between T# and CB number types, account for it. - const auto number_fmt = - buffer.info.number_type == AmdGpu::NumberFormat::Uscaled ? 
AmdGpu::NumberFormat::Srgb - : buffer.info.number_type; is_tiled = true; - pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, number_fmt); + pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); type = vk::ImageType::e2D; size.width = buffer.Pitch(); size.height = buffer.Height(); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 17cc3ec26..e21bb6ed2 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -147,7 +147,8 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff return slot_image_views[view_id]; } - const ImageViewId view_id = slot_image_views.insert(instance, scheduler, view_info, image.image); + const ImageViewId view_id = + slot_image_views.insert(instance, scheduler, view_info, image.image); image.image_view_infos.emplace_back(view_info); image.image_view_ids.emplace_back(view_id); return slot_image_views[view_id];