video_core: Initial implementation of pipeline cache (#3816)

* Initial implementation

* Fix for crash caused by stale stages data; cosmetics applied

* Someone mentioned the assert

* Async blob writer

* Fix for memory leak

* Remaining stuff

* Async changed to `packaged_task`
psucien
2025-11-29 10:52:08 +01:00
committed by GitHub
parent f9ef57f74b
commit a9f8eaf778
37 changed files with 1339 additions and 166 deletions
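
One of the squashed commits above switches the async blob writer over to std::packaged_task. Below is a rough, self-contained sketch of that pattern (the names are illustrative; the emulator's actual worker is ProcessIO() in src/video_core/cache_storage.cpp further down): a dedicated IO thread drains a queue of packaged tasks and is woken through a condition variable that also honors the thread's stop token.

// Minimal, self-contained sketch of a packaged_task-based IO worker. Names are illustrative;
// the emulator's version is ProcessIO() in src/video_core/cache_storage.cpp below.
#include <condition_variable>
#include <cstdio>
#include <future>
#include <mutex>
#include <queue>
#include <stop_token>
#include <thread>
#include <utility>

class BlobWriter {
public:
    BlobWriter() : worker{[this](std::stop_token st) { Run(st); }} {}

    // Enqueue a write request; the returned future lets a caller wait for that specific task.
    std::future<void> Submit(std::packaged_task<void()> task) {
        auto future = task.get_future();
        {
            std::scoped_lock lk{mutex};
            queue.emplace(std::move(task));
        }
        cv.notify_one();
        return future;
    }

private:
    void Run(std::stop_token st) {
        while (!st.stop_requested()) {
            std::packaged_task<void()> task;
            {
                std::unique_lock lk{mutex};
                // Wakes on new work or when a stop is requested through the jthread.
                if (!cv.wait(lk, st, [this] { return !queue.empty(); })) {
                    return; // stop requested while idle
                }
                task = std::move(queue.front());
                queue.pop();
            }
            task(); // the actual compression / file write happens off the render thread
        }
    }

    std::mutex mutex;
    std::condition_variable_any cv;
    std::queue<std::packaged_task<void()>> queue;
    std::jthread worker; // destructor requests stop and joins automatically
};

int main() {
    BlobWriter writer;
    auto done = writer.Submit(std::packaged_task<void()>{[] { std::puts("blob written"); }});
    done.wait();
}

DataBase::Save() in this commit uses the same queue but does not wait on the returned future, so cache writes stay fully asynchronous with respect to pipeline compilation.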

.gitmodules vendored

@@ -117,3 +117,6 @@
path = externals/sdl3_mixer
url = https://github.com/libsdl-org/SDL_mixer
shallow = true
[submodule "externals/miniz"]
path = externals/miniz
url = https://github.com/richgel999/miniz


@@ -693,7 +693,6 @@ set(COMMON src/common/logging/backend.cpp
src/common/lru_cache.h
src/common/error.cpp
src/common/error.h
src/common/scope_exit.h
src/common/fixed_value.h
src/common/func_traits.h
src/common/native_clock.cpp
@@ -707,6 +706,8 @@ set(COMMON src/common/logging/backend.cpp
src/common/rdtsc.h
src/common/recursive_lock.cpp
src/common/recursive_lock.h
src/common/scope_exit.h
src/common/serdes.h
src/common/sha1.h
src/common/shared_first_mutex.h
src/common/signal_context.h
@@ -986,6 +987,8 @@ set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h
src/video_core/renderer_vulkan/vk_pipeline_cache.h
src/video_core/renderer_vulkan/vk_pipeline_common.cpp
src/video_core/renderer_vulkan/vk_pipeline_common.h
src/video_core/renderer_vulkan/vk_pipeline_serialization.cpp
src/video_core/renderer_vulkan/vk_pipeline_serialization.h
src/video_core/renderer_vulkan/vk_platform.cpp
src/video_core/renderer_vulkan/vk_platform.h
src/video_core/renderer_vulkan/vk_presenter.cpp
@@ -1023,6 +1026,8 @@ set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h
src/video_core/texture_cache/tile_manager.cpp
src/video_core/texture_cache/tile_manager.h
src/video_core/texture_cache/types.h
src/video_core/cache_storage.cpp
src/video_core/cache_storage.h
src/video_core/page_manager.cpp
src/video_core/page_manager.h
src/video_core/multi_level_page_table.h
@@ -1077,7 +1082,8 @@ add_executable(shadps4
create_target_directory_groups(shadps4)
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG)
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 SDL3_mixer::SDL3_mixer pugixml::pugixml stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json)
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 SDL3_mixer::SDL3_mixer pugixml::pugixml)
target_link_libraries(shadps4 PRIVATE stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json miniz)
target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h")
target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h")


@@ -261,3 +261,6 @@ endif()
#nlohmann json
set(JSON_BuildTests OFF CACHE INTERNAL "")
add_subdirectory(json)
# miniz
add_subdirectory(miniz)

externals/miniz vendored Submodule

Submodule externals/miniz added at 174573d602


@@ -191,6 +191,8 @@ static ConfigEntry<bool> vkCrashDiagnostic(false);
static ConfigEntry<bool> vkHostMarkers(false);
static ConfigEntry<bool> vkGuestMarkers(false);
static ConfigEntry<bool> rdocEnable(false);
static ConfigEntry<bool> pipelineCacheEnable(false);
static ConfigEntry<bool> pipelineCacheArchive(false);
// Debug
static ConfigEntry<bool> isDebugDump(false);
@@ -452,6 +454,14 @@ bool isRdocEnabled() {
return rdocEnable.get();
}
bool isPipelineCacheEnabled() {
return pipelineCacheEnable.get();
}
bool isPipelineCacheArchived() {
return pipelineCacheArchive.get();
}
bool fpsColor() {
return isFpsColor.get();
}
@@ -603,6 +613,14 @@ void setRdocEnabled(bool enable, bool is_game_specific) {
rdocEnable.set(enable, is_game_specific);
}
void setPipelineCacheEnabled(bool enable, bool is_game_specific) {
pipelineCacheEnable.set(enable, is_game_specific);
}
void setPipelineCacheArchived(bool enable, bool is_game_specific) {
pipelineCacheArchive.set(enable, is_game_specific);
}
void setVblankFreq(u32 value, bool is_game_specific) {
vblankFrequency.set(value, is_game_specific);
}
@@ -939,6 +957,8 @@ void load(const std::filesystem::path& path, bool is_game_specific) {
vkHostMarkers.setFromToml(vk, "hostMarkers", is_game_specific);
vkGuestMarkers.setFromToml(vk, "guestMarkers", is_game_specific);
rdocEnable.setFromToml(vk, "rdocEnable", is_game_specific);
pipelineCacheEnable.setFromToml(vk, "pipelineCacheEnable", is_game_specific);
pipelineCacheArchive.setFromToml(vk, "pipelineCacheArchive", is_game_specific);
}
string current_version = {};
@@ -1107,6 +1127,8 @@ void save(const std::filesystem::path& path, bool is_game_specific) {
vkHostMarkers.setTomlValue(data, "Vulkan", "hostMarkers", is_game_specific);
vkGuestMarkers.setTomlValue(data, "Vulkan", "guestMarkers", is_game_specific);
rdocEnable.setTomlValue(data, "Vulkan", "rdocEnable", is_game_specific);
pipelineCacheEnable.setTomlValue(data, "Vulkan", "pipelineCacheEnable", is_game_specific);
pipelineCacheArchive.setTomlValue(data, "Vulkan", "pipelineCacheArchive", is_game_specific);
isDebugDump.setTomlValue(data, "Debug", "DebugDump", is_game_specific);
isShaderDebug.setTomlValue(data, "Debug", "CollectShader", is_game_specific);
@@ -1237,6 +1259,8 @@ void setDefaultValues(bool is_game_specific) {
vkHostMarkers.set(false, is_game_specific);
vkGuestMarkers.set(false, is_game_specific);
rdocEnable.set(false, is_game_specific);
pipelineCacheEnable.set(false, is_game_specific);
pipelineCacheArchive.set(false, is_game_specific);
// GS - Debug
isDebugDump.set(false, is_game_specific);
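
The two new toggles default to false and are stored under the [Vulkan] section of the config. A minimal sketch of how a caller would presumably consume them (the exact call site is not part of this hunk; backend selection itself lives in DataBase::Open() in cache_storage.cpp below):

#include "common/config.h"
#include "video_core/cache_storage.h"

void MaybeOpenPipelineCache() {
    // config.toml: [Vulkan] pipelineCacheEnable / pipelineCacheArchive, both false by default.
    if (!Config::isPipelineCacheEnabled()) {
        return; // caching disabled, nothing to open
    }
    // Open() itself checks Config::isPipelineCacheArchived() to choose between a plain
    // per-game cache directory and a single miniz-backed .zip archive.
    Storage::DataBase::Instance().Open();
}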


@@ -94,7 +94,11 @@ void setVkGuestMarkersEnabled(bool enable, bool is_game_specific = false);
bool getEnableDiscordRPC();
void setEnableDiscordRPC(bool enable);
bool isRdocEnabled();
bool isPipelineCacheEnabled();
bool isPipelineCacheArchived();
void setRdocEnabled(bool enable, bool is_game_specific = false);
void setPipelineCacheEnabled(bool enable, bool is_game_specific = false);
void setPipelineCacheArchived(bool enable, bool is_game_specific = false);
std::string getLogType();
void setLogType(const std::string& type, bool is_game_specific = false);
std::string getLogFilter();


@@ -127,6 +127,7 @@ static auto UserPaths = [] {
create_path(PathType::MetaDataDir, user_dir / METADATA_DIR);
create_path(PathType::CustomTrophy, user_dir / CUSTOM_TROPHY);
create_path(PathType::CustomConfigs, user_dir / CUSTOM_CONFIGS);
create_path(PathType::CacheDir, user_dir / CACHE_DIR);
std::ofstream notice_file(user_dir / CUSTOM_TROPHY / "Notice.txt");
if (notice_file.is_open()) {


@@ -24,6 +24,7 @@ enum class PathType {
MetaDataDir, // Where game metadata (e.g. trophies and menu backgrounds) is stored.
CustomTrophy, // Where custom files for trophies are stored.
CustomConfigs, // Where custom files for different games are stored.
CacheDir, // Where pipeline and shader cache is stored.
};
constexpr auto PORTABLE_DIR = "user";
@@ -42,6 +43,7 @@ constexpr auto PATCHES_DIR = "patches";
constexpr auto METADATA_DIR = "game_data";
constexpr auto CUSTOM_TROPHY = "custom_trophy";
constexpr auto CUSTOM_CONFIGS = "custom_configs";
constexpr auto CACHE_DIR = "cache";
// Filenames
constexpr auto LOG_FILE = "shad_log.txt";

src/common/serdes.h Normal file

@@ -0,0 +1,140 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/assert.h"
#include "common/types.h"
#include <cstddef>
namespace Serialization {
template <typename T>
concept Container = requires(T t) {
typename T::iterator;
{ t.begin() } -> std::same_as<typename T::iterator>;
{ t.end() } -> std::same_as<typename T::iterator>;
{ t.size() } -> std::convertible_to<std::size_t>;
};
struct Archive {
void Alloc(size_t size) {
container.resize(size);
}
void Grow(size_t size) {
container.resize(container.size() + size);
}
void Merge(const Archive& ar) {
container.insert(container.end(), ar.container.cbegin(), ar.container.cend());
offset = container.size();
}
[[nodiscard]] size_t SizeBytes() const {
return container.size();
}
u8* CurrPtr() {
return container.data() + offset;
}
void Advance(size_t size) {
ASSERT(offset + size <= container.size());
offset += size;
}
std::vector<u8>&& TakeOff() {
offset = 0;
return std::move(container);
}
[[nodiscard]] bool IsEoS() const {
return offset >= container.size();
}
Archive() = default;
explicit Archive(std::vector<u8>&& v) : container{v} {}
private:
u32 offset{};
std::vector<u8> container{};
friend struct Writer;
friend struct Reader;
};
struct Writer {
template <typename T>
void Write(const T* ptr, size_t size) {
if (ar.offset + size >= ar.container.size()) {
ar.Grow(size);
}
std::memcpy(ar.CurrPtr(), reinterpret_cast<const void*>(ptr), size);
ar.Advance(size);
}
template <typename T>
requires(!Container<T>)
void Write(const T& value) {
const auto size = sizeof(value);
Write(&value, size);
}
void Write(const auto& v) {
Write(v.size());
for (const auto& elem : v) {
Write(elem);
}
}
void Write(const std::string& s) {
Write(s.size());
Write(s.c_str(), s.size());
}
Writer() = delete;
explicit Writer(Archive& ar_) : ar{ar_} {}
Archive& ar;
};
struct Reader {
template <typename T>
void Read(T* ptr, size_t size) {
ASSERT(ar.offset + size <= ar.container.size());
std::memcpy(reinterpret_cast<void*>(ptr), ar.CurrPtr(), size);
ar.Advance(size);
}
template <typename T>
requires(!Container<T>)
void Read(T& value) {
const auto size = sizeof(value);
Read(&value, size);
}
void Read(auto& v) {
size_t num_elements{};
Read(num_elements);
for (int i = 0; i < num_elements; ++i) {
v.emplace_back();
Read(v.back());
}
}
void Read(std::string& s) {
size_t length{};
Read(length);
s.resize(length);
Read(s.data(), length);
}
Reader() = delete;
explicit Reader(Archive& ar_) : ar{ar_} {}
Archive& ar;
};
} // namespace Serialization
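
A short usage sketch of these helpers (the Example type is hypothetical; the real users are the Serialize/Deserialize methods added throughout this commit). A Writer appends fixed-size values, length-prefixed containers and strings into the Archive, and a Reader consumes them back in the same order:

#include <cassert>
#include <string>
#include <vector>
#include "common/serdes.h"

struct Example {
    u32 version{};
    std::string name{};
    std::vector<u32> data{};
};

std::vector<u8> Pack(const Example& e) {
    Serialization::Archive ar{};
    Serialization::Writer w{ar};
    w.Write(e.version);  // trivially copyable value
    w.Write(e.name);     // length-prefixed string
    w.Write(e.data);     // length-prefixed container
    return ar.TakeOff(); // hand the byte blob to e.g. DataBase::Save(...)
}

Example Unpack(std::vector<u8>&& blob) {
    Serialization::Archive ar{std::move(blob)};
    Serialization::Reader r{ar};
    Example e{};
    r.Read(e.version);
    r.Read(e.name);
    r.Read(e.data);     // elements are emplace_back'd, so read into an empty vector
    assert(ar.IsEoS()); // everything written must be consumed in the same order
    return e;
}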


@@ -42,6 +42,7 @@
#include "core/linker.h"
#include "core/memory.h"
#include "emulator.h"
#include "video_core/cache_storage.h"
#include "video_core/renderdoc.h"
#ifdef _WIN32
@@ -387,6 +388,7 @@ void Emulator::Run(std::filesystem::path file, std::vector<std::string> args,
}
UpdatePlayTime(id);
Storage::DataBase::Instance().Close();
std::quick_exit(0);
}


@@ -51,7 +51,7 @@ std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
}
const auto* code = GetFetchShaderCode(info, info.fetch_shader_sgpr_base);
FetchShaderData data{.code = code};
FetchShaderData data{};
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;


@@ -8,6 +8,10 @@
#include "common/types.h"
#include "shader_recompiler/info.h"
namespace Serialization {
struct Archive;
}
namespace Shader::Gcn {
struct VertexAttribute {
@@ -50,7 +54,6 @@ struct VertexAttribute {
};
struct FetchShaderData {
const u32* code;
u32 size = 0;
std::vector<VertexAttribute> attributes;
s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR
@@ -60,6 +63,9 @@ struct FetchShaderData {
return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
instance_offset_sgpr == other.instance_offset_sgpr;
}
void Serialize(Serialization::Archive& ar) const;
bool Deserialize(Serialization::Archive& buffer);
};
const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base);


@@ -596,9 +596,8 @@ public:
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_},
runtime_info{runtime_info_}, profile{profile_},
translator{info_, runtime_info_, profile_} {
syntax_list{syntax_list_}, inst_list{inst_list_}, runtime_info{runtime_info_},
profile{profile_}, translator{info_, runtime_info_, profile_} {
Visit(root_stmt, nullptr, nullptr);
IR::Block* first_block = syntax_list.front().data.block;
@@ -782,7 +781,7 @@ private:
}
}
IR::Block* MergeBlock(Statement& parent, Statement& stmt) {
IR::Block* MergeBlock(Statement& parent, Statement& stmt) const {
Statement* merge_stmt{TryFindForwardBlock(stmt)};
if (!merge_stmt) {
// Create a merge block we can visit later
@@ -798,7 +797,6 @@ private:
IR::AbstractSyntaxList& syntax_list;
const Block dummy_flow_block{.is_dummy = true};
std::span<const GcnInst> inst_list;
Info& info;
const RuntimeInfo& runtime_info;
const Profile& profile;
Translator translator;


@@ -560,7 +560,8 @@ void Translator::EmitFetch(const GcnInst& inst) {
}
const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Create};
file.WriteRaw<u8>(fetch_data->code, fetch_data->size);
const auto* code = GetFetchShaderCode(info, code_sgpr_base);
file.WriteRaw<u8>(code, fetch_data->size);
}
for (const auto& attrib : fetch_data->attributes) {


@@ -19,6 +19,10 @@
#include "shader_recompiler/resource.h"
#include "shader_recompiler/runtime_info.h"
namespace Serialization {
struct Archive;
}
namespace Shader {
enum class Qualifier : u8 {
@@ -34,7 +38,49 @@ enum class Qualifier : u8 {
/**
* Contains general information generated by the shader recompiler for an input program.
*/
struct Info {
struct InfoPersistent {
BufferResourceList buffers;
ImageResourceList images;
SamplerResourceList samplers;
FMaskResourceList fmasks;
struct UserDataMask {
void Set(IR::ScalarReg reg) noexcept {
mask |= 1 << static_cast<u32>(reg);
}
u32 Index(IR::ScalarReg reg) const noexcept {
const u32 reg_mask = (1 << static_cast<u32>(reg)) - 1;
return std::popcount(mask & reg_mask);
}
u32 NumRegs() const noexcept {
return std::popcount(mask);
}
u32 mask;
};
UserDataMask ud_mask{};
u32 fetch_shader_sgpr_base{};
u64 pgm_hash{};
s32 tess_consts_dword_offset = -1;
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
Stage stage;
LogicalStage l_stage;
u8 mrt_mask{};
bool has_fetch_shader{};
bool has_bitwise_xor{};
bool uses_dma{};
InfoPersistent() = default;
InfoPersistent(Stage stage_, LogicalStage l_stage_, u64 pgm_hash_)
: stage{stage_}, l_stage{l_stage_}, pgm_hash{pgm_hash_} {}
};
struct Info : InfoPersistent {
struct AttributeFlags {
bool Get(IR::Attribute attrib, u32 comp = 0) const {
return flags[Index(attrib)] & (1 << comp);
@@ -58,56 +104,32 @@ struct Info {
std::array<u8, IR::NumAttributes> flags;
};
AttributeFlags loads{};
AttributeFlags stores{};
struct UserDataMask {
void Set(IR::ScalarReg reg) noexcept {
mask |= 1 << static_cast<u32>(reg);
}
u32 Index(IR::ScalarReg reg) const noexcept {
const u32 reg_mask = (1 << static_cast<u32>(reg)) - 1;
return std::popcount(mask & reg_mask);
}
u32 NumRegs() const noexcept {
return std::popcount(mask);
}
u32 mask;
enum class ReadConstType {
None = 0,
Immediate = 1 << 0,
Dynamic = 1 << 1,
};
UserDataMask ud_mask{};
CopyShaderData gs_copy_data;
u32 uses_patches{};
BufferResourceList buffers;
ImageResourceList images;
SamplerResourceList samplers;
FMaskResourceList fmasks;
PersistentSrtInfo srt_info;
std::vector<u32> flattened_ud_buf;
struct Interpolation {
Qualifier primary;
Qualifier auxiliary;
};
std::array<Interpolation, IR::NumParams> fs_interpolation{};
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
s32 tess_consts_dword_offset = -1;
std::span<const u32> user_data;
Stage stage;
LogicalStage l_stage;
std::vector<u32> flattened_ud_buf;
PersistentSrtInfo srt_info;
AttributeFlags loads{};
AttributeFlags stores{};
ReadConstType readconst_types{};
CopyShaderData gs_copy_data;
u32 uses_patches{};
u64 pgm_hash{};
VAddr pgm_base;
bool has_storage_images{};
bool has_discard{};
bool has_bitwise_xor{};
bool has_image_gather{};
bool has_image_query{};
bool uses_buffer_atomic_float_min_max{};
@@ -125,20 +147,12 @@ struct Info {
bool stores_tess_level_outer{};
bool stores_tess_level_inner{};
bool translation_failed{};
u8 mrt_mask{0u};
bool has_fetch_shader{false};
u32 fetch_shader_sgpr_base{0u};
enum class ReadConstType {
None = 0,
Immediate = 1 << 0,
Dynamic = 1 << 1,
};
ReadConstType readconst_types{};
bool uses_dma{};
std::array<Interpolation, IR::NumParams> fs_interpolation{};
explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
: stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
Info() = default;
Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
: InfoPersistent(stage_, l_stage_, params.hash), pgm_base{params.Base()},
user_data{params.user_data} {}
template <typename T>
@@ -192,6 +206,9 @@ struct Info {
reinterpret_cast<TessellationDataConstantBuffer*>(tess_constants_addr),
sizeof(tess_constants));
}
void Serialize(Serialization::Archive& ar) const;
bool Deserialize(Serialization::Archive& ar);
};
DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType);
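
The hunk above splits Shader::Info into a serializable InfoPersistent base and transient state that has to be re-derived when a cached shader is loaded. A rough sketch of the intended round-trip, assuming Serialize/Deserialize (implemented in vk_pipeline_serialization.cpp) cover only the persistent portion:

// Illustrative only; mirrors what PipelineCache::GetProgram() does later in this diff when it
// reuses a cached program against a freshly mapped guest module.
#include "common/serdes.h"
#include "shader_recompiler/info.h"

Shader::Info RestoreInfo(Serialization::Archive& ar, const Shader::ShaderParams& params) {
    Shader::Info restored{};  // default-constructible now (Info() = default above)
    restored.Deserialize(ar); // persistent part: resources, ud_mask, pgm_hash, stage, ...
    // Transient fields are refreshed against the live guest program before use:
    restored.pgm_base = params.Base();     // needed for the inline cbuffer address fixup
    restored.user_data = params.user_data; // raw user-data registers are never serialized
    restored.RefreshFlatBuf();
    return restored;
}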


@@ -28,6 +28,17 @@ using namespace Xbyak::util;
static Xbyak::CodeGenerator g_srt_codegen(32_MB);
static const u8* g_srt_codegen_start = nullptr;
namespace Shader {
PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size) {
const auto func_addr = (PFN_SrtWalker)g_srt_codegen.getCurr();
g_srt_codegen.db(ptr, size);
g_srt_codegen.ready();
return func_addr;
}
} // namespace Shader
namespace {
static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t codesize) {
@@ -215,9 +226,12 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
c.ret();
c.ready();
info.srt_info.walker_func_size =
c.getCurr() - reinterpret_cast<const u8*>(info.srt_info.walker_func);
if (Config::dumpShaders()) {
size_t codesize = c.getCurr() - reinterpret_cast<const u8*>(info.srt_info.walker_func);
DumpSrtProgram(info, reinterpret_cast<const u8*>(info.srt_info.walker_func), codesize);
DumpSrtProgram(info, reinterpret_cast<const u8*>(info.srt_info.walker_func),
info.srt_info.walker_func_size);
}
info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;


@@ -363,7 +363,7 @@ static IR::F32 ReadTessControlPointAttribute(IR::U32 addr, const u32 stride, IR:
} // namespace
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info) {
const Info& info = program.info;
for (IR::Block* block : program.blocks) {
@@ -561,8 +561,8 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
}
}
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
Info& info = program.info;
void DomainShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info) {
const Info& info = program.info;
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {


@@ -24,8 +24,8 @@ void LowerBufferFormatToRaw(IR::Program& program);
void LowerFp64ToFp32(IR::Program& program);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info);
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info);
void DomainShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info);
void SharedMemoryBarrierPass(IR::Program& program, const RuntimeInfo& runtime_info,
const Profile& profile);
void SharedMemorySimplifyPass(IR::Program& program, const Profile& profile);


@@ -498,7 +498,8 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
// buffer_load_format_xyz v[8:10], v1, s[32:35], 0 ...
// is used to define an inline buffer resource
std::array<u64, 2> raw;
raw[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32);
// Keep the relative address; the fixup happens later, at buffer fetch time (see
// BufferResource::GetSharp)
raw[0] = (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32);
raw[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32;
const auto buffer = std::bit_cast<AmdGpu::Buffer>(raw);
buffer_binding = descriptors.Add(BufferResource{


@@ -7,9 +7,14 @@
#include <boost/container/small_vector.hpp>
#include "common/types.h"
namespace Serialization {
struct Archive;
}
namespace Shader {
using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/);
PFN_SrtWalker RegisterWalkerCode(const u8* ptr, size_t size);
struct PersistentSrtInfo {
// Special case when fetch shader uses step rates.
@@ -20,7 +25,11 @@ struct PersistentSrtInfo {
};
PFN_SrtWalker walker_func{};
size_t walker_func_size{};
u32 flattened_bufsize_dw = 16; // NumUserDataRegs
void Serialize(Serialization::Archive& ar) const;
bool Deserialize(Serialization::Archive& ar);
};
} // namespace Shader


@@ -8,6 +8,10 @@
namespace Shader {
struct Profile {
u64 max_ubo_size{};
u32 max_viewport_width{};
u32 max_viewport_height{};
u32 max_shared_memory_size{};
u32 supported_spirv{0x00010000};
u32 subgroup_size{};
bool support_int8{};
@@ -37,10 +41,7 @@ struct Profile {
bool needs_lds_barriers{};
bool needs_buffer_offsets{};
bool needs_unorm_fixup{};
u64 max_ubo_size{};
u32 max_viewport_width{};
u32 max_viewport_height{};
u32 max_shared_memory_size{};
bool _pad0{};
};
} // namespace Shader


@@ -29,7 +29,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
return blocks;
}
IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
IR::Program TranslateProgram(const std::span<const u32>& code, Pools& pools, Info& info,
RuntimeInfo& runtime_info, const Profile& profile) {
// Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
@@ -55,8 +55,8 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
Gcn::CFG cfg{gcn_block_pool, program.ins_list};
// Structurize control flow graph and create program.
program.syntax_list = Shader::Gcn::BuildASL(pools.inst_pool, pools.block_pool, cfg,
program.info, runtime_info, profile);
program.syntax_list =
Shader::Gcn::BuildASL(pools.inst_pool, pools.block_pool, cfg, info, runtime_info, profile);
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());


@@ -27,7 +27,8 @@ struct Pools {
}
};
[[nodiscard]] IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
RuntimeInfo& runtime_info, const Profile& profile);
[[nodiscard]] IR::Program TranslateProgram(const std::span<const u32>& code, Pools& pools,
Info& info, RuntimeInfo& runtime_info,
const Profile& profile);
} // namespace Shader


@@ -53,8 +53,15 @@ struct BufferResource {
}
constexpr AmdGpu::Buffer GetSharp(const auto& info) const noexcept {
const auto buffer =
inline_cbuf ? inline_cbuf : info.template ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
AmdGpu::Buffer buffer{};
if (inline_cbuf) {
buffer = inline_cbuf;
if (inline_cbuf.base_address > 1) {
buffer.base_address += info.pgm_base; // address fixup
}
} else {
buffer = info.template ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
}
if (!buffer.Valid()) {
LOG_DEBUG(Render, "Encountered invalid buffer sharp");
return AmdGpu::Buffer::Null();


@@ -159,7 +159,8 @@ struct GeometryRuntimeInfo {
return num_outputs == other.num_outputs && outputs == other.outputs && num_invocations &&
other.num_invocations && output_vertices == other.output_vertices &&
in_primitive == other.in_primitive &&
std::ranges::equal(out_primitive, other.out_primitive);
std::ranges::equal(out_primitive, other.out_primitive) &&
vs_copy_hash == other.vs_copy_hash;
}
};


@@ -79,8 +79,8 @@ struct SamplerSpecialization {
struct StageSpecialization {
static constexpr size_t MaxStageResources = 128;
const Shader::Info* info;
RuntimeInfo runtime_info;
const Info* info{};
RuntimeInfo runtime_info{};
std::bitset<MaxStageResources> bitset{};
std::optional<Gcn::FetchShaderData> fetch_shader_data{};
boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
@@ -90,6 +90,7 @@ struct StageSpecialization {
boost::container::small_vector<SamplerSpecialization, 16> samplers;
Backend::Bindings start{};
StageSpecialization() = default;
StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, const Profile& profile_,
Backend::Bindings start_)
: info{&info_}, runtime_info{runtime_info_}, start{start_} {
@@ -158,7 +159,7 @@ struct StageSpecialization {
// Initialize runtime_info fields that rely on analysis in tessellation passes
if (info->l_stage == LogicalStage::TessellationControl ||
info->l_stage == LogicalStage::TessellationEval) {
Shader::TessellationDataConstantBuffer tess_constants;
TessellationDataConstantBuffer tess_constants{};
info->ReadTessConstantBuffer(tess_constants);
if (info->l_stage == LogicalStage::TessellationControl) {
runtime_info.hs_info.InitFromTessConstants(tess_constants);
@@ -192,21 +193,43 @@ struct StageSpecialization {
}
}
[[nodiscard]] bool Valid() const {
return info != nullptr;
}
bool operator==(const StageSpecialization& other) const {
if (start != other.start) {
if (!Valid()) {
return false;
}
if (vs_attribs != other.vs_attribs) {
return false;
}
if (runtime_info != other.runtime_info) {
return false;
}
if (fetch_shader_data != other.fetch_shader_data) {
return false;
}
for (u32 i = 0; i < vs_attribs.size(); i++) {
if (vs_attribs[i] != other.vs_attribs[i]) {
return false;
}
if (fmasks != other.fmasks) {
return false;
}
// For a VS that only generates geometry and has no inputs, its start bindings may still
// change because they depend on the previously processed FS. The check below handles this
// case and prevents generation of redundant permutations. It is also safe for other types
// of shaders with no bindings.
if (bitset.none() && other.bitset.none()) {
return true;
}
if (start != other.start) {
return false;
}
u32 binding{};
for (u32 i = 0; i < buffers.size(); i++) {
if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
@@ -218,11 +241,7 @@ struct StageSpecialization {
return false;
}
}
for (u32 i = 0; i < fmasks.size(); i++) {
if (other.bitset[binding++] && fmasks[i] != other.fmasks[i]) {
return false;
}
}
for (u32 i = 0; i < samplers.size(); i++) {
if (samplers[i] != other.samplers[i]) {
return false;
@@ -230,6 +249,9 @@ struct StageSpecialization {
}
return true;
}
void Serialize(Serialization::Archive& ar) const;
bool Deserialize(Serialization::Archive& ar);
};
} // namespace Shader

View File

@@ -79,10 +79,10 @@ enum class NumberFormat : u32 {
Ubscaled = 13,
};
enum class NumberClass {
Float,
Sint,
Uint,
enum class NumberClass : u8 {
Float = 0,
Sint = 1,
Uint = 2,
};
enum class CompSwizzle : u8 {


@@ -0,0 +1,264 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "common/elf_info.h"
#include "common/io_file.h"
#include "common/polyfill_thread.h"
#include "common/thread.h"
#include "video_core/cache_storage.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include <miniz.h>
#include <condition_variable>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
namespace {
std::mutex submit_mutex{};
u32 num_requests{};
std::condition_variable_any request_cv{};
std::queue<std::packaged_task<void()>> req_queue{};
std::mutex m_request{};
mz_zip_archive zip_ar{};
bool ar_is_read_only{true};
} // namespace
namespace Storage {
void ProcessIO(const std::stop_token& stoken) {
Common::SetCurrentThreadName("shadPS4:PipelineCacheIO");
while (!stoken.stop_requested()) {
{
std::unique_lock lk{submit_mutex};
Common::CondvarWait(request_cv, lk, stoken, [&] { return num_requests; });
}
if (stoken.stop_requested()) {
break;
}
while (num_requests) {
std::packaged_task<void()> request{};
{
std::scoped_lock lock{m_request};
if (req_queue.empty()) {
continue;
}
request = std::move(req_queue.front());
req_queue.pop();
}
if (request.valid()) {
request();
request.get_future().wait();
}
--num_requests;
}
}
}
constexpr std::string GetBlobFileExtension(BlobType type) {
switch (type) {
case BlobType::ShaderMeta: {
return "meta";
}
case BlobType::ShaderBinary: {
return "spv";
}
case BlobType::PipelineKey: {
return "key";
}
case BlobType::ShaderProfile: {
return "bin";
}
default:
UNREACHABLE();
}
}
void DataBase::Open() {
if (opened) {
return;
}
const auto& game_info = Common::ElfInfo::Instance();
using namespace Common::FS;
if (Config::isPipelineCacheArchived()) {
mz_zip_zero_struct(&zip_ar);
cache_path = GetUserPath(PathType::CacheDir) /
std::filesystem::path{game_info.GameSerial()}.replace_extension(".zip");
if (!mz_zip_reader_init_file(&zip_ar, cache_path.string().c_str(),
MZ_ZIP_FLAG_READ_ALLOW_WRITING) ||
!mz_zip_validate_archive(&zip_ar, 0)) {
LOG_INFO(Render, "Cache archive {} is not found or archive is corrupted",
cache_path.string().c_str());
mz_zip_reader_end(&zip_ar);
mz_zip_writer_init_file(&zip_ar, cache_path.string().c_str(), 0);
}
} else {
cache_path = GetUserPath(PathType::CacheDir) / game_info.GameSerial();
if (!std::filesystem::exists(cache_path)) {
std::filesystem::create_directories(cache_path);
}
}
io_worker = std::jthread{ProcessIO};
opened = true;
}
void DataBase::Close() {
if (!IsOpened()) {
return;
}
io_worker.request_stop();
io_worker.join();
if (Config::isPipelineCacheArchived()) {
mz_zip_writer_finalize_archive(&zip_ar);
mz_zip_writer_end(&zip_ar);
}
LOG_INFO(Render, "Cache dumped");
}
template <typename T>
bool WriteVector(const BlobType type, std::filesystem::path&& path_, std::vector<T>&& v) {
{
auto request = std::packaged_task<void()>{[=]() {
auto path{path_};
path.replace_extension(GetBlobFileExtension(type));
if (Config::isPipelineCacheArchived()) {
ASSERT_MSG(!ar_is_read_only,
"The archive is read-only. Did you forget to call `FinishPreload`?");
if (!mz_zip_writer_add_mem(&zip_ar, path.string().c_str(), v.data(),
v.size() * sizeof(T), MZ_BEST_COMPRESSION)) {
LOG_ERROR(Render, "Failed to add {} to the archive", path.string().c_str());
}
} else {
using namespace Common::FS;
const auto file = IOFile{path, FileAccessMode::Create};
file.Write(v);
}
}};
std::scoped_lock lock{m_request};
req_queue.emplace(std::move(request));
}
std::scoped_lock lk{submit_mutex};
++num_requests;
request_cv.notify_one();
return true;
}
template <typename T>
void LoadVector(BlobType type, std::filesystem::path& path, std::vector<T>& v) {
using namespace Common::FS;
path.replace_extension(GetBlobFileExtension(type));
if (Config::isPipelineCacheArchived()) {
int index{-1};
index = mz_zip_reader_locate_file(&zip_ar, path.string().c_str(), nullptr, 0);
if (index < 0) {
LOG_WARNING(Render, "File {} is not found in the archive", path.string().c_str());
return;
}
mz_zip_archive_file_stat stat{};
mz_zip_reader_file_stat(&zip_ar, index, &stat);
v.resize(stat.m_uncomp_size / sizeof(T));
mz_zip_reader_extract_to_mem(&zip_ar, index, v.data(), stat.m_uncomp_size, 0);
} else {
const auto file = IOFile{path, FileAccessMode::Read};
v.resize(file.GetSize() / sizeof(T));
file.Read(v);
}
}
bool DataBase::Save(BlobType type, const std::string& name, std::vector<u8>&& data) {
if (!opened) {
return false;
}
auto path = Config::isPipelineCacheArchived() ? std::filesystem::path{name} : cache_path / name;
return WriteVector(type, std::move(path), std::move(data));
}
bool DataBase::Save(BlobType type, const std::string& name, std::vector<u32>&& data) {
if (!opened) {
return false;
}
auto path = Config::isPipelineCacheArchived() ? std::filesystem::path{name} : cache_path / name;
return WriteVector(type, std::move(path), std::move(data));
}
void DataBase::Load(BlobType type, const std::string& name, std::vector<u8>& data) {
if (!opened) {
return;
}
auto path = Config::isPipelineCacheArchived() ? std::filesystem::path{name} : cache_path / name;
return LoadVector(type, path, data);
}
void DataBase::Load(BlobType type, const std::string& name, std::vector<u32>& data) {
if (!opened) {
return;
}
auto path = Config::isPipelineCacheArchived() ? std::filesystem::path{name} : cache_path / name;
return LoadVector(type, path, data);
}
void DataBase::ForEachBlob(BlobType type, const std::function<void(std::vector<u8>&& data)>& func) {
const auto& ext = GetBlobFileExtension(type);
if (Config::isPipelineCacheArchived()) {
const auto num_files = mz_zip_reader_get_num_files(&zip_ar);
for (int index = 0; index < num_files; ++index) {
std::array<char, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE> file_name{};
file_name.fill(0);
mz_zip_reader_get_filename(&zip_ar, index, file_name.data(), file_name.size());
if (std::string{file_name.data()}.ends_with(ext)) {
mz_zip_archive_file_stat stat{};
mz_zip_reader_file_stat(&zip_ar, index, &stat);
std::vector<u8> data(stat.m_uncomp_size);
mz_zip_reader_extract_to_mem(&zip_ar, index, data.data(), data.size(), 0);
func(std::move(data));
}
}
} else {
for (const auto& file_name : std::filesystem::directory_iterator{cache_path}) {
if (file_name.path().extension().string().ends_with(ext)) {
using namespace Common::FS;
const auto& file = IOFile{file_name, FileAccessMode::Read};
if (file.IsOpen()) {
std::vector<u8> data(file.GetSize());
file.Read(data);
func(std::move(data));
}
}
}
}
}
void DataBase::FinishPreload() {
if (Config::isPipelineCacheArchived()) {
mz_zip_writer_init_from_reader(&zip_ar, cache_path.string().c_str());
ar_is_read_only = false;
}
}
} // namespace Storage


@@ -0,0 +1,50 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/path_util.h"
#include "common/singleton.h"
#include "common/types.h"
#include <functional>
#include <thread>
#include <vector>
namespace Storage {
enum class BlobType : u32 {
ShaderMeta,
ShaderBinary,
PipelineKey,
ShaderProfile,
};
class DataBase {
public:
static DataBase& Instance() {
return *Common::Singleton<DataBase>::Instance();
}
void Open();
void Close();
[[nodiscard]] bool IsOpened() const {
return opened;
}
void FinishPreload();
bool Save(BlobType type, const std::string& name, std::vector<u8>&& data);
bool Save(BlobType type, const std::string& name, std::vector<u32>&& data);
void Load(BlobType type, const std::string& name, std::vector<u8>& data);
void Load(BlobType type, const std::string& name, std::vector<u32>& data);
void ForEachBlob(BlobType type, const std::function<void(std::vector<u8>&& data)>& func);
private:
std::jthread io_worker{};
std::filesystem::path cache_path{};
bool opened{};
};
} // namespace Storage
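
A small usage sketch of the blob store (illustrative only; the real call sites are RegisterShaderBinary/RegisterPipelineData in vk_pipeline_serialization.cpp and the preload path of the pipeline cache):

#include <vector>
#include "video_core/cache_storage.h"

void Example() {
    auto& db = Storage::DataBase::Instance();
    db.Open(); // cache/<GameSerial>/ directory, or cache/<GameSerial>.zip when archived

    // Replay previously saved SPIR-V blobs; the file extension is derived from the BlobType.
    db.ForEachBlob(Storage::BlobType::ShaderBinary, [](std::vector<u8>&& blob) {
        // ... hand the blob to the pipeline cache preload path ...
    });
    db.FinishPreload(); // archive backend: reopens the zip for writing from here on

    // Writes are queued onto the IO worker thread, so Save() returns quickly.
    std::vector<u32> spv = {0x07230203}; // SPIR-V magic, placeholder payload
    db.Save(Storage::BlobType::ShaderBinary, "0x0123456789abcdef_0", std::move(spv));

    db.Close(); // stops the IO worker and finalizes the archive
}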


@@ -13,7 +13,8 @@ namespace Vulkan {
ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,
DescriptorHeap& desc_heap, const Shader::Profile& profile,
vk::PipelineCache pipeline_cache, ComputePipelineKey compute_key_,
const Shader::Info& info_, vk::ShaderModule module)
const Shader::Info& info_, vk::ShaderModule module,
SerializationSupport& sdata, bool preloading /*=false*/)
: Pipeline{instance, scheduler, desc_heap, profile, pipeline_cache, true},
compute_key{compute_key_} {
auto& info = stages[int(Shader::LogicalStage::Compute)];
@@ -29,7 +30,11 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,
u32 binding{};
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
for (const auto& buffer : info->buffers) {
const auto sharp = buffer.GetSharp(*info);
// During deserialization, we don't have access to the UD to fetch sharp data. To address
// this properly we would need to track sharps (or a portion of them) in `sdata`, but since
// we are only interested in the "is storage" flag (which is not even effective atm), we can
// take a shortcut here.
const auto sharp = preloading ? AmdGpu::Buffer{} : buffer.GetSharp(*info);
bindings.push_back({
.binding = binding++,
.descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer


@@ -11,6 +11,10 @@ class BufferCache;
class TextureCache;
} // namespace VideoCore
namespace Serialization {
struct Archive;
}
namespace Vulkan {
class Instance;
@@ -26,14 +30,24 @@ struct ComputePipelineKey {
friend bool operator!=(const ComputePipelineKey& lhs, const ComputePipelineKey& rhs) {
return !(lhs == rhs);
}
void Serialize(Serialization::Archive& ar) const;
bool Deserialize(Serialization::Archive& ar);
};
class ComputePipeline : public Pipeline {
public:
struct SerializationSupport {
u32 dummy{};
void Serialize(Serialization::Archive& ar) const;
bool Deserialize(Serialization::Archive& ar);
};
ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
const Shader::Profile& profile, vk::PipelineCache pipeline_cache,
ComputePipelineKey compute_key, const Shader::Info& info,
vk::ShaderModule module);
vk::ShaderModule module, SerializationSupport& sdata, bool preloading);
~ComputePipeline();
private:


@@ -41,12 +41,12 @@ GraphicsPipeline::GraphicsPipeline(
vk::PipelineCache pipeline_cache, std::span<const Shader::Info*, MaxShaderStages> infos,
std::span<const Shader::RuntimeInfo, MaxShaderStages> runtime_infos,
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader_,
std::span<const vk::ShaderModule> modules)
std::span<const vk::ShaderModule> modules, SerializationSupport& sdata, bool preloading)
: Pipeline{instance, scheduler, desc_heap, profile, pipeline_cache}, key{key_},
fetch_shader{std::move(fetch_shader_)} {
const vk::Device device = instance.GetDevice();
std::ranges::copy(infos, stages.begin());
BuildDescSetLayout();
BuildDescSetLayout(preloading);
const auto debug_str = GetDebugString();
const vk::PushConstantRange push_constants = {
@@ -68,27 +68,26 @@ GraphicsPipeline::GraphicsPipeline(
pipeline_layout = std::move(layout);
SetObjectName(device, *pipeline_layout, "Graphics PipelineLayout {}", debug_str);
VertexInputs<vk::VertexInputAttributeDescription> vertex_attributes;
VertexInputs<vk::VertexInputBindingDescription> vertex_bindings;
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT> divisors;
VertexInputs<AmdGpu::Buffer> guest_buffers;
if (!instance.IsVertexInputDynamicState()) {
const auto& vs_info = runtime_infos[u32(Shader::LogicalStage::Vertex)].vs_info;
GetVertexInputs(vertex_attributes, vertex_bindings, divisors, guest_buffers,
vs_info.step_rate_0, vs_info.step_rate_1);
if (!preloading) {
VertexInputs<AmdGpu::Buffer> guest_buffers;
if (!instance.IsVertexInputDynamicState()) {
const auto& vs_info = runtime_infos[u32(Shader::LogicalStage::Vertex)].vs_info;
GetVertexInputs(sdata.vertex_attributes, sdata.vertex_bindings, sdata.divisors,
guest_buffers, vs_info.step_rate_0, vs_info.step_rate_1);
}
}
const vk::PipelineVertexInputDivisorStateCreateInfo divisor_state = {
.vertexBindingDivisorCount = static_cast<u32>(divisors.size()),
.pVertexBindingDivisors = divisors.data(),
.vertexBindingDivisorCount = static_cast<u32>(sdata.divisors.size()),
.pVertexBindingDivisors = sdata.divisors.data(),
};
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.pNext = divisors.empty() ? nullptr : &divisor_state,
.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
.pVertexBindingDescriptions = vertex_bindings.data(),
.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
.pVertexAttributeDescriptions = vertex_attributes.data(),
.pNext = sdata.divisors.empty() ? nullptr : &divisor_state,
.vertexBindingDescriptionCount = static_cast<u32>(sdata.vertex_bindings.size()),
.pVertexBindingDescriptions = sdata.vertex_bindings.data(),
.vertexAttributeDescriptionCount = static_cast<u32>(sdata.vertex_attributes.size()),
.pVertexAttributeDescriptions = sdata.vertex_attributes.data(),
};
const auto topology = LiverpoolToVK::PrimitiveType(key.prim_type);
@@ -98,7 +97,6 @@ GraphicsPipeline::GraphicsPipeline(
const bool is_rect_list = key.prim_type == AmdGpu::PrimitiveType::RectList;
const bool is_quad_list = key.prim_type == AmdGpu::PrimitiveType::QuadList;
const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info;
const vk::PipelineTessellationStateCreateInfo tessellation_state = {
.patchControlPoints = is_rect_list ? 3U : (is_quad_list ? 4U : key.patch_control_points),
};
@@ -128,12 +126,15 @@ GraphicsPipeline::GraphicsPipeline(
raster_chain.unlink<vk::PipelineRasterizationDepthClipStateCreateInfoEXT>();
}
const vk::PipelineMultisampleStateCreateInfo multisampling = {
.rasterizationSamples = LiverpoolToVK::NumSamples(
key.num_samples, instance.GetColorSampleCounts() & instance.GetDepthSampleCounts()),
.sampleShadingEnable =
fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena,
};
if (!preloading) {
const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info;
sdata.multisampling = {
.rasterizationSamples = LiverpoolToVK::NumSamples(
key.num_samples, instance.GetColorSampleCounts() & instance.GetDepthSampleCounts()),
.sampleShadingEnable =
fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena,
};
}
const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = {
.negativeOneToOne = key.clip_space == AmdGpu::ClipSpace::MinusWToW,
@@ -164,7 +165,7 @@ GraphicsPipeline::GraphicsPipeline(
}
if (instance.IsVertexInputDynamicState()) {
dynamic_states.push_back(vk::DynamicState::eVertexInputEXT);
} else if (!vertex_bindings.empty()) {
} else if (!sdata.vertex_bindings.empty()) {
dynamic_states.push_back(vk::DynamicState::eVertexInputBindingStride);
}
@@ -200,10 +201,13 @@ GraphicsPipeline::GraphicsPipeline(
});
} else if (is_rect_list || is_quad_list) {
const auto type = is_quad_list ? AuxShaderType::QuadListTCS : AuxShaderType::RectListTCS;
auto tcs = Shader::Backend::SPIRV::EmitAuxilaryTessShader(type, fs_info);
if (!preloading) {
const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info;
sdata.tcs = Shader::Backend::SPIRV::EmitAuxilaryTessShader(type, fs_info);
}
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eTessellationControl,
.module = CompileSPV(tcs, instance.GetDevice()),
.module = CompileSPV(sdata.tcs, instance.GetDevice()),
.pName = "main",
});
}
@@ -215,11 +219,14 @@ GraphicsPipeline::GraphicsPipeline(
.pName = "main",
});
} else if (is_rect_list || is_quad_list) {
auto tes =
Shader::Backend::SPIRV::EmitAuxilaryTessShader(AuxShaderType::PassthroughTES, fs_info);
if (!preloading) {
const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info;
sdata.tes = Shader::Backend::SPIRV::EmitAuxilaryTessShader(
AuxShaderType::PassthroughTES, fs_info);
}
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eTessellationEvaluation,
.module = CompileSPV(tes, instance.GetDevice()),
.module = CompileSPV(sdata.tes, instance.GetDevice()),
.pName = "main",
});
}
@@ -360,7 +367,7 @@ GraphicsPipeline::GraphicsPipeline(
.pTessellationState = &tessellation_state,
.pViewportState = &viewport_info,
.pRasterizationState = &raster_chain.get(),
.pMultisampleState = &multisampling,
.pMultisampleState = &sdata.multisampling,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
.layout = *pipeline_layout,
@@ -428,7 +435,7 @@ template void GraphicsPipeline::GetVertexInputs(
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const;
void GraphicsPipeline::BuildDescSetLayout() {
void GraphicsPipeline::BuildDescSetLayout(bool preloading) {
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
u32 binding{};
@@ -438,7 +445,9 @@ void GraphicsPipeline::BuildDescSetLayout() {
}
const auto stage_bit = LogicalStageToStageBit[u32(stage->l_stage)];
for (const auto& buffer : stage->buffers) {
const auto sharp = buffer.GetSharp(*stage);
const auto sharp =
preloading ? AmdGpu::Buffer{}
: buffer.GetSharp(*stage); // See the comment in compute PL creation
bindings.push_back({
.binding = binding++,
.descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer

View File

@@ -63,17 +63,33 @@ struct GraphicsPipelineKey {
bool operator==(const GraphicsPipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(key)) == 0;
}
void Serialize(Serialization::Archive& ar) const;
bool Deserialize(Serialization::Archive& ar);
};
class GraphicsPipeline : public Pipeline {
public:
struct SerializationSupport {
VertexInputs<vk::VertexInputAttributeDescription> vertex_attributes{};
VertexInputs<vk::VertexInputBindingDescription> vertex_bindings{};
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT> divisors{};
vk::PipelineMultisampleStateCreateInfo multisampling{};
std::vector<u32> tcs{};
std::vector<u32> tes{};
void Serialize(Serialization::Archive& ar) const;
bool Deserialize(Serialization::Archive& ar);
};
GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
const Shader::Profile& profile, const GraphicsPipelineKey& key,
vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> stages,
std::span<const Shader::RuntimeInfo, MaxShaderStages> runtime_infos,
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader,
std::span<const vk::ShaderModule> modules);
std::span<const vk::ShaderModule> modules, SerializationSupport& sdata,
bool preloading);
~GraphicsPipeline();
const std::optional<const Shader::Gcn::FetchShaderData>& GetFetchShader() const noexcept {
@@ -92,7 +108,7 @@ public:
u32 step_rate_1) const;
private:
void BuildDescSetLayout();
void BuildDescSetLayout(bool preloading);
private:
GraphicsPipelineKey key;


@@ -13,9 +13,10 @@
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/cache_storage.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_pipeline_serialization.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
@@ -223,6 +224,13 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
desc_heap{instance, scheduler.GetMasterSemaphore(), DescriptorHeapSizes} {
const auto& vk12_props = instance.GetVk12Properties();
profile = Shader::Profile{
// When binding a UBO, we calculate its size considering the offset in the larger buffer
// cache underlying resource. In some cases, it may produce sizes exceeding the system
// maximum allowed UBO range, so we need to reduce the threshold to prevent issues.
.max_ubo_size = instance.UniformMaxSize() - instance.UniformMinAlignment(),
.max_viewport_width = instance.GetMaxViewportWidth(),
.max_viewport_height = instance.GetMaxViewportHeight(),
.max_shared_memory_size = instance.MaxComputeSharedMemorySize(),
.supported_spirv = SpirvVersion1_6,
.subgroup_size = instance.SubgroupSize(),
.support_int8 = instance.IsShaderInt8Supported(),
@@ -258,14 +266,10 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
instance.GetDriverID() == vk::DriverId::eMoltenvk,
.needs_buffer_offsets = instance.StorageMinAlignment() > 4,
.needs_unorm_fixup = instance.GetDriverID() == vk::DriverId::eMoltenvk,
// When binding a UBO, we calculate its size considering the offset in the larger buffer
// cache underlying resource. In some cases, it may produce sizes exceeding the system
// maximum allowed UBO range, so we need to reduce the threshold to prevent issues.
.max_ubo_size = instance.UniformMaxSize() - instance.UniformMinAlignment(),
.max_viewport_width = instance.GetMaxViewportWidth(),
.max_viewport_height = instance.GetMaxViewportHeight(),
.max_shared_memory_size = instance.MaxComputeSharedMemorySize(),
};
WarmUp();
auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",
vk::to_string(cache_result));
@@ -283,9 +287,14 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
const auto pipeline_hash = std::hash<GraphicsPipelineKey>{}(graphics_key);
LOG_INFO(Render_Vulkan, "Compiling graphics pipeline {:#x}", pipeline_hash);
it.value() = std::make_unique<GraphicsPipeline>(instance, scheduler, desc_heap, profile,
graphics_key, *pipeline_cache, infos,
runtime_infos, fetch_shader, modules);
GraphicsPipeline::SerializationSupport sdata{};
it.value() = std::make_unique<GraphicsPipeline>(
instance, scheduler, desc_heap, profile, graphics_key, *pipeline_cache, infos,
runtime_infos, fetch_shader, modules, sdata, false);
RegisterPipelineData(graphics_key, pipeline_hash, sdata);
++num_new_pipelines;
if (Config::collectShadersForDebug()) {
for (auto stage = 0; stage < MaxShaderStages; ++stage) {
if (infos[stage]) {
@@ -294,6 +303,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
}
}
}
fetch_shader.reset();
}
return it->second.get();
}
@@ -307,9 +317,13 @@ const ComputePipeline* PipelineCache::GetComputePipeline() {
const auto pipeline_hash = std::hash<ComputePipelineKey>{}(compute_key);
LOG_INFO(Render_Vulkan, "Compiling compute pipeline {:#x}", pipeline_hash);
it.value() =
std::make_unique<ComputePipeline>(instance, scheduler, desc_heap, profile,
*pipeline_cache, compute_key, *infos[0], modules[0]);
ComputePipeline::SerializationSupport sdata{};
it.value() = std::make_unique<ComputePipeline>(instance, scheduler, desc_heap, profile,
*pipeline_cache, compute_key, *infos[0],
modules[0], sdata, false);
RegisterPipelineData(compute_key, sdata);
++num_new_pipelines;
if (Config::collectShadersForDebug()) {
auto& m = modules[0];
module_related_pipelines[m].emplace_back(compute_key);
@@ -445,6 +459,7 @@ bool PipelineCache::RefreshGraphicsStages() {
};
infos.fill(nullptr);
modules.fill(nullptr);
bind_stage(Stage::Fragment, LogicalStage::Fragment);
const auto* fs_info = infos[static_cast<u32>(LogicalStage::Fragment)];
@@ -515,7 +530,7 @@ bool PipelineCache::RefreshComputeKey() {
}
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
std::span<const u32> code, size_t perm_idx,
const std::span<const u32>& code, size_t perm_idx,
Shader::Backend::Bindings& binding) {
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
perm_idx != 0 ? "(permutation)" : "");
@@ -536,6 +551,8 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::Runtim
module = CompileSPV(spv, instance.GetDevice());
}
RegisterShaderBinary(std::move(spv), info.pgm_hash, perm_idx);
const auto name = GetShaderName(info.stage, info.pgm_hash, perm_idx);
Vulkan::SetObjectName(instance.GetDevice(), module, name);
if (Config::collectShadersForDebug()) {
@@ -546,7 +563,7 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::Runtim
}
PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage,
Shader::ShaderParams params,
const Shader::ShaderParams& params,
Shader::Backend::Bindings& binding) {
auto runtime_info = BuildRuntimeInfo(stage, l_stage);
auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
@@ -555,32 +572,42 @@ PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stag
auto& program = it_pgm.value();
auto start = binding;
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start);
auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start);
const auto perm_hash = HashCombine(params.hash, 0);
RegisterShaderMeta(program->info, spec.fetch_shader_data, spec, perm_hash, 0);
program->AddPermut(module, std::move(spec));
return std::make_tuple(&program->info, module, spec.fetch_shader_data,
HashCombine(params.hash, 0));
return std::make_tuple(&program->info, module, program->modules[0].spec.fetch_shader_data,
perm_hash);
}
it_pgm.value()->info.user_data = params.user_data;
auto& program = it_pgm.value();
auto& info = program->info;
info.pgm_base = params.Base(); // Needs to be actualized for inline cbuffer address fixup
info.user_data = params.user_data;
info.RefreshFlatBuf();
const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
size_t perm_idx = program->modules.size();
u64 perm_hash = HashCombine(params.hash, perm_idx);
vk::ShaderModule module{};
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
if (it == program->modules.end()) {
auto new_info = Shader::Info(stage, l_stage, params);
module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding);
RegisterShaderMeta(info, spec.fetch_shader_data, spec, perm_hash, perm_idx);
program->AddPermut(module, std::move(spec));
} else {
info.AddBindings(binding);
module = it->module;
perm_idx = std::distance(program->modules.begin(), it);
perm_hash = HashCombine(params.hash, perm_idx);
}
return std::make_tuple(&info, module, spec.fetch_shader_data,
HashCombine(params.hash, perm_idx));
return std::make_tuple(&program->info, module,
program->modules[perm_idx].spec.fetch_shader_data, perm_hash);
}
std::optional<vk::ShaderModule> PipelineCache::ReplaceShader(vk::ShaderModule module,
@@ -654,5 +681,4 @@ std::optional<std::vector<u32>> PipelineCache::GetShaderPatch(u64 hash, Shader::
file.Read(code);
return code;
}
} // namespace Vulkan


@@ -23,6 +23,10 @@ namespace AmdGpu {
class Liverpool;
}
namespace Serialization {
struct Archive;
}
namespace Shader {
struct Info;
}
@@ -38,17 +42,25 @@ struct Program {
vk::ShaderModule module;
Shader::StageSpecialization spec;
};
using ModuleList = boost::container::small_vector<Module, 8>;
static constexpr size_t MaxPermutations = 8;
using ModuleList = boost::container::small_vector<Module, MaxPermutations>;
Shader::Info info;
ModuleList modules;
ModuleList modules{};
explicit Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params)
Program() = default;
Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params)
: info{stage, l_stage, params} {}
void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) {
void AddPermut(vk::ShaderModule module, Shader::StageSpecialization&& spec) {
modules.emplace_back(module, std::move(spec));
}
void InsertPermut(vk::ShaderModule module, Shader::StageSpecialization&& spec,
size_t perm_idx) {
modules.resize(std::max(modules.size(), perm_idx + 1)); // <-- beware of realloc
modules[perm_idx] = {module, std::move(spec)};
}
};
class PipelineCache {
@@ -57,6 +69,13 @@ public:
AmdGpu::Liverpool* liverpool);
~PipelineCache();
void WarmUp();
void Sync();
bool LoadComputePipeline(Serialization::Archive& ar);
bool LoadGraphicsPipeline(Serialization::Archive& ar);
bool LoadPipelineStage(Serialization::Archive& ar, size_t stage);
const GraphicsPipeline* GetGraphicsPipeline();
const ComputePipeline* GetComputePipeline();
@@ -64,7 +83,7 @@ public:
using Result = std::tuple<const Shader::Info*, vk::ShaderModule,
std::optional<Shader::Gcn::FetchShaderData>, u64>;
Result GetProgram(Shader::Stage stage, Shader::LogicalStage l_stage,
Shader::ShaderParams params, Shader::Backend::Bindings& binding);
const Shader::ShaderParams& params, Shader::Backend::Bindings& binding);
std::optional<vk::ShaderModule> ReplaceShader(vk::ShaderModule module,
std::span<const u32> spv_code);
@@ -86,10 +105,14 @@ private:
std::optional<std::vector<u32>> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx,
std::string_view ext);
vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
std::span<const u32> code, size_t perm_idx,
const std::span<const u32>& code, size_t perm_idx,
Shader::Backend::Bindings& binding);
const Shader::RuntimeInfo& BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);
[[nodiscard]] bool IsPipelineCacheDirty() const {
return num_new_pipelines > 0;
}
private:
const Instance& instance;
Scheduler& scheduler;
@@ -108,6 +131,7 @@ private:
std::optional<Shader::Gcn::FetchShaderData> fetch_shader{};
GraphicsPipelineKey graphics_key{};
ComputePipelineKey compute_key{};
u32 num_new_pipelines{}; // new pipelines added to the cache since the game start
// Only if Config::collectShadersForDebug()
tsl::robin_map<vk::ShaderModule,

View File

@@ -0,0 +1,480 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "common/serdes.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/info.h"
#include "video_core/cache_storage.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
namespace Serialization {
/* Increment the versions below whenever the corresponding serialization scheme changes. */
static constexpr u32 ShaderBinaryVersion = 1u;
static constexpr u32 ShaderMetaVersion = 1u;
static constexpr u32 PipelineKeyVersion = 1u;
} // namespace Serialization
namespace Vulkan {
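// Pipeline key blobs are laid out as [key version][type tag: 1 = compute, 0 = graphics][pipeline key][auxiliary pipeline state].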
void RegisterPipelineData(const ComputePipelineKey& key,
ComputePipeline::SerializationSupport& sdata) {
if (!Storage::DataBase::Instance().IsOpened()) {
return;
}
Serialization::Archive ar{};
Serialization::Writer pldata{ar};
pldata.Write(Serialization::PipelineKeyVersion);
pldata.Write(u32{1}); // compute
key.Serialize(ar);
sdata.Serialize(ar);
Storage::DataBase::Instance().Save(Storage::BlobType::PipelineKey,
fmt::format("c_{:#018x}", key.value), ar.TakeOff());
}
void RegisterPipelineData(const GraphicsPipelineKey& key, u64 hash,
GraphicsPipeline::SerializationSupport& sdata) {
if (!Storage::DataBase::Instance().IsOpened()) {
return;
}
Serialization::Archive ar{};
Serialization::Writer pldata{ar};
pldata.Write(Serialization::PipelineKeyVersion);
pldata.Write(u32{0}); // graphics
key.Serialize(ar);
sdata.Serialize(ar);
Storage::DataBase::Instance().Save(Storage::BlobType::PipelineKey,
fmt::format("g_{:#018x}", hash), ar.TakeOff());
}
void RegisterShaderMeta(const Shader::Info& info,
const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader_data,
const Shader::StageSpecialization& spec, size_t perm_hash,
size_t perm_idx) {
if (!Storage::DataBase::Instance().IsOpened()) {
return;
}
Serialization::Archive ar;
Serialization::Writer meta{ar};
meta.Write(Serialization::ShaderMetaVersion);
meta.Write(Serialization::ShaderBinaryVersion);
meta.Write(perm_hash);
meta.Write(perm_idx);
spec.Serialize(ar);
info.Serialize(ar);
Storage::DataBase::Instance().Save(Storage::BlobType::ShaderMeta,
fmt::format("{:#018x}", perm_hash), ar.TakeOff());
}
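// SPIR-V binaries are stored per permutation, keyed by program hash and permutation index.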
void RegisterShaderBinary(std::vector<u32>&& spv, u64 pgm_hash, size_t perm_idx) {
if (!Storage::DataBase::Instance().IsOpened()) {
return;
}
Storage::DataBase::Instance().Save(Storage::BlobType::ShaderBinary,
fmt::format("{:#018x}_{}", pgm_hash, perm_idx),
std::move(spv));
}
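// Counterpart of RegisterShaderMeta; rejects blobs written with a different meta or binary version.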
bool LoadShaderMeta(Serialization::Archive& ar, Shader::Info& info,
std::optional<Shader::Gcn::FetchShaderData>& fetch_shader_data,
Shader::StageSpecialization& spec, size_t& perm_idx) {
Serialization::Reader meta{ar};
u32 meta_version{};
meta.Read(meta_version);
if (meta_version != Serialization::ShaderMetaVersion) {
return false;
}
u32 binary_version{};
meta.Read(binary_version);
if (binary_version != Serialization::ShaderBinaryVersion) {
return false;
}
u64 perm_hash_ar{};
meta.Read(perm_hash_ar);
meta.Read(perm_idx);
spec.Deserialize(ar);
info.Deserialize(ar);
fetch_shader_data = spec.fetch_shader_data;
return true;
}
void ComputePipelineKey::Serialize(Serialization::Archive& ar) const {
Serialization::Writer key{ar};
key.Write(value);
}
bool ComputePipelineKey::Deserialize(Serialization::Archive& ar) {
Serialization::Reader key{ar};
key.Read(value);
return true;
}
void ComputePipeline::SerializationSupport::Serialize(Serialization::Archive& ar) const {
// Nothing here yet
return;
}
bool ComputePipeline::SerializationSupport::Deserialize(Serialization::Archive& ar) {
// Nothing here yet
return true;
}
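// Rebuilds a compute pipeline from a stored key blob; the shader meta blob is looked up by the compute key value.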
bool PipelineCache::LoadComputePipeline(Serialization::Archive& ar) {
compute_key.Deserialize(ar);
ComputePipeline::SerializationSupport sdata{};
sdata.Deserialize(ar);
std::vector<u8> meta_blob;
Storage::DataBase::Instance().Load(Storage::BlobType::ShaderMeta,
fmt::format("{:#018x}", compute_key.value), meta_blob);
if (meta_blob.empty()) {
return false;
}
Serialization::Archive meta_ar{std::move(meta_blob)};
if (!LoadPipelineStage(meta_ar, 0)) {
return false;
}
const auto [it, is_new] = compute_pipelines.try_emplace(compute_key);
ASSERT(is_new);
it.value() =
std::make_unique<ComputePipeline>(instance, scheduler, desc_heap, profile, *pipeline_cache,
compute_key, *infos[0], modules[0], sdata, true);
infos.fill(nullptr);
modules.fill(nullptr);
return true;
}
void GraphicsPipelineKey::Serialize(Serialization::Archive& ar) const {
Serialization::Writer key{ar};
key.Write(this, sizeof(*this));
}
bool GraphicsPipelineKey::Deserialize(Serialization::Archive& ar) {
Serialization::Reader key{ar};
key.Read(this, sizeof(*this));
return true;
}
void GraphicsPipeline::SerializationSupport::Serialize(Serialization::Archive& ar) const {
Serialization::Writer sdata{ar};
sdata.Write(&vertex_attributes, sizeof(vertex_attributes));
sdata.Write(&vertex_bindings, sizeof(vertex_bindings));
sdata.Write(&divisors, sizeof(divisors));
sdata.Write(multisampling);
sdata.Write(tcs);
sdata.Write(tes);
}
bool GraphicsPipeline::SerializationSupport::Deserialize(Serialization::Archive& ar) {
Serialization::Reader sdata{ar};
sdata.Read(&vertex_attributes, sizeof(vertex_attributes));
sdata.Read(&vertex_bindings, sizeof(vertex_bindings));
sdata.Read(&divisors, sizeof(divisors));
sdata.Read(multisampling);
sdata.Read(tcs);
sdata.Read(tes);
return true;
}
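// Rebuilds a graphics pipeline from a stored key blob; every stage referenced by the key must load successfully.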
bool PipelineCache::LoadGraphicsPipeline(Serialization::Archive& ar) {
graphics_key.Deserialize(ar);
GraphicsPipeline::SerializationSupport sdata{};
sdata.Deserialize(ar);
for (int stage_idx = 0; stage_idx < MaxShaderStages; ++stage_idx) {
const auto& hash = graphics_key.stage_hashes[stage_idx];
if (!hash) {
continue;
}
std::vector<u8> meta_blob;
Storage::DataBase::Instance().Load(Storage::BlobType::ShaderMeta,
fmt::format("{:#018x}", hash), meta_blob);
if (meta_blob.empty()) {
return false;
}
Serialization::Archive meta_ar{std::move(meta_blob)};
if (!LoadPipelineStage(meta_ar, stage_idx)) {
return false;
}
}
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
ASSERT(is_new);
it.value() = std::make_unique<GraphicsPipeline>(
instance, scheduler, desc_heap, profile, graphics_key, *pipeline_cache, infos,
runtime_infos, fetch_shader, modules, sdata, true);
infos.fill(nullptr);
modules.fill(nullptr);
fetch_shader.reset();
return true;
}
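// Loads the shader meta and SPIR-V binary for a single stage and registers the module in program_cache.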
bool PipelineCache::LoadPipelineStage(Serialization::Archive& ar, size_t stage) {
auto program = std::make_unique<Program>();
Shader::StageSpecialization spec{};
spec.info = &program->info;
size_t perm_idx{};
if (!LoadShaderMeta(ar, program->info, fetch_shader, spec, perm_idx)) {
return false;
}
std::vector<u32> spv{};
Storage::DataBase::Instance().Load(Storage::BlobType::ShaderBinary,
fmt::format("{:#018x}_{}", program->info.pgm_hash, perm_idx),
spv);
if (spv.empty()) {
return false;
}
// The permutation hash depends on the shader variation index. To prevent collisions, the module
// must be inserted at its exact index rather than appended.
vk::ShaderModule module{};
auto [it_pgm, new_program] = program_cache.try_emplace(program->info.pgm_hash);
if (new_program) {
module = CompileSPV(spv, instance.GetDevice());
it_pgm.value() = std::move(program);
} else {
const auto& it = std::ranges::find(it_pgm.value()->modules, spec, &Program::Module::spec);
if (it != it_pgm.value()->modules.end()) {
// If the permutation is already preloaded, make sure it has the same permutation index
const auto idx = std::distance(it_pgm.value()->modules.begin(), it);
ASSERT_MSG(perm_idx == idx, "Permutation {} is already inserted at {}! ({}_{:x})",
perm_idx, idx, program->info.stage, program->info.pgm_hash);
module = it->module;
} else {
module = CompileSPV(spv, instance.GetDevice());
}
}
it_pgm.value()->InsertPermut(module, std::move(spec), perm_idx);
infos[stage] = &it_pgm.value()->info;
modules[stage] = module;
return true;
}
void PipelineCache::WarmUp() {
if (!Config::isPipelineCacheEnabled()) {
return;
}
Storage::DataBase::Instance().Open();
// Check whether the stored cache is compatible with the current shader profile
std::vector<u8> profile_data{};
Storage::DataBase::Instance().Load(Storage::BlobType::ShaderProfile, "profile", profile_data);
if (profile_data.empty()) {
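// First run or empty cache: record the current profile and skip preloading.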
Storage::DataBase::Instance().FinishPreload();
profile_data.resize(sizeof(profile));
std::memcpy(profile_data.data(), &profile, sizeof(profile));
Storage::DataBase::Instance().Save(Storage::BlobType::ShaderProfile, "profile",
std::move(profile_data));
return;
}
if (std::memcmp(profile_data.data(), &profile, sizeof(profile)) != 0) {
LOG_WARNING(Render,
"Pipeline cache isn't compatible with current system. Ignoring the cache");
return;
}
u32 num_pipelines{};
u32 num_total_pipelines{};
Storage::DataBase::Instance().ForEachBlob(
Storage::BlobType::PipelineKey, [&](std::vector<u8>&& data) {
++num_total_pipelines;
Serialization::Archive ar{std::move(data)};
Serialization::Reader pldata{ar};
u32 version{};
pldata.Read(version);
if (version != Serialization::PipelineKeyVersion) {
return;
}
u32 is_compute{};
pldata.Read(is_compute);
bool result{};
if (is_compute) {
result = LoadComputePipeline(ar);
} else {
result = LoadGraphicsPipeline(ar);
}
if (result) {
++num_pipelines;
}
});
LOG_INFO(Render, "Preloaded {} pipelines", num_pipelines);
if (num_total_pipelines > num_pipelines) {
LOG_WARNING(Render, "{} stale pipelines were found. Consider re-generating the cache",
num_total_pipelines - num_pipelines);
}
Storage::DataBase::Instance().FinishPreload();
}
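// Closes the backing cache storage (presumably flushing any pending blob writes).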
void PipelineCache::Sync() {
Storage::DataBase::Instance().Close();
}
} // namespace Vulkan
namespace Shader {
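// Only the persistent part of Info (InfoPersistent), the flattened user-data buffer, and the SRT info are serialized.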
void Info::Serialize(Serialization::Archive& ar) const {
Serialization::Writer info{ar};
info.Write(this, sizeof(InfoPersistent));
info.Write(flattened_ud_buf);
srt_info.Serialize(ar);
}
bool Info::Deserialize(Serialization::Archive& ar) {
Serialization::Reader info{ar};
info.Read(this, sizeof(Shader::InfoPersistent));
info.Read(flattened_ud_buf);
return srt_info.Deserialize(ar);
}
void Gcn::FetchShaderData::Serialize(Serialization::Archive& ar) const {
Serialization::Writer fetch{ar};
ar.Grow(6 + attributes.size() * sizeof(VertexAttribute));
fetch.Write(size);
fetch.Write(vertex_offset_sgpr);
fetch.Write(instance_offset_sgpr);
fetch.Write(attributes);
}
bool Gcn::FetchShaderData::Deserialize(Serialization::Archive& ar) {
Serialization::Reader fetch{ar};
fetch.Read(size);
fetch.Read(vertex_offset_sgpr);
fetch.Read(instance_offset_sgpr);
fetch.Read(attributes);
return true;
}
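// The SRT walker thunk is stored as raw machine code and re-registered through RegisterWalkerCode on load.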
void PersistentSrtInfo::Serialize(Serialization::Archive& ar) const {
Serialization::Writer srt{ar};
srt.Write(this, sizeof(*this));
if (walker_func_size) {
srt.Write(reinterpret_cast<void*>(walker_func), walker_func_size);
}
}
bool PersistentSrtInfo::Deserialize(Serialization::Archive& ar) {
Serialization::Reader srt{ar};
srt.Read(this, sizeof(*this));
if (walker_func_size) {
walker_func = RegisterWalkerCode(ar.CurrPtr(), walker_func_size);
ar.Advance(walker_func_size);
}
return true;
}
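// The resource bitset is serialized via its string form; fetch shader data is written size-prefixed (0 when absent).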
void StageSpecialization::Serialize(Serialization::Archive& ar) const {
Serialization::Writer spec{ar};
spec.Write(start);
spec.Write(runtime_info);
spec.Write(bitset.to_string());
if (fetch_shader_data) {
spec.Write(sizeof(*fetch_shader_data));
fetch_shader_data->Serialize(ar);
} else {
spec.Write(size_t{0});
}
spec.Write(vs_attribs);
spec.Write(buffers);
spec.Write(images);
spec.Write(fmasks);
spec.Write(samplers);
}
bool StageSpecialization::Deserialize(Serialization::Archive& ar) {
Serialization::Reader spec{ar};
spec.Read(start);
spec.Read(runtime_info);
std::string bits{};
spec.Read(bits);
bitset = std::bitset<MaxStageResources>(bits);
u64 fetch_data_size{};
spec.Read(fetch_data_size);
if (fetch_data_size) {
Gcn::FetchShaderData fetch_data;
fetch_data.Deserialize(ar);
fetch_shader_data = fetch_data;
}
spec.Read(vs_attribs);
spec.Read(buffers);
spec.Read(images);
spec.Read(fmasks);
spec.Read(samplers);
return true;
}
} // namespace Shader

View File

@@ -0,0 +1,21 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/frontend/fetch_shader.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
namespace Vulkan {
void RegisterPipelineData(const ComputePipelineKey& key,
ComputePipeline::SerializationSupport& sdata);
void RegisterPipelineData(const GraphicsPipelineKey& key, u64 hash,
GraphicsPipeline::SerializationSupport& sdata);
void RegisterShaderMeta(const Shader::Info& info,
const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader_data,
const Shader::StageSpecialization& spec, size_t perm_hash, size_t perm_idx);
void RegisterShaderBinary(std::vector<u32>&& spv, u64 pgm_hash, size_t perm_idx);
} // namespace Vulkan