diff --git a/.gitmodules b/.gitmodules index 25b5d307b..b2d65f32d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -106,3 +106,6 @@ [submodule "externals/libusb"] path = externals/libusb url = https://github.com/libusb/libusb-cmake.git +[submodule "externals/cereal"] + path = externals/cereal + url = https://github.com/USCiLab/cereal diff --git a/CMakeLists.txt b/CMakeLists.txt index 24a81243f..c572128e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -244,6 +244,8 @@ find_package(ZLIB 1.3 MODULE) find_package(Zydis 5.0.0 CONFIG) find_package(pugixml 1.14 CONFIG) find_package(libusb 1.0.27 MODULE) +find_package(cereal 1.3.2 CONFIG) + if (APPLE) find_package(date 3.0.1 CONFIG) endif() @@ -693,6 +695,7 @@ set(COMMON src/common/logging/backend.cpp src/common/rdtsc.h src/common/recursive_lock.cpp src/common/recursive_lock.h + src/common/serialization.h src/common/sha1.h src/common/shared_first_mutex.h src/common/signal_context.h @@ -931,6 +934,9 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/buffer_cache/region_manager.h src/video_core/renderer_vulkan/liverpool_to_vk.cpp src/video_core/renderer_vulkan/liverpool_to_vk.h + src/video_core/renderer_vulkan/shader_cache.cpp + src/video_core/renderer_vulkan/shader_cache.h + src/video_core/renderer_vulkan/shader_cache_serialization.h src/video_core/renderer_vulkan/vk_common.cpp src/video_core/renderer_vulkan/vk_common.h src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -1117,7 +1123,7 @@ endif() create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) -target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers libusb::usb) +target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers libusb::usb cereal::cereal) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 89b0fbfdd..ae52dae9d 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -216,6 +216,16 @@ if (NOT TARGET stb::headers) add_library(stb::headers ALIAS stb) endif() +# cereal +if (NOT TARGET cereal::cereal AND NOT APPLE) + set(SKIP_PERFORMANCE_COMPARISON ON "") + set(BUILD_SANDBOX OFF "") + set(BUILD_TESTS OFF "") + set(BUILD_DOC OFF "") + set(SKIP_PORTABILITY_TEST ON "") + add_subdirectory(cereal) +endif () + # Apple-only dependencies if (APPLE) # date diff --git a/externals/cereal b/externals/cereal new file mode 160000 index 000000000..a56bad8bb --- /dev/null +++ b/externals/cereal @@ -0,0 +1 @@ +Subproject commit a56bad8bbb770ee266e930c95d37fff2a5be7fea diff --git a/src/common/config.cpp b/src/common/config.cpp index a1b12ee5d..72569a99a 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -72,6 +72,7 @@ static bool readbackLinearImagesEnabled = false; static bool directMemoryAccessEnabled = false; static bool shouldDumpShaders = false; static bool shouldPatchShaders = false; +static bool shaderCachePreloadEnabled = false; static u32 vblankDivider = 1; static bool isFullscreen = false; static std::string fullscreenMode = "Windowed"; @@ -107,11 +108,12 @@ u32 m_language = 1; // english static std::string trophyKey = ""; // Expected number of items in the config file -static constexpr u64 total_entries = 54; +static constexpr u64 total_entries = 55; int getVolumeSlider() { return volumeSlider; } + bool allowHDR() { return isHDRAllowed; } @@ -297,6 +299,10 @@ bool patchShaders() { return shouldPatchShaders; } +bool getShaderCachePreloadEnabled() { + return shaderCachePreloadEnabled; +} + bool isRdocEnabled() { return rdocEnable; } @@ -413,6 +419,10 @@ void setDumpShaders(bool enable) { shouldDumpShaders = enable; } +void setShaderCachePreloadEnabled(bool enable) { + shaderCachePreloadEnabled = enable; +} + void setVkValidation(bool enable) { vkValidation = enable; } @@ -673,6 +683,8 @@ void load(const std::filesystem::path& path) { toml::find_or(gpu, "directMemoryAccess", directMemoryAccessEnabled); shouldDumpShaders = toml::find_or(gpu, "dumpShaders", shouldDumpShaders); shouldPatchShaders = toml::find_or(gpu, "patchShaders", shouldPatchShaders); + shaderCachePreloadEnabled = + toml::find_or(gpu, "shaderCachePreload", shaderCachePreloadEnabled); vblankDivider = toml::find_or(gpu, "vblankDivider", vblankDivider); isFullscreen = toml::find_or(gpu, "Fullscreen", isFullscreen); fullscreenMode = toml::find_or(gpu, "FullscreenMode", fullscreenMode); @@ -847,6 +859,7 @@ void save(const std::filesystem::path& path) { data["GPU"]["directMemoryAccess"] = directMemoryAccessEnabled; data["GPU"]["dumpShaders"] = shouldDumpShaders; data["GPU"]["patchShaders"] = shouldPatchShaders; + data["GPU"]["shaderCachePreload"] = shaderCachePreloadEnabled; data["GPU"]["vblankDivider"] = vblankDivider; data["GPU"]["Fullscreen"] = isFullscreen; data["GPU"]["FullscreenMode"] = fullscreenMode; @@ -951,6 +964,7 @@ void setDefaultValues() { directMemoryAccessEnabled = false; shouldDumpShaders = false; shouldPatchShaders = false; + shaderCachePreloadEnabled = false; vblankDivider = 1; isFullscreen = false; fullscreenMode = "Windowed"; @@ -1053,6 +1067,7 @@ analog_deadzone = rightjoystick, 2, 127 override_controller_color = false, 0, 0, 255 )"; } + std::filesystem::path GetFoolproofKbmConfigFile(const std::string& game_id) { // Read configuration file of the game, and if it doesn't exist, generate it from default // If that doesn't exist either, generate that from getDefaultConfig() and try again @@ -1089,4 +1104,4 @@ std::filesystem::path GetFoolproofKbmConfigFile(const std::string& game_id) { return config_file; } -} // namespace Config +} // namespace Config \ No newline at end of file diff --git a/src/common/config.h b/src/common/config.h index 4ace4d316..fee2fd3ac 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -58,6 +58,8 @@ bool directMemoryAccess(); void setDirectMemoryAccess(bool enable); bool dumpShaders(); void setDumpShaders(bool enable); +bool getShaderCachePreloadEnabled(); +void setShaderCachePreloadEnabled(); u32 vblankDiv(); void setVblankDiv(u32 value); bool getisTrophyPopupDisabled(); @@ -141,4 +143,4 @@ void setDefaultValues(); // todo: name and function location pending std::filesystem::path GetFoolproofKbmConfigFile(const std::string& game_id = ""); -}; // namespace Config +}; // namespace Config \ No newline at end of file diff --git a/src/common/hash.h b/src/common/hash.h index d5cacedd7..7f819fd4b 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -5,10 +5,10 @@ #include "common/types.h" -[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { - return seed ^ (hash + 0x9e3779b9 + (seed << 12) + (seed >> 4)); -} +template +[[nodiscard]] constexpr u64 HashCombine(T1 seed, T2 hash) noexcept { + u64 s = static_cast(seed); + u64 h = static_cast(hash); -[[nodiscard]] inline u32 HashCombine(const u32 seed, const u32 hash) { - return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); -} \ No newline at end of file + return s ^ (h + 0x9e3779b9 + (s << 12) + (s >> 4)); +} diff --git a/src/common/serialization.h b/src/common/serialization.h new file mode 100644 index 000000000..efb50c889 --- /dev/null +++ b/src/common/serialization.h @@ -0,0 +1,30 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include + +namespace cereal { + +// boost::small_vector +template +void save(Archive& ar, boost::container::small_vector const& smallVector) { + ar(make_size_tag(static_cast(smallVector.size()))); + for (auto const& element : smallVector) + ar(element); +} + +template +void load(Archive& ar, boost::container::small_vector& smallVector) { + u32 elementCount; + ar(make_size_tag(elementCount)); + smallVector.resize(elementCount); + for (auto& element : smallVector) + ar(element); +} + +} // namespace cereal \ No newline at end of file diff --git a/src/emulator.cpp b/src/emulator.cpp index 9485b0e23..edadac6fb 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -41,6 +41,7 @@ #include "core/memory.h" #include "emulator.h" #include "video_core/renderdoc.h" +#include "video_core/renderer_vulkan/shader_cache.h" Frontend::WindowSDL* g_window = nullptr; @@ -259,6 +260,15 @@ void Emulator::Run(std::filesystem::path file, const std::vector ar } VideoCore::SetOutputDir(mount_captures_dir, id); + // Initialize shader cache + if (!std::filesystem::exists(SHADER_CACHE_DIR)) { + std::filesystem::create_directories(SHADER_CACHE_DIR); + LOG_INFO(Loader, "Created shader cache directory: {}", SHADER_CACHE_DIR.string()); + } + ShaderCache::InitializeShaderCache(); + LOG_INFO(Loader, "{} shaders in cache {}", ShaderCache::shader_registry.size(), + Config::getShaderCachePreloadEnabled() != 0 ? "(preloaded) " : ""); + // Initialize kernel and library facilities. Libraries::InitHLELibs(&linker->GetHLESymbols()); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 11dd9c05e..6318a12c1 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -93,6 +93,8 @@ struct ImageResource { using ImageResourceList = boost::container::small_vector; struct SamplerResource { + SamplerResource() = default; + std::variant sampler; u32 associated_image : 4; u32 disable_aniso : 1; diff --git a/src/video_core/renderer_vulkan/shader_cache.cpp b/src/video_core/renderer_vulkan/shader_cache.cpp new file mode 100644 index 000000000..8ff9a3006 --- /dev/null +++ b/src/video_core/renderer_vulkan/shader_cache.cpp @@ -0,0 +1,398 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include + +#include + +#include "common/config.h" +#include "common/hash.h" +#include "common/io_file.h" +#include "common/logging/log.h" +#include "common/path_util.h" +#include "shader_cache.h" +#include "shader_recompiler/info.h" +#include "shader_recompiler/ir/type.h" +#include "shader_recompiler/specialization.h" +#include "video_core/renderer_vulkan/shader_cache_serialization.h" + +namespace ShaderCache { + +u64 CalculateSpecializationHash(const Shader::StageSpecialization& spec) { + u64 hash = 0; + + const auto& runtime_info = spec.runtime_info; + hash = HashCombine(hash, static_cast(runtime_info.stage)); + hash = HashCombine(hash, runtime_info.num_user_data); + hash = HashCombine(hash, runtime_info.num_input_vgprs); + hash = HashCombine(hash, runtime_info.num_allocated_vgprs); + hash = HashCombine(hash, static_cast(runtime_info.fp_denorm_mode32)); + hash = HashCombine(hash, static_cast(runtime_info.fp_round_mode32)); + + switch (runtime_info.stage) { + case Shader::Stage::Local: + hash = HashCombine(hash, runtime_info.ls_info.ls_stride); + break; + case Shader::Stage::Export: + hash = HashCombine(hash, runtime_info.es_info.vertex_data_size); + break; + case Shader::Stage::Vertex: + hash = HashCombine(hash, runtime_info.vs_info.num_outputs); + for (size_t i = 0; + i < runtime_info.vs_info.num_outputs && i < runtime_info.vs_info.outputs.size(); ++i) { + const auto& output_map = runtime_info.vs_info.outputs[i]; + for (const auto& output : output_map) { + hash = HashCombine(hash, static_cast(output)); + } + } + hash = HashCombine(hash, runtime_info.vs_info.emulate_depth_negative_one_to_one); + hash = HashCombine(hash, runtime_info.vs_info.clip_disable); + hash = HashCombine(hash, static_cast(runtime_info.vs_info.tess_type)); + hash = HashCombine(hash, static_cast(runtime_info.vs_info.tess_topology)); + hash = HashCombine(hash, static_cast(runtime_info.vs_info.tess_partitioning)); + hash = HashCombine(hash, runtime_info.vs_info.hs_output_cp_stride); + break; + case Shader::Stage::Hull: + hash = HashCombine(hash, runtime_info.hs_info.num_input_control_points); + hash = HashCombine(hash, runtime_info.hs_info.num_threads); + hash = HashCombine(hash, static_cast(runtime_info.hs_info.tess_type)); + hash = HashCombine(hash, runtime_info.hs_info.ls_stride); + hash = HashCombine(hash, runtime_info.hs_info.hs_output_cp_stride); + hash = HashCombine(hash, runtime_info.hs_info.hs_output_base); + break; + case Shader::Stage::Geometry: + hash = HashCombine(hash, runtime_info.gs_info.num_invocations); + hash = HashCombine(hash, runtime_info.gs_info.output_vertices); + hash = HashCombine(hash, runtime_info.gs_info.in_vertex_data_size); + hash = HashCombine(hash, runtime_info.gs_info.out_vertex_data_size); + hash = HashCombine(hash, static_cast(runtime_info.gs_info.in_primitive)); + for (const auto& out_prim : runtime_info.gs_info.out_primitive) { + hash = HashCombine(hash, static_cast(out_prim)); + } + hash = HashCombine(hash, runtime_info.gs_info.vs_copy_hash); + break; + case Shader::Stage::Fragment: + hash = HashCombine(hash, runtime_info.fs_info.en_flags.raw); + hash = HashCombine(hash, runtime_info.fs_info.addr_flags.raw); + hash = HashCombine(hash, runtime_info.fs_info.num_inputs); + + for (u32 i = 0; + i < runtime_info.fs_info.num_inputs && i < runtime_info.fs_info.inputs.size(); ++i) { + const auto& input = runtime_info.fs_info.inputs[i]; + hash = HashCombine(hash, input.param_index); + hash = HashCombine(hash, input.is_default); + hash = HashCombine(hash, input.is_flat); + hash = HashCombine(hash, input.default_value); + } + + for (const auto& color_buffer : runtime_info.fs_info.color_buffers) { + hash = HashCombine(hash, static_cast(color_buffer.num_format)); + hash = HashCombine(hash, static_cast(color_buffer.num_conversion)); + hash = HashCombine(hash, static_cast(color_buffer.export_format)); + hash = HashCombine(hash, color_buffer.needs_unorm_fixup); + hash = HashCombine(hash, color_buffer.swizzle.r); + hash = HashCombine(hash, color_buffer.swizzle.g); + hash = HashCombine(hash, color_buffer.swizzle.b); + hash = HashCombine(hash, color_buffer.swizzle.a); + } + break; + case Shader::Stage::Compute: + hash = HashCombine(hash, runtime_info.cs_info.shared_memory_size); + for (u32 i = 0; i < 3; ++i) { + hash = HashCombine(hash, runtime_info.cs_info.workgroup_size[i]); + hash = HashCombine(hash, runtime_info.cs_info.tgid_enable[i]); + } + break; + } + + if (spec.fetch_shader_data) { + const auto& fetch_shader = *spec.fetch_shader_data; + hash = HashCombine(hash, fetch_shader.size); + hash = HashCombine(hash, static_cast(fetch_shader.vertex_offset_sgpr)); + hash = HashCombine(hash, static_cast(fetch_shader.instance_offset_sgpr)); + + for (const auto& attr : fetch_shader.attributes) { + hash = HashCombine(hash, static_cast(attr.semantic)); + hash = HashCombine(hash, static_cast(attr.dest_vgpr)); + hash = HashCombine(hash, static_cast(attr.num_elements)); + hash = HashCombine(hash, static_cast(attr.sgpr_base)); + hash = HashCombine(hash, static_cast(attr.dword_offset)); + hash = HashCombine(hash, static_cast(attr.instance_data)); + } + } + + for (const auto& vs_attrib : spec.vs_attribs) { + hash = HashCombine(hash, static_cast(vs_attrib.num_class)); + hash = HashCombine(hash, vs_attrib.dst_select.r); + hash = HashCombine(hash, vs_attrib.dst_select.g); + hash = HashCombine(hash, vs_attrib.dst_select.b); + hash = HashCombine(hash, vs_attrib.dst_select.a); + } + + const std::string bitset_str = spec.bitset.to_string(); + for (size_t i = 0; i < bitset_str.size(); i += 8) { + size_t end = std::min(i + 8, bitset_str.size()); + std::string chunk = bitset_str.substr(i, end - i); + u8 value = 0; + for (size_t j = 0; j < chunk.size(); ++j) { + if (chunk[j] == '1') { + value |= (1 << j); + } + } + hash = HashCombine(hash, value); + } + + for (const auto& buffer : spec.buffers) { + hash = HashCombine(hash, buffer.stride); + hash = HashCombine(hash, buffer.is_storage); + hash = HashCombine(hash, buffer.is_formatted); + hash = HashCombine(hash, buffer.swizzle_enable); + + if (buffer.is_formatted) { + hash = HashCombine(hash, buffer.data_format); + hash = HashCombine(hash, buffer.num_format); + hash = HashCombine(hash, buffer.dst_select.r); + hash = HashCombine(hash, buffer.dst_select.g); + hash = HashCombine(hash, buffer.dst_select.b); + hash = HashCombine(hash, buffer.dst_select.a); + hash = HashCombine(hash, static_cast(buffer.num_conversion)); + } + + if (buffer.swizzle_enable) { + hash = HashCombine(hash, buffer.index_stride); + hash = HashCombine(hash, buffer.element_size); + } + } + + for (const auto& image : spec.images) { + hash = HashCombine(hash, static_cast(image.type)); + hash = HashCombine(hash, image.is_integer); + hash = HashCombine(hash, image.is_storage); + hash = HashCombine(hash, image.is_cube); + + if (image.is_storage) { + hash = HashCombine(hash, image.dst_select.r); + hash = HashCombine(hash, image.dst_select.g); + hash = HashCombine(hash, image.dst_select.b); + hash = HashCombine(hash, image.dst_select.a); + } + + hash = HashCombine(hash, static_cast(image.num_conversion)); + } + + for (const auto& fmask : spec.fmasks) { + hash = HashCombine(hash, fmask.width); + hash = HashCombine(hash, fmask.height); + } + + for (const auto& sampler : spec.samplers) { + hash = HashCombine(hash, sampler.force_unnormalized); + } + + hash = HashCombine(hash, spec.start.buffer); + hash = HashCombine(hash, spec.start.unified); + hash = HashCombine(hash, spec.start.user_data); + + if (spec.info) { + hash = HashCombine(hash, spec.info->pgm_hash); + hash = HashCombine(hash, static_cast(spec.info->stage)); + hash = HashCombine(hash, static_cast(spec.info->l_stage)); + hash = HashCombine(hash, spec.info->has_storage_images); + hash = HashCombine(hash, spec.info->has_discard); + hash = HashCombine(hash, spec.info->has_image_gather); + hash = HashCombine(hash, spec.info->has_image_query); + hash = HashCombine(hash, spec.info->uses_lane_id); + hash = HashCombine(hash, spec.info->uses_group_quad); + hash = HashCombine(hash, spec.info->uses_group_ballot); + hash = HashCombine(hash, spec.info->uses_fp16); + hash = HashCombine(hash, spec.info->uses_fp64); + hash = HashCombine(hash, spec.info->uses_pack_10_11_11); + hash = HashCombine(hash, spec.info->uses_unpack_10_11_11); + hash = HashCombine(hash, spec.info->stores_tess_level_outer); + hash = HashCombine(hash, spec.info->stores_tess_level_inner); + hash = HashCombine(hash, spec.info->translation_failed); + hash = HashCombine(hash, spec.info->mrt_mask); + hash = HashCombine(hash, spec.info->has_fetch_shader); + hash = HashCombine(hash, spec.info->fetch_shader_sgpr_base); + + for (size_t i = 0; i < spec.info->loads.flags.size(); ++i) { + hash = HashCombine(hash, spec.info->loads.flags[i]); + } + + for (size_t i = 0; i < spec.info->stores.flags.size(); ++i) { + hash = HashCombine(hash, spec.info->stores.flags[i]); + } + + hash = HashCombine(hash, spec.info->ud_mask.mask); + + hash = HashCombine(hash, spec.info->uses_patches); + } + + return hash; +} + +bool CheckShaderCache(std::string shader_id) { + if (Config::getShaderCachePreloadEnabled()) { + return shader_cache.contains(shader_id); + } + + return shader_registry.contains(shader_id); +} + +void InitializeShaderCache() { + if (!std::filesystem::exists(SHADER_CACHE_REGISTRY_PATH) || + std::filesystem::file_size(SHADER_CACHE_REGISTRY_PATH) == 0) { + return; + } + std::ifstream registry_file(SHADER_CACHE_REGISTRY_PATH, std::ios::binary); + cereal::BinaryInputArchive registry_ar(registry_file); + while (registry_file.tellg() < std::filesystem::file_size(SHADER_CACHE_REGISTRY_PATH)) { + std::string shader_key; + u64 offset; + registry_ar(shader_key, offset); + shader_registry[shader_key] = offset; + } + if (Config::getShaderCachePreloadEnabled()) { + std::ifstream blob_file(SHADER_CACHE_BLOB_PATH, std::ios::binary); + for (auto const& [shader_key, offset] : shader_registry) { + blob_file.seekg(offset, std::ios::beg); + { + cereal::BinaryInputArchive blob_ar(blob_file); + std::string resources; + std::vector spv; + blob_ar(spv, resources); + shader_cache[shader_key] = std::make_pair(spv, resources); + } + } + } +} + +void GetShader(std::string shader_id, Shader::Info& info, std::vector& spv) { + std::string resources; + if (Config::getShaderCachePreloadEnabled()) { + auto& entry = shader_cache[shader_id]; + spv = entry.first; + resources = entry.second; + } else { + std::ifstream blob_file(SHADER_CACHE_BLOB_PATH, std::ios::binary); + blob_file.seekg(shader_registry[shader_id], std::ios::beg); + cereal::BinaryInputArchive ar(blob_file); + + ar(spv, resources); + } + + std::istringstream info_serialized(resources); + DeserializeInfo(info_serialized, info); +} + +void AddShader(std::string shader_id, std::vector spv, std::ostringstream& info_serialized) { + std::ofstream registry_file(SHADER_CACHE_REGISTRY_PATH, std::ios::binary | std::ios::app); + registry_file.seekp(0, std::ios::end); + cereal::BinaryOutputArchive reg_ar(registry_file); + + std::ofstream blob_file(SHADER_CACHE_BLOB_PATH, std::ios::binary | std::ios::app); + blob_file.seekp(0, std::ios::end); + cereal::BinaryOutputArchive blob_ar(blob_file); + + u64 offset = static_cast(blob_file.tellp()); + reg_ar(shader_id, offset); + + std::string info_blob = info_serialized.str(); + blob_ar(spv, info_blob); + + shader_registry[shader_id] = offset; + if (Config::getShaderCachePreloadEnabled()) { + shader_cache[shader_id] = std::make_pair(spv, info_blob); + } +} + +void SerializeInfo(std::ostream& info_serialized, Shader::Info& info) { + cereal::BinaryOutputArchive ar(info_serialized); + ar << info.ud_mask; + ar << info.gs_copy_data; + ar << info.uses_patches; + ar << info.buffers; + ar << info.images; + ar << info.samplers; + ar << info.fmasks; + ar << info.fs_interpolation; + ar << info.tess_consts_ptr_base; + ar << info.tess_consts_dword_offset; + ar << info.stage; + ar << info.l_stage; + ar << info.pgm_hash; + ar << info.pgm_base; // ! + ar << info.has_storage_images; + ar << info.has_discard; + ar << info.has_image_gather; + ar << info.has_image_query; + ar << info.uses_buffer_atomic_float_min_max; + ar << info.uses_image_atomic_float_min_max; + ar << info.uses_lane_id; + ar << info.uses_group_quad; + ar << info.uses_group_ballot; + ar << info.shared_types; + ar << info.uses_fp16; + ar << info.uses_fp64; + ar << info.uses_pack_10_11_11; + ar << info.uses_unpack_10_11_11; + ar << info.uses_buffer_int64_atomics; + ar << info.uses_shared_int64_atomics; + ar << info.stores_tess_level_outer; + ar << info.stores_tess_level_inner; + ar << info.translation_failed; + ar << info.mrt_mask; + ar << info.has_fetch_shader; + ar << info.fetch_shader_sgpr_base; // ! + ar << info.readconst_types; + ar << info.uses_dma; + ar << info.srt_info.flattened_bufsize_dw; +} + +void DeserializeInfo(std::istream& info_serialized, Shader::Info& info) { + cereal::BinaryInputArchive ar(info_serialized); + ar >> info.ud_mask; + ar >> info.gs_copy_data; + ar >> info.uses_patches; + ar >> info.buffers; + ar >> info.images; + ar >> info.samplers; + ar >> info.fmasks; + ar >> info.fs_interpolation; + ar >> info.tess_consts_ptr_base; + ar >> info.tess_consts_dword_offset; + ar >> info.stage; + ar >> info.l_stage; + ar >> info.pgm_hash; + ar >> info.pgm_base; // ! + ar >> info.has_storage_images; + ar >> info.has_discard; + ar >> info.has_image_gather; + ar >> info.has_image_query; + ar >> info.uses_buffer_atomic_float_min_max; + ar >> info.uses_image_atomic_float_min_max; + ar >> info.uses_lane_id; + ar >> info.uses_group_quad; + ar >> info.uses_group_ballot; + ar >> info.shared_types; + ar >> info.uses_fp16; + ar >> info.uses_fp64; + ar >> info.uses_pack_10_11_11; + ar >> info.uses_unpack_10_11_11; + ar >> info.uses_buffer_int64_atomics; + ar >> info.uses_shared_int64_atomics; + ar >> info.stores_tess_level_outer; + ar >> info.stores_tess_level_inner; + ar >> info.translation_failed; + ar >> info.mrt_mask; + ar >> info.has_fetch_shader; + ar >> info.fetch_shader_sgpr_base; // ! + ar >> info.readconst_types; + ar >> info.uses_dma; + ar >> info.srt_info.flattened_bufsize_dw; +} + +} // namespace ShaderCache \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/shader_cache.h b/src/video_core/renderer_vulkan/shader_cache.h new file mode 100644 index 000000000..975b8290e --- /dev/null +++ b/src/video_core/renderer_vulkan/shader_cache.h @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include + +#include "common/elf_info.h" +#include "shader_recompiler/info.h" +#include "shader_recompiler/specialization.h" + +namespace ShaderCache { + +#define SHADER_CACHE_DIR \ + (Common::FS::GetUserPath(Common::FS::PathType::ShaderDir) / "cache" / "portable") + +#define SHADER_CACHE_BLOB_PATH \ + (SHADER_CACHE_DIR / (std::string{Common::ElfInfo::Instance().GameSerial()} + "_shaders.bin")) + +#define SHADER_CACHE_REGISTRY_PATH \ + (SHADER_CACHE_DIR / \ + (std::string{Common::ElfInfo::Instance().GameSerial()} + "_shaders_registry.bin")) + +inline std::map shader_registry; // shader_key:offset +inline std::map, std::string>> shader_cache; +// only used when preload active // shader_key:blob[spv,info] + +u64 CalculateSpecializationHash(const Shader::StageSpecialization& spec); +void InitializeShaderCache(); +void SerializeInfo(std::ostream& info_serialized, Shader::Info& info); +void DeserializeInfo(std::istream& info_serialized, Shader::Info& info); + +bool CheckShaderCache(std::string shader_id); +void GetShader(std::string shader_id, Shader::Info& info, std::vector& spv); +void AddShader(std::string shader_id, std::vector spv, std::ostringstream& info_serialized); + +} // namespace ShaderCache \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/shader_cache_serialization.h b/src/video_core/renderer_vulkan/shader_cache_serialization.h new file mode 100644 index 000000000..f3f5f22af --- /dev/null +++ b/src/video_core/renderer_vulkan/shader_cache_serialization.h @@ -0,0 +1,74 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/serialization.h" +#include "shader_recompiler/info.h" + +namespace cereal { + +// Shader::Info::UserDataMask +template +void serialize(Archive& ar, Shader::Info::UserDataMask& mask) { + ar(mask.mask); +} + +// Shader::CopyShaderData +template +void serialize(Archive& ar, Shader::CopyShaderData& data) { + ar(data.attr_map, data.num_attrs, data.output_vertices); +} + +// AmdGPU::Buffer +template +void serialize(Archive& ar, AmdGpu::Buffer& buffer) { + ar(cereal::binary_data(reinterpret_cast(&buffer), sizeof(buffer))); + // is base_adress cacheable? +} + +// Shader::BufferResource +template +void serialize(Archive& ar, Shader::BufferResource& buffer) { + ar(buffer.sharp_idx, buffer.used_types, buffer.inline_cbuf, buffer.buffer_type, + buffer.instance_attrib, buffer.is_written, buffer.is_formatted); +} + +// Shader::ImageResource +template +void serialize(Archive& ar, Shader::ImageResource& image) { + ar(image.sharp_idx, image.is_depth, image.is_atomic, image.is_array, image.is_written, + image.is_r128); +} + +// AmdGpu::Sampler +template +void serialize(Archive& ar, AmdGpu::Sampler& sampler) { + ar(cereal::binary_data(reinterpret_cast(&sampler), sizeof(sampler))); +} + +// Shader::SamplerResource +template +void serialize(Archive& ar, Shader::SamplerResource& sampler) { + ar(sampler.sampler); + ar(static_cast(sampler.associated_image), static_cast(sampler.disable_aniso)); +} + +// Shader::FMaskResource +template +void serialize(Archive& ar, Shader::FMaskResource& fmask) { + cereal::binary_data(reinterpret_cast(&fmask), sizeof(fmask)); +} + +// Shader::Info::Interpolation +template +void serialize(Archive& ar, Shader::Info::Interpolation& interpolation) { + ar(interpolation.primary, interpolation.auxiliary); +} +} // namespace cereal \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index bce16cbff..3ceb1fc9b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,6 +12,7 @@ #include "shader_recompiler/info.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/renderer_vulkan/shader_cache.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_presenter.h" @@ -496,14 +497,37 @@ bool PipelineCache::RefreshComputeKey() { vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, - Shader::Backend::Bindings& binding) { + Shader::Backend::Bindings& binding, + Shader::StageSpecialization spec) { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, perm_idx != 0 ? "(permutation)" : ""); + DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); - const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile); - auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding); - DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); + std::string shader_name = GetShaderName(info.stage, info.pgm_hash, perm_idx); + + std::vector spv; + std::string shader_id = std::to_string(::ShaderCache::CalculateSpecializationHash(spec)); + if (::ShaderCache::CheckShaderCache(shader_id)) { + LOG_INFO(Render_Vulkan, "Loaded shader {} {:#x} {}from cache", info.stage, info.pgm_hash, + perm_idx != 0 ? "(permutation) " : ""); + ::ShaderCache::GetShader(shader_id, info, spv); + info.RefreshFlatBuf(); + } else { + LOG_INFO(Render_Vulkan, "Shader {} {:#x} {}not in cache", info.stage, info.pgm_hash, + perm_idx != 0 ? "(permutation) " : ""); + const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile); + spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding); + std::ostringstream info_serialized; + ::ShaderCache::SerializeInfo(info_serialized, info); + + ::ShaderCache::AddShader(shader_id, spv, info_serialized); + LOG_INFO(Render_Vulkan, "Shader ID: {}", shader_id); + DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); + + LOG_INFO(Render_Vulkan, "Compiled shader {} {:#x} {}and saved it to cache", info.stage, + info.pgm_hash, perm_idx != 0 ? "(permutation) " : ""); + } vk::ShaderModule module; @@ -518,6 +542,7 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::Runtim const auto name = GetShaderName(info.stage, info.pgm_hash, perm_idx); Vulkan::SetObjectName(instance.GetDevice(), module, name); + if (Config::collectShadersForDebug()) { DebugState.CollectShader(name, info.l_stage, module, spv, code, patch ? *patch : std::span{}, is_patched); @@ -534,8 +559,10 @@ PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stag it_pgm.value() = std::make_unique(stage, l_stage, params); auto& program = it_pgm.value(); auto start = binding; - const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); - const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start); + Shader::StageSpecialization spec = + Shader::StageSpecialization(program->info, runtime_info, profile, start); + const auto module = + CompileModule(program->info, runtime_info, params.code, 0, binding, spec); program->AddPermut(module, std::move(spec)); return std::make_tuple(&program->info, module, spec.fetch_shader_data, HashCombine(params.hash, 0)); @@ -552,7 +579,7 @@ PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stag const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); if (it == program->modules.end()) { auto new_info = Shader::Info(stage, l_stage, params); - module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); + module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding, spec); program->AddPermut(module, std::move(spec)); } else { info.AddBindings(binding); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index ba3407b48..cf78def7a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -82,7 +82,8 @@ private: std::string_view ext); vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, - Shader::Backend::Bindings& binding); + Shader::Backend::Bindings& binding, + Shader::StageSpecialization spec); const Shader::RuntimeInfo& BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage); private: