From eb09c4ccce4ca6ab8ed08f9e53926c52dd52d8a5 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Thu, 1 May 2025 20:10:42 -0700 Subject: [PATCH 01/10] vk_presenter: Use correct format for output frame image and view. (#2871) --- .../renderer_vulkan/vk_presenter.cpp | 22 +++++++++++++++++-- src/video_core/renderer_vulkan/vk_presenter.h | 8 ++++--- src/video_core/texture_cache/image_info.cpp | 7 +++--- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_presenter.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp index 6bd4b26fa..09dd23cb6 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -270,7 +270,25 @@ Frame* Presenter::PrepareLastFrame() { return frame; } -Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) { +static vk::Format GetFrameViewFormat(const Libraries::VideoOut::PixelFormat format) { + switch (format) { + case Libraries::VideoOut::PixelFormat::A8B8G8R8Srgb: + return vk::Format::eR8G8B8A8Srgb; + case Libraries::VideoOut::PixelFormat::A8R8G8B8Srgb: + return vk::Format::eB8G8R8A8Srgb; + case Libraries::VideoOut::PixelFormat::A2R10G10B10: + case Libraries::VideoOut::PixelFormat::A2R10G10B10Srgb: + case Libraries::VideoOut::PixelFormat::A2R10G10B10Bt2020Pq: + return vk::Format::eA2R10G10B10UnormPack32; + default: + break; + } + UNREACHABLE_MSG("Unknown format={}", static_cast(format)); + return {}; +} + +Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, + const Libraries::VideoOut::PixelFormat format, bool is_eop) { // Request a free presentation frame. Frame* frame = GetRenderFrame(); @@ -324,7 +342,7 @@ Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) cmdbuf); VideoCore::ImageViewInfo info{}; - info.format = image.info.pixel_format; + info.format = GetFrameViewFormat(format); // Exclude alpha from output frame to avoid blending with UI. info.mapping = vk::ComponentMapping{ .r = vk::ComponentSwizzle::eIdentity, diff --git a/src/video_core/renderer_vulkan/vk_presenter.h b/src/video_core/renderer_vulkan/vk_presenter.h index ad2708474..8ed2052ee 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.h +++ b/src/video_core/renderer_vulkan/vk_presenter.h @@ -70,11 +70,12 @@ public: auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; const auto image_id = texture_cache.FindImage(desc); texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler); - return PrepareFrameInternal(image_id, is_eop); + return PrepareFrameInternal(image_id, attribute.attrib.pixel_format, is_eop); } Frame* PrepareBlankFrame(bool is_eop) { - return PrepareFrameInternal(VideoCore::NULL_IMAGE_ID, is_eop); + return PrepareFrameInternal(VideoCore::NULL_IMAGE_ID, + Libraries::VideoOut::PixelFormat::Unknown, is_eop); } VideoCore::Image& RegisterVideoOutSurface( @@ -119,7 +120,8 @@ public: } private: - Frame* PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop = true); + Frame* PrepareFrameInternal(VideoCore::ImageId image_id, + Libraries::VideoOut::PixelFormat format, bool is_eop = true); Frame* GetRenderFrame(); void SetExpectedGameSize(s32 width, s32 height); diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 26928eaf7..39322f449 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -16,14 +16,15 @@ using VideoOutFormat = Libraries::VideoOut::PixelFormat; static vk::Format ConvertPixelFormat(const VideoOutFormat format) { switch (format) { - case VideoOutFormat::A8R8G8B8Srgb: - return vk::Format::eB8G8R8A8Srgb; case VideoOutFormat::A8B8G8R8Srgb: + // Remaining formats are mapped to RGBA for internal consistency and changed to BGRA in the + // frame image view. + case VideoOutFormat::A8R8G8B8Srgb: return vk::Format::eR8G8B8A8Srgb; case VideoOutFormat::A2R10G10B10: case VideoOutFormat::A2R10G10B10Srgb: case VideoOutFormat::A2R10G10B10Bt2020Pq: - return vk::Format::eA2R10G10B10UnormPack32; + return vk::Format::eA2B10G10R10UnormPack32; default: break; } From 0ba9ea6a3b98d6454164ac12a29baef30f1ef595 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Fri, 2 May 2025 13:22:05 -0500 Subject: [PATCH 02/10] Only perform early read-write open when truncating is needed (#2874) Should stop some fs error spam when games open files from /app0, as this open call would fail from reduced permissions. --- src/core/libraries/kernel/file_system.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index cb1fd14a2..ad372325c 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -181,10 +181,6 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) { return -1; } } else { - // Start by opening as read-write so we can truncate regardless of flags. - // Since open starts by closing the file, this won't interfere with later open calls. - e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite); - file->type = Core::FileSys::FileType::Regular; if (truncate && read_only) { @@ -192,9 +188,14 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) { h->DeleteHandle(handle); *__Error() = POSIX_EROFS; return -1; - } else if (truncate && e == 0) { - // If the file was opened successfully and truncate was enabled, reduce size to 0 - file->f.SetSize(0); + } else if (truncate) { + // Open the file as read-write so we can truncate regardless of flags. + // Since open starts by closing the file, this won't interfere with later open calls. + e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite); + if (e == 0) { + // If the file was opened successfully, reduce size to 0 + file->f.SetSize(0); + } } if (read) { From d542d952f4bbff97ef71ed61beac6da0813ac84a Mon Sep 17 00:00:00 2001 From: Vinicius Rangel Date: Sat, 3 May 2025 12:51:10 -0300 Subject: [PATCH 03/10] Savefixes VIII (#2851) * savedata dialog: fix SaveDialogUi move semantics fix possible dangling points * savedata dialog: removed unnecessary firmware version checks --- .../save_data/dialog/savedatadialog_ui.cpp | 23 ++++++++++--------- .../save_data/dialog/savedatadialog_ui.h | 3 ++- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp b/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp index a6ca8744d..edb5caa07 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp +++ b/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp @@ -49,13 +49,11 @@ void SaveDialogResult::CopyTo(OrbisSaveDataDialogResult& result) const { result.mode = this->mode; result.result = this->result; result.buttonId = this->button_id; - if (mode == SaveDataDialogMode::LIST || ElfInfo::Instance().FirmwareVer() >= ElfInfo::FW_45) { - if (result.dirName != nullptr) { - result.dirName->data.FromString(this->dir_name); - } - if (result.param != nullptr && this->param.GetString(SaveParams::MAINTITLE).has_value()) { - result.param->FromSFO(this->param); - } + if (result.dirName != nullptr) { + result.dirName->data.FromString(this->dir_name); + } + if (result.param != nullptr && this->param.GetString(SaveParams::MAINTITLE).has_value()) { + result.param->FromSFO(this->param); } result.userData = this->user_data; } @@ -345,12 +343,15 @@ SaveDialogUi::SaveDialogUi(SaveDialogUi&& other) noexcept } } -SaveDialogUi& SaveDialogUi::operator=(SaveDialogUi other) { +SaveDialogUi& SaveDialogUi::operator=(SaveDialogUi&& other) noexcept { std::scoped_lock lock(draw_mutex, other.draw_mutex); using std::swap; - swap(state, other.state); - swap(status, other.status); - swap(result, other.result); + state = other.state; + other.state = nullptr; + status = other.status; + other.status = nullptr; + result = other.result; + other.result = nullptr; if (status && *status == Status::RUNNING) { first_render = true; AddLayer(this); diff --git a/src/core/libraries/save_data/dialog/savedatadialog_ui.h b/src/core/libraries/save_data/dialog/savedatadialog_ui.h index aa67e1f5f..dc97268f4 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog_ui.h +++ b/src/core/libraries/save_data/dialog/savedatadialog_ui.h @@ -300,7 +300,8 @@ public: ~SaveDialogUi() override; SaveDialogUi(const SaveDialogUi& other) = delete; SaveDialogUi(SaveDialogUi&& other) noexcept; - SaveDialogUi& operator=(SaveDialogUi other); + SaveDialogUi& operator=(SaveDialogUi& other) = delete; + SaveDialogUi& operator=(SaveDialogUi&& other) noexcept; void Finish(ButtonId buttonId, CommonDialog::Result r = CommonDialog::Result::OK); From 17b6343f18a39bbd6436a94956fb6cb90f8f554c Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 3 May 2025 13:47:03 -0700 Subject: [PATCH 04/10] emulator: Fix log initialization order. (#2878) --- src/emulator.cpp | 184 ++++++++++++++++++++++++----------------------- 1 file changed, 95 insertions(+), 89 deletions(-) diff --git a/src/emulator.cpp b/src/emulator.cpp index 448b8aad4..ebb34054b 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -10,7 +10,6 @@ #include "common/logging/log.h" #ifdef ENABLE_QT_GUI #include -#include "common/memory_patcher.h" #endif #include "common/assert.h" #ifdef ENABLE_DISCORD_RPC @@ -20,6 +19,7 @@ #include #endif #include "common/elf_info.h" +#include "common/memory_patcher.h" #include "common/ntapi.h" #include "common/path_util.h" #include "common/polyfill_thread.h" @@ -54,27 +54,6 @@ Emulator::Emulator() { WSADATA wsaData; WSAStartup(versionWanted, &wsaData); #endif - - // Create stdin/stdout/stderr - Common::Singleton::Instance()->CreateStdHandles(); - - // Defer until after logging is initialized. - memory = Core::Memory::Instance(); - controller = Common::Singleton::Instance(); - linker = Common::Singleton::Instance(); - - // Load renderdoc module. - VideoCore::LoadRenderDoc(); - - // Start the timer (Play Time) -#ifdef ENABLE_QT_GUI - start_time = std::chrono::steady_clock::now(); - const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - QString filePath = QString::fromStdString((user_dir / "play_time.txt").string()); - QFile file(filePath); - ASSERT_MSG(file.open(QIODevice::ReadWrite | QIODevice::Text), - "Error opening or creating play_time.txt"); -#endif } Emulator::~Emulator() { @@ -102,54 +81,89 @@ void Emulator::Run(const std::filesystem::path& file, const std::vectorMount(game_folder, "/hostapp", true); - auto& game_info = Common::ElfInfo::Instance(); + const auto param_sfo_path = mnt->GetHostPath("/app0/sce_sys/param.sfo"); + const auto param_sfo_exists = std::filesystem::exists(param_sfo_path); - // Loading param.sfo file if exists + // Load param.sfo details if it exists std::string id; std::string title; std::string app_version; u32 fw_version; Common::PSFAttributes psf_attributes{}; - - const auto param_sfo_path = mnt->GetHostPath("/app0/sce_sys/param.sfo"); - if (!std::filesystem::exists(param_sfo_path) || !Config::getSeparateLogFilesEnabled()) { - Common::Log::Initialize(); - Common::Log::Start(); - } - - if (std::filesystem::exists(param_sfo_path)) { + if (param_sfo_exists) { auto* param_sfo = Common::Singleton::Instance(); - const bool success = param_sfo->Open(param_sfo_path); - ASSERT_MSG(success, "Failed to open param.sfo"); + ASSERT_MSG(param_sfo->Open(param_sfo_path), "Failed to open param.sfo"); + const auto content_id = param_sfo->GetString("CONTENT_ID"); ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID"); + id = std::string(*content_id, 7, 9); - - if (Config::getSeparateLogFilesEnabled()) { - Common::Log::Initialize(id + ".log"); - Common::Log::Start(); + title = param_sfo->GetString("TITLE").value_or("Unknown title"); + fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000); + app_version = param_sfo->GetString("APP_VER").value_or("Unknown version"); + if (const auto raw_attributes = param_sfo->GetInteger("ATTRIBUTE")) { + psf_attributes.raw = *raw_attributes; } - LOG_INFO(Loader, "Starting shadps4 emulator v{} ", Common::g_version); - LOG_INFO(Loader, "Revision {}", Common::g_scm_rev); - LOG_INFO(Loader, "Branch {}", Common::g_scm_branch); - LOG_INFO(Loader, "Description {}", Common::g_scm_desc); - LOG_INFO(Loader, "Remote {}", Common::g_scm_remote_url); + } - LOG_INFO(Config, "General LogType: {}", Config::getLogType()); - LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole()); - LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu()); - LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders()); - LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv()); - LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId()); - LOG_INFO(Config, "Vulkan vkValidation: {}", Config::vkValidationEnabled()); - LOG_INFO(Config, "Vulkan vkValidationSync: {}", Config::vkValidationSyncEnabled()); - LOG_INFO(Config, "Vulkan vkValidationGpu: {}", Config::vkValidationGpuEnabled()); - LOG_INFO(Config, "Vulkan crashDiagnostics: {}", Config::getVkCrashDiagnosticEnabled()); - LOG_INFO(Config, "Vulkan hostMarkers: {}", Config::getVkHostMarkersEnabled()); - LOG_INFO(Config, "Vulkan guestMarkers: {}", Config::getVkGuestMarkersEnabled()); - LOG_INFO(Config, "Vulkan rdocEnable: {}", Config::isRdocEnabled()); + // Initialize logging as soon as possible + if (!id.empty() && Config::getSeparateLogFilesEnabled()) { + Common::Log::Initialize(id + ".log"); + } else { + Common::Log::Initialize(); + } + Common::Log::Start(); + LOG_INFO(Loader, "Starting shadps4 emulator v{} ", Common::g_version); + LOG_INFO(Loader, "Revision {}", Common::g_scm_rev); + LOG_INFO(Loader, "Branch {}", Common::g_scm_branch); + LOG_INFO(Loader, "Description {}", Common::g_scm_desc); + LOG_INFO(Loader, "Remote {}", Common::g_scm_remote_url); + + LOG_INFO(Config, "General LogType: {}", Config::getLogType()); + LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole()); + LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu()); + LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders()); + LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv()); + LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId()); + LOG_INFO(Config, "Vulkan vkValidation: {}", Config::vkValidationEnabled()); + LOG_INFO(Config, "Vulkan vkValidationSync: {}", Config::vkValidationSyncEnabled()); + LOG_INFO(Config, "Vulkan vkValidationGpu: {}", Config::vkValidationGpuEnabled()); + LOG_INFO(Config, "Vulkan crashDiagnostics: {}", Config::getVkCrashDiagnosticEnabled()); + LOG_INFO(Config, "Vulkan hostMarkers: {}", Config::getVkHostMarkersEnabled()); + LOG_INFO(Config, "Vulkan guestMarkers: {}", Config::getVkGuestMarkersEnabled()); + LOG_INFO(Config, "Vulkan rdocEnable: {}", Config::isRdocEnabled()); + + if (param_sfo_exists) { + LOG_INFO(Loader, "Game id: {} Title: {}", id, title); + LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version); + } + if (!args.empty()) { + const auto argc = std::min(args.size(), 32); + for (auto i = 0; i < argc; i++) { + LOG_INFO(Loader, "Game argument {}: {}", i, args[i]); + } + if (args.size() > 32) { + LOG_ERROR(Loader, "Too many game arguments, only passing the first 32"); + } + } + + // Create stdin/stdout/stderr + Common::Singleton::Instance()->CreateStdHandles(); + + // Initialize components + memory = Core::Memory::Instance(); + controller = Common::Singleton::Instance(); + linker = Common::Singleton::Instance(); + + // Load renderdoc module + VideoCore::LoadRenderDoc(); + + // Initialize patcher and trophies + if (!id.empty()) { + MemoryPatcher::g_game_serial = id; Libraries::NpTrophy::game_serial = id; + const auto trophyDir = Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles"; if (!std::filesystem::exists(trophyDir)) { @@ -158,41 +172,9 @@ void Emulator::Run(const std::filesystem::path& file, const std::vectorstart(60000); // 60000 ms = 1 minute -#endif - title = param_sfo->GetString("TITLE").value_or("Unknown title"); - LOG_INFO(Loader, "Game id: {} Title: {}", id, title); - fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000); - app_version = param_sfo->GetString("APP_VER").value_or("Unknown version"); - LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version); - if (const auto raw_attributes = param_sfo->GetInteger("ATTRIBUTE")) { - psf_attributes.raw = *raw_attributes; - } - if (!args.empty()) { - int argc = std::min(args.size(), 32); - for (int i = 0; i < argc; i++) { - LOG_INFO(Loader, "Game argument {}: {}", i, args[i]); - } - if (args.size() > 32) { - LOG_ERROR(Loader, "Too many game arguments, only passing the first 32"); - } - } - } - - const auto pic1_path = mnt->GetHostPath("/app0/sce_sys/pic1.png"); - if (std::filesystem::exists(pic1_path)) { - game_info.splash_path = pic1_path; } + auto& game_info = Common::ElfInfo::Instance(); game_info.initialized = true; game_info.game_serial = id; game_info.title = title; @@ -201,6 +183,11 @@ void Emulator::Run(const std::filesystem::path& file, const std::vectorGetHostPath("/app0/sce_sys/pic1.png"); + if (std::filesystem::exists(pic1_path)) { + game_info.splash_path = pic1_path; + } + std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version); std::string window_title = ""; if (Common::g_is_release) { @@ -284,6 +271,25 @@ void Emulator::Run(const std::filesystem::path& file, const std::vectorstart(60000); // 60000 ms = 1 minute + + start_time = std::chrono::steady_clock::now(); + const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); + QString filePath = QString::fromStdString((user_dir / "play_time.txt").string()); + QFile file(filePath); + ASSERT_MSG(file.open(QIODevice::ReadWrite | QIODevice::Text), + "Error opening or creating play_time.txt"); + } +#endif + linker->Execute(args); window->InitTimers(); From 9a22185ab780ce7362b7a587b7defd16b456c460 Mon Sep 17 00:00:00 2001 From: oltolm Date: Sun, 4 May 2025 12:11:02 +0200 Subject: [PATCH 05/10] vulkan: do not use VK_EXT_extended_dynamic_state (#2880) fixes Bloodborne crashing on RX 580 --- src/video_core/buffer_cache/buffer_cache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index cdf736a89..fb9fd755e 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -177,8 +177,8 @@ void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) { if (instance.IsVertexInputDynamicState()) { cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data()); } else { - cmdbuf.bindVertexBuffers2EXT(0, num_buffers, host_buffers.data(), host_offsets.data(), - host_sizes.data(), host_strides.data()); + cmdbuf.bindVertexBuffers2(0, num_buffers, host_buffers.data(), host_offsets.data(), + host_sizes.data(), host_strides.data()); } } From fed064931ad599f2de628cd9ad72c640da3f061b Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Mon, 5 May 2025 05:24:08 -0500 Subject: [PATCH 06/10] Core: Fix module load addresses (#2879) * Fix module map addresses Most modules are mapped starting at 0x800000000, with no gaps between mappings. * Hardcode hardware accurate base address Looking at our address space, all platforms will have this base address mapped, so there shouldn't be any problem in using it. * Clang * Swap module mapping to NoFlags, remove offset code Since real hardware has no gap between module mappings, the Fixed flag is just an annoyance to work around, and has no impact on the actual mappings. Swapping the module mappings to use flags NoFlags instead simplifies our code slightly. * Fix module mapping names On real hardware, the file extension is part of the mapping name. Easiest way to manage this is to swap the name to be `file.filename().string()` instead of `file.stem().string()` * Fix patches Completely missed this, whoops. --- src/core/address_space.h | 2 -- src/core/module.cpp | 13 +++++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/core/address_space.h b/src/core/address_space.h index 7ccc2cd1e..d7f3efc75 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -19,8 +19,6 @@ enum class MemoryPermission : u32 { }; DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission) -constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL; - constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL; constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL; constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL; diff --git a/src/core/module.cpp b/src/core/module.cpp index cbe44457c..f31bbed6c 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -19,8 +19,7 @@ namespace Core { using EntryFunc = PS4_SYSV_ABI int (*)(size_t args, const void* argp, void* param); -static u64 LoadOffset = CODE_BASE_OFFSET; -static constexpr u64 CODE_BASE_INCR = 0x010000000u; +static constexpr u64 ModuleLoadBase = 0x800000000; static u64 GetAlignedSize(const elf_program_header& phdr) { return (phdr.p_align != 0 ? (phdr.p_memsz + (phdr.p_align - 1)) & ~(phdr.p_align - 1) @@ -84,7 +83,7 @@ static std::string StringToNid(std::string_view symbol) { } Module::Module(Core::MemoryManager* memory_, const std::filesystem::path& file_, u32& max_tls_index) - : memory{memory_}, file{file_}, name{file.stem().string()} { + : memory{memory_}, file{file_}, name{file.filename().string()} { elf.Open(file); if (elf.IsElfFile()) { LoadModuleToMemory(max_tls_index); @@ -113,10 +112,8 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { // Map module segments (and possible TLS trampolines) void** out_addr = reinterpret_cast(&base_virtual_addr); - memory->MapMemory(out_addr, memory->SystemReservedVirtualBase() + LoadOffset, - aligned_base_size + TrampolineSize, MemoryProt::CpuReadWrite, - MemoryMapFlags::Fixed, VMAType::Code, name, true); - LoadOffset += CODE_BASE_INCR * (1 + aligned_base_size / CODE_BASE_INCR); + memory->MapMemory(out_addr, ModuleLoadBase, aligned_base_size + TrampolineSize, + MemoryProt::CpuReadWrite, MemoryMapFlags::NoFlags, VMAType::Code, name, true); LOG_INFO(Core_Linker, "Loading module {} to {}", name, fmt::ptr(*out_addr)); #ifdef ARCH_X86_64 @@ -229,7 +226,7 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { LOG_INFO(Core_Linker, "program entry addr ..........: {:#018x}", entry_addr); if (MemoryPatcher::g_eboot_address == 0) { - if (name == "eboot") { + if (name == "eboot.bin") { MemoryPatcher::g_eboot_address = base_virtual_addr; MemoryPatcher::g_eboot_image_size = base_size; MemoryPatcher::OnGameLoaded(); From c7fb3ebd93a40a406e4dc6fdbfc03c00c58bec4a Mon Sep 17 00:00:00 2001 From: MajorP93 Date: Wed, 7 May 2025 02:11:32 +0200 Subject: [PATCH 07/10] shader_recompiler: Widen num_conversion bitfield (#2886) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do this in order to be able to actually fit in all possible values from AmdGpu::NumberConversion. Fixes gcc compiler warnings: warning: ‘Shader::PsColorBuffer::num_conversion’ is too small to hold all values of ‘enum class AmdGpu::NumberConversion’ --- src/shader_recompiler/runtime_info.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 517392b98..b8ed42f5b 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -169,10 +169,10 @@ static constexpr u32 MaxColorBuffers = 8; struct PsColorBuffer { AmdGpu::NumberFormat num_format : 4; - AmdGpu::NumberConversion num_conversion : 2; + AmdGpu::NumberConversion num_conversion : 3; AmdGpu::Liverpool::ShaderExportFormat export_format : 4; u32 needs_unorm_fixup : 1; - u32 pad : 21; + u32 pad : 20; AmdGpu::CompMapping swizzle; auto operator<=>(const PsColorBuffer&) const noexcept = default; From 1aa7eb8a422ca90a1b7cfcc45f30331139f7cccf Mon Sep 17 00:00:00 2001 From: Fire Cube Date: Wed, 7 May 2025 23:50:16 +0200 Subject: [PATCH 08/10] add scePthreadSetaffinity and emulate affinity (#2885) * add implementation * fix preprocessor * fixes squidbus's comments * fix clang * comment became fucked up? * fix removed return --- src/core/libraries/kernel/threads/pthread.cpp | 70 +++++++++++++++++++ src/core/libraries/kernel/threads/pthread.h | 2 + 2 files changed, 72 insertions(+) diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index c4127ecf2..e791e74bf 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -289,7 +289,12 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt /* Create thread */ new_thread->native_thr = Core::NativeThread(); int ret = new_thread->native_thr.Create(RunThread, new_thread, &new_thread->attr); + ASSERT_MSG(ret == 0, "Failed to create thread with error {}", ret); + + if (attr != nullptr && *attr != nullptr && (*attr)->cpuset != nullptr) { + new_thread->SetAffinity((*attr)->cpuset); + } if (ret) { *thread = nullptr; } @@ -521,6 +526,69 @@ int PS4_SYSV_ABI posix_pthread_setcancelstate(PthreadCancelState state, return 0; } +int Pthread::SetAffinity(const Cpuset* cpuset) { + const auto processor_count = std::thread::hardware_concurrency(); + if (processor_count < 8) { + return 0; + } + if (cpuset == nullptr) { + return POSIX_EINVAL; + } + + u64 mask = cpuset->bits; + + uintptr_t handle = native_thr.GetHandle(); + if (handle == 0) { + return POSIX_ESRCH; + } + + // We don't use this currently because some games gets performance problems + // when applying affinity even on strong hardware + /* + #ifdef _WIN64 + DWORD_PTR affinity_mask = static_cast(mask); + if (!SetThreadAffinityMask(reinterpret_cast(handle), affinity_mask)) { + return POSIX_EINVAL; + } + + #elif defined(__linux__) + cpu_set_t cpu_set; + CPU_ZERO(&cpu_set); + + u64 mask = cpuset->bits; + for (int cpu = 0; cpu < std::min(64, CPU_SETSIZE); ++cpu) { + if (mask & (1ULL << cpu)) { + CPU_SET(cpu, &cpu_set); + } + } + + int result = + pthread_setaffinity_np(static_cast(handle), sizeof(cpu_set_t), &cpu_set); + if (result != 0) { + return POSIX_EINVAL; + } + #endif + */ + return 0; +} + +int PS4_SYSV_ABI posix_pthread_setaffinity_np(PthreadT thread, size_t cpusetsize, + const Cpuset* cpusetp) { + if (thread == nullptr || cpusetp == nullptr) { + return POSIX_EINVAL; + } + thread->attr.cpusetsize = cpusetsize; + return thread->SetAffinity(cpusetp); +} + +int PS4_SYSV_ABI scePthreadSetaffinity(PthreadT thread, const Cpuset mask) { + int result = posix_pthread_setaffinity_np(thread, 0x10, &mask); + if (result != 0) { + return ErrnoToSceKernelError(result); + } + return 0; +} + void RegisterThread(Core::Loader::SymbolsResolver* sym) { // Posix LIB_FUNCTION("Z4QosVuAsA0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_once); @@ -544,6 +612,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("Z4QosVuAsA0", "libkernel", 1, "libkernel", 1, 1, posix_pthread_once); LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, posix_pthread_self); LIB_FUNCTION("OxhIB8LB-PQ", "libkernel", 1, "libkernel", 1, 1, posix_pthread_create); + LIB_FUNCTION("5KWrg7-ZqvE", "libkernel", 1, "libkernel", 1, 1, posix_pthread_setaffinity_np); // Orbis LIB_FUNCTION("14bOACANTBo", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_once)); @@ -566,6 +635,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("W0Hpm2X0uPE", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_setprio)); LIB_FUNCTION("rNhWz+lvOMU", "libkernel", 1, "libkernel", 1, 1, _sceKernelSetThreadDtors); LIB_FUNCTION("6XG4B33N09g", "libkernel", 1, "libkernel", 1, 1, sched_yield); + LIB_FUNCTION("bt3CTBKmGyI", "libkernel", 1, "libkernel", 1, 1, scePthreadSetaffinity) } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/threads/pthread.h b/src/core/libraries/kernel/threads/pthread.h index 089156776..09eed11b8 100644 --- a/src/core/libraries/kernel/threads/pthread.h +++ b/src/core/libraries/kernel/threads/pthread.h @@ -332,6 +332,8 @@ struct Pthread { return true; } } + + int SetAffinity(const Cpuset* cpuset); }; using PthreadT = Pthread*; From 3b7c36e1ba435e96e16c81d11b5c8a526513ff21 Mon Sep 17 00:00:00 2001 From: Vinicius Rangel Date: Wed, 7 May 2025 19:20:55 -0300 Subject: [PATCH 09/10] Clear stack before executing guest code (#2877) * Clear stack before executing guest code * clang, don't optimize me :rotating_light: avoid ClearStack function being optimized in release builds --- src/core/tls.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/core/tls.h b/src/core/tls.h index 6edd6a297..46ca8153b 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -5,6 +5,8 @@ #include "common/types.h" +void* memset(void* ptr, int value, size_t num); + namespace Xbyak { class CodeGenerator; } @@ -41,9 +43,18 @@ Tcb* GetTcbBase(); /// Makes sure TLS is initialized for the thread before entering guest. void EnsureThreadInitialized(); +template +__attribute__((optnone)) void ClearStack() { + volatile void* buf = alloca(size); + memset(const_cast(buf), 0, size); + buf = nullptr; +} + template ReturnType ExecuteGuest(PS4_SYSV_ABI ReturnType (*func)(FuncArgs...), CallArgs&&... args) { EnsureThreadInitialized(); + // clear stack to avoid trash from EnsureThreadInitialized + ClearStack<13_KB>(); return func(std::forward(args)...); } From 58df609ba00e09435c79d6a6649bce6176f06f78 Mon Sep 17 00:00:00 2001 From: Paris Oplopoios Date: Thu, 8 May 2025 19:59:12 +0300 Subject: [PATCH 10/10] Optimize games that hit unpatchable EXTRQ/INSERTQ (#2888) * Make signal handler faster * I love clang-format * Use faster decoding * MacOS CI --- src/core/cpu_patches.cpp | 259 ++++++++++++++++++++------------------- 1 file changed, 136 insertions(+), 123 deletions(-) diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index c8106b270..8937ef04b 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -464,9 +464,8 @@ static std::pair TryPatch(u8* code, PatchModule* module) { if (needs_trampoline && instruction.length < 5) { // Trampoline is needed but instruction is too short to patch. - // Return false and length to fall back to the illegal instruction handler, - // or to signal to AOT compilation that this instruction should be skipped and - // handled at runtime. + // Return false and length to signal to AOT compilation that this instruction + // should be skipped and handled at runtime. return std::make_pair(false, instruction.length); } @@ -512,136 +511,137 @@ static std::pair TryPatch(u8* code, PatchModule* module) { #if defined(ARCH_X86_64) +static bool Is4ByteExtrqOrInsertq(void* code_address) { + u8* bytes = (u8*)code_address; + if (bytes[0] == 0x66 && bytes[1] == 0x0F && bytes[2] == 0x79) { + return true; // extrq + } else if (bytes[0] == 0xF2 && bytes[1] == 0x0F && bytes[2] == 0x79) { + return true; // insertq + } else { + return false; + } +} + static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) { - ZydisDecodedInstruction instruction; - ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; - const auto status = - Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + // We need to decode the instruction to find out what it is. Normally we'd use a fully fleshed + // out decoder like Zydis, however Zydis does a bunch of stuff that impact performance that we + // don't care about. We can get information about the instruction a lot faster by writing a mini + // decoder here, since we know it is definitely an extrq or an insertq. If for some reason we + // need to interpret more instructions in the future (I don't see why we would), we can revert + // to using Zydis. + ZydisMnemonic mnemonic; + u8* bytes = (u8*)code_address; + if (bytes[0] == 0x66) { + mnemonic = ZYDIS_MNEMONIC_EXTRQ; + } else if (bytes[0] == 0xF2) { + mnemonic = ZYDIS_MNEMONIC_INSERTQ; + } else { + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + const auto status = + Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}", + fmt::ptr(code_address), + ZYAN_SUCCESS(status) ? ZydisMnemonicGetString(instruction.mnemonic) + : "Failed to decode"); + return false; + } - switch (instruction.mnemonic) { + ASSERT(bytes[1] == 0x0F && bytes[2] == 0x79); + + // Note: It's guaranteed that there's no REX prefix in these instructions checked by + // Is4ByteExtrqOrInsertq + u8 modrm = bytes[3]; + u8 rm = modrm & 0b111; + u8 reg = (modrm >> 3) & 0b111; + u8 mod = (modrm >> 6) & 0b11; + + ASSERT(mod == 0b11); // Any instruction we interpret here uses reg/reg addressing only + + int dstIndex = reg; + int srcIndex = rm; + + switch (mnemonic) { case ZYDIS_MNEMONIC_EXTRQ: { - bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && - operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; - if (immediateForm) { - LOG_CRITICAL(Core, "EXTRQ immediate form should have been patched at code address: {}", - fmt::ptr(code_address)); - return false; + const auto dst = Common::GetXmmPointer(ctx, dstIndex); + const auto src = Common::GetXmmPointer(ctx, srcIndex); + + u64 lowQWordSrc; + memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); + + u64 lowQWordDst; + memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); + + u64 length = lowQWordSrc & 0x3F; + u64 mask; + if (length == 0) { + length = 64; // for the check below + mask = 0xFFFF'FFFF'FFFF'FFFF; } else { - ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[0].reg.value >= ZYDIS_REGISTER_XMM0 && - operands[0].reg.value <= ZYDIS_REGISTER_XMM15 && - operands[1].reg.value >= ZYDIS_REGISTER_XMM0 && - operands[1].reg.value <= ZYDIS_REGISTER_XMM15, - "Unexpected operand types for EXTRQ instruction"); - - const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0; - const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0; - - const auto dst = Common::GetXmmPointer(ctx, dstIndex); - const auto src = Common::GetXmmPointer(ctx, srcIndex); - - u64 lowQWordSrc; - memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); - - u64 lowQWordDst; - memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); - - u64 length = lowQWordSrc & 0x3F; - u64 mask; - if (length == 0) { - length = 64; // for the check below - mask = 0xFFFF'FFFF'FFFF'FFFF; - } else { - mask = (1ULL << length) - 1; - } - - u64 index = (lowQWordSrc >> 8) & 0x3F; - if (length + index > 64) { - // Undefined behavior if length + index is bigger than 64 according to the spec, - // we'll warn and continue execution. - LOG_TRACE(Core, - "extrq at {} with length {} and index {} is bigger than 64, " - "undefined behavior", - fmt::ptr(code_address), length, index); - } - - lowQWordDst >>= index; - lowQWordDst &= mask; - - memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); - - Common::IncrementRip(ctx, instruction.length); - - return true; + mask = (1ULL << length) - 1; } - break; + + u64 index = (lowQWordSrc >> 8) & 0x3F; + if (length + index > 64) { + // Undefined behavior if length + index is bigger than 64 according to the spec, + // we'll warn and continue execution. + LOG_TRACE(Core, + "extrq at {} with length {} and index {} is bigger than 64, " + "undefined behavior", + fmt::ptr(code_address), length, index); + } + + lowQWordDst >>= index; + lowQWordDst &= mask; + + memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); + + Common::IncrementRip(ctx, 4); + + return true; } case ZYDIS_MNEMONIC_INSERTQ: { - bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && - operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; - if (immediateForm) { - LOG_CRITICAL(Core, - "INSERTQ immediate form should have been patched at code address: {}", - fmt::ptr(code_address)); - return false; + const auto dst = Common::GetXmmPointer(ctx, dstIndex); + const auto src = Common::GetXmmPointer(ctx, srcIndex); + + u64 lowQWordSrc, highQWordSrc; + memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); + memcpy(&highQWordSrc, (u8*)src + 8, sizeof(highQWordSrc)); + + u64 lowQWordDst; + memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); + + u64 length = highQWordSrc & 0x3F; + u64 mask; + if (length == 0) { + length = 64; // for the check below + mask = 0xFFFF'FFFF'FFFF'FFFF; } else { - ASSERT_MSG(operands[2].type == ZYDIS_OPERAND_TYPE_UNUSED && - operands[3].type == ZYDIS_OPERAND_TYPE_UNUSED, - "operands 2 and 3 must be unused for register form."); - - ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER, - "operands 0 and 1 must be registers."); - - const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0; - const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0; - - const auto dst = Common::GetXmmPointer(ctx, dstIndex); - const auto src = Common::GetXmmPointer(ctx, srcIndex); - - u64 lowQWordSrc, highQWordSrc; - memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); - memcpy(&highQWordSrc, (u8*)src + 8, sizeof(highQWordSrc)); - - u64 lowQWordDst; - memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); - - u64 length = highQWordSrc & 0x3F; - u64 mask; - if (length == 0) { - length = 64; // for the check below - mask = 0xFFFF'FFFF'FFFF'FFFF; - } else { - mask = (1ULL << length) - 1; - } - - u64 index = (highQWordSrc >> 8) & 0x3F; - if (length + index > 64) { - // Undefined behavior if length + index is bigger than 64 according to the spec, - // we'll warn and continue execution. - LOG_TRACE(Core, - "insertq at {} with length {} and index {} is bigger than 64, " - "undefined behavior", - fmt::ptr(code_address), length, index); - } - - lowQWordSrc &= mask; - lowQWordDst &= ~(mask << index); - lowQWordDst |= lowQWordSrc << index; - - memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); - - Common::IncrementRip(ctx, instruction.length); - - return true; + mask = (1ULL << length) - 1; } - break; + + u64 index = (highQWordSrc >> 8) & 0x3F; + if (length + index > 64) { + // Undefined behavior if length + index is bigger than 64 according to the spec, + // we'll warn and continue execution. + LOG_TRACE(Core, + "insertq at {} with length {} and index {} is bigger than 64, " + "undefined behavior", + fmt::ptr(code_address), length, index); + } + + lowQWordSrc &= mask; + lowQWordDst &= ~(mask << index); + lowQWordDst |= lowQWordSrc << index; + + memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); + + Common::IncrementRip(ctx, 4); + + return true; } default: { - LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}", - fmt::ptr(code_address), ZydisMnemonicGetString(instruction.mnemonic)); - return false; + UNREACHABLE(); } } @@ -695,9 +695,22 @@ static bool PatchesAccessViolationHandler(void* context, void* /* fault_address static bool PatchesIllegalInstructionHandler(void* context) { void* code_address = Common::GetRip(context); - if (!TryPatchJit(code_address)) { + if (Is4ByteExtrqOrInsertq(code_address)) { + // The instruction is not big enough for a relative jump, don't try to patch it and pass it + // to our illegal instruction interpreter directly return TryExecuteIllegalInstruction(context, code_address); + } else { + if (!TryPatchJit(code_address)) { + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + const auto status = + Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + LOG_ERROR(Core, "Failed to patch address {:x} -- mnemonic: {}", (u64)code_address, + ZYAN_SUCCESS(status) ? ZydisMnemonicGetString(instruction.mnemonic) + : "Failed to decode"); + } } + return true; }