From 46b88bd10f0d6d8dc59a80866a625a75e739a0af Mon Sep 17 00:00:00 2001
From: mailwl
Date: Fri, 9 May 2025 11:08:22 +0300
Subject: [PATCH 1/6] [Libs] Stubs sceSigninDialog (#2890)

* [Libs] Stubs SigninDialog

* clang-format

* clang-format again

* remove magic constant

* log dialog finished status
---
 CMakeLists.txt                                |  2 +
 src/common/logging/filter.cpp                 |  1 +
 src/common/logging/types.h                    |  1 +
 src/core/libraries/libs.cpp                   |  2 +
 .../libraries/signin_dialog/signindialog.cpp  | 64 +++++++++++++++++++
 .../libraries/signin_dialog/signindialog.h    | 29 +++++++++
 6 files changed, 99 insertions(+)
 create mode 100644 src/core/libraries/signin_dialog/signindialog.cpp
 create mode 100644 src/core/libraries/signin_dialog/signindialog.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f55767611..9b10d0e5b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -597,6 +597,8 @@ set(MISC_LIBS src/core/libraries/screenshot/screenshot.cpp
               src/core/libraries/move/move.h
               src/core/libraries/ulobjmgr/ulobjmgr.cpp
               src/core/libraries/ulobjmgr/ulobjmgr.h
+              src/core/libraries/signin_dialog/signindialog.cpp
+              src/core/libraries/signin_dialog/signindialog.h
 )

 set(DEV_TOOLS src/core/devtools/layer.cpp
diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp
index 867d62916..622af93cc 100644
--- a/src/common/logging/filter.cpp
+++ b/src/common/logging/filter.cpp
@@ -137,6 +137,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
     SUB(Lib, NpParty) \
     SUB(Lib, Zlib) \
     SUB(Lib, Hmd) \
+    SUB(Lib, SigninDialog) \
     CLS(Frontend) \
     CLS(Render) \
     SUB(Render, Vulkan) \
diff --git a/src/common/logging/types.h b/src/common/logging/types.h
index e5714a81a..27a87e082 100644
--- a/src/common/logging/types.h
+++ b/src/common/logging/types.h
@@ -104,6 +104,7 @@ enum class Class : u8 {
     Lib_NpParty,      ///< The LibSceNpParty implementation
     Lib_Zlib,         ///< The LibSceZlib implementation.
     Lib_Hmd,          ///< The LibSceHmd implementation.
+    Lib_SigninDialog, ///< The LibSigninDialog implementation.
     Frontend,         ///< Emulator UI
     Render,           ///< Video Core
     Render_Vulkan,    ///< Vulkan backend
diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp
index 3f5baf640..3826ff793 100644
--- a/src/core/libraries/libs.cpp
+++ b/src/core/libraries/libs.cpp
@@ -45,6 +45,7 @@
 #include "core/libraries/save_data/savedata.h"
 #include "core/libraries/screenshot/screenshot.h"
 #include "core/libraries/share_play/shareplay.h"
+#include "core/libraries/signin_dialog/signindialog.h"
 #include "core/libraries/system/commondialog.h"
 #include "core/libraries/system/msgdialog.h"
 #include "core/libraries/system/posix.h"
@@ -120,6 +121,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) {
     Libraries::Hmd::RegisterlibSceHmd(sym);
     Libraries::DiscMap::RegisterlibSceDiscMap(sym);
     Libraries::Ulobjmgr::RegisterlibSceUlobjmgr(sym);
+    Libraries::SigninDialog::RegisterlibSceSigninDialog(sym);
 }

 } // namespace Libraries
diff --git a/src/core/libraries/signin_dialog/signindialog.cpp b/src/core/libraries/signin_dialog/signindialog.cpp
new file mode 100644
index 000000000..0e4eb63a2
--- /dev/null
+++ b/src/core/libraries/signin_dialog/signindialog.cpp
@@ -0,0 +1,64 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+// Generated By moduleGenerator
+#include "common/logging/log.h"
+#include "core/libraries/error_codes.h"
+#include "core/libraries/libs.h"
+#include "signindialog.h"
+
+namespace Libraries::SigninDialog {
+
+s32 PS4_SYSV_ABI sceSigninDialogInitialize() {
+    LOG_ERROR(Lib_SigninDialog, "(STUBBED) called");
+    return ORBIS_OK;
+}
+
+s32 PS4_SYSV_ABI sceSigninDialogOpen() {
+    LOG_ERROR(Lib_SigninDialog, "(STUBBED) called");
+    return ORBIS_OK;
+}
+
+Status PS4_SYSV_ABI sceSigninDialogGetStatus() {
+    LOG_ERROR(Lib_SigninDialog, "(STUBBED) called, return 'finished' status");
+    return Status::FINISHED;
+}
+
+Status PS4_SYSV_ABI sceSigninDialogUpdateStatus() {
+    LOG_ERROR(Lib_SigninDialog, "(STUBBED) called, return 'finished' status");
+    return Status::FINISHED;
+}
+
+s32 PS4_SYSV_ABI sceSigninDialogGetResult() {
+    LOG_ERROR(Lib_SigninDialog, "(STUBBED) called");
+    return ORBIS_OK;
+}
+
+s32 PS4_SYSV_ABI sceSigninDialogClose() {
+    LOG_ERROR(Lib_SigninDialog, "(STUBBED) called");
+    return ORBIS_OK;
+}
+
+s32 PS4_SYSV_ABI sceSigninDialogTerminate() {
+    LOG_ERROR(Lib_SigninDialog, "(STUBBED) called");
+    return ORBIS_OK;
+}
+
+void RegisterlibSceSigninDialog(Core::Loader::SymbolsResolver* sym) {
+    LIB_FUNCTION("mlYGfmqE3fQ", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
+                 sceSigninDialogInitialize);
+    LIB_FUNCTION("JlpJVoRWv7U", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
+                 sceSigninDialogOpen);
+    LIB_FUNCTION("2m077aeC+PA", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
+                 sceSigninDialogGetStatus);
+    LIB_FUNCTION("Bw31liTFT3A", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
+                 sceSigninDialogUpdateStatus);
+    LIB_FUNCTION("nqG7rqnYw1U", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
+                 sceSigninDialogGetResult);
+    LIB_FUNCTION("M3OkENHcyiU", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
+                 sceSigninDialogClose);
+    LIB_FUNCTION("LXlmS6PvJdU", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
+                 sceSigninDialogTerminate);
+};
+
+} // namespace Libraries::SigninDialog
diff --git a/src/core/libraries/signin_dialog/signindialog.h b/src/core/libraries/signin_dialog/signindialog.h
new file mode 100644
index 000000000..8726ad1f6
--- /dev/null
+++ b/src/core/libraries/signin_dialog/signindialog.h
@@ -0,0 +1,29 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+#include "common/types.h"
+
+namespace Core::Loader {
+class SymbolsResolver;
+}
+
+enum class Status : u32 {
+    NONE = 0,
+    INITIALIZED = 1,
+    RUNNING = 2,
+    FINISHED = 3,
+};
+
+namespace Libraries::SigninDialog {
+
+s32 PS4_SYSV_ABI sceSigninDialogInitialize();
+s32 PS4_SYSV_ABI sceSigninDialogOpen();
+Status PS4_SYSV_ABI sceSigninDialogGetStatus();
+Status PS4_SYSV_ABI sceSigninDialogUpdateStatus();
+s32 PS4_SYSV_ABI sceSigninDialogGetResult();
+s32 PS4_SYSV_ABI sceSigninDialogClose();
+s32 PS4_SYSV_ABI sceSigninDialogTerminate();
+
+void RegisterlibSceSigninDialog(Core::Loader::SymbolsResolver* sym);
+} // namespace Libraries::SigninDialog

From 8e7c5a4d995106661524173914af15aeeb11511a Mon Sep 17 00:00:00 2001
From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com>
Date: Fri, 9 May 2025 17:33:32 +0200
Subject: [PATCH 2/6] Remove deprecated include (#2893)

---
 src/core/libraries/libc_internal/printf.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/core/libraries/libc_internal/printf.h b/src/core/libraries/libc_internal/printf.h
index fe63481a0..9c22e922c 100644
--- a/src/core/libraries/libc_internal/printf.h
+++ b/src/core/libraries/libc_internal/printf.h
@@ -56,7 +56,6 @@
 #include
 #include
-#include
 #include
 #include
 #include

From b130fe6ed59277ff66ff8579ce3aa14452f2416c Mon Sep 17 00:00:00 2001
From: squidbus <175574877+squidbus@users.noreply.github.com>
Date: Fri, 9 May 2025 08:43:20 -0700
Subject: [PATCH 3/6] vulkan: Handle incompatible depth format using null binding. (#2892)

Co-authored-by: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com>
---
 src/shader_recompiler/info.h                  |  5 +++
 .../ir/passes/resource_tracking_pass.cpp      |  6 +++
 src/video_core/amdgpu/resource.h              | 13 +++++++
 .../renderer_vulkan/vk_rasterizer.cpp         |  5 ++-
 src/video_core/texture_cache/image_view.h     |  2 -
 .../texture_cache/texture_cache.cpp           | 37 ++++++++++++-------
 src/video_core/texture_cache/texture_cache.h  |  4 ++
 7 files changed, 55 insertions(+), 17 deletions(-)

diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index 784f8b4d2..12e48c8e4 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -281,6 +281,11 @@ constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept
         // Fall back to null image if unbound.
return AmdGpu::Image::Null(); } + const auto data_fmt = image.GetDataFmt(); + if (is_depth && data_fmt != AmdGpu::DataFormat::Format16 && + data_fmt != AmdGpu::DataFormat::Format32) { + return AmdGpu::Image::NullDepth(); + } return image; } diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 1de255e4d..cc0bf83d3 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -363,6 +363,12 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!"); image = AmdGpu::Image::Null(); } + const auto data_fmt = image.GetDataFmt(); + if (inst_info.is_depth && data_fmt != AmdGpu::DataFormat::Format16 && + data_fmt != AmdGpu::DataFormat::Format32) { + LOG_ERROR(Render_Vulkan, "Shader compiled using non-depth image with depth instruction!"); + image = AmdGpu::Image::NullDepth(); + } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index c387c7bf2..9060074fb 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -219,6 +219,19 @@ struct Image { return image; } + static constexpr Image NullDepth() { + Image image{}; + image.data_format = u64(DataFormat::Format32); + image.num_format = u64(NumberFormat::Float); + image.dst_sel_x = u64(CompSwizzle::Red); + image.dst_sel_y = u64(CompSwizzle::Green); + image.dst_sel_z = u64(CompSwizzle::Blue); + image.dst_sel_w = u64(CompSwizzle::Alpha); + image.tiling_index = u64(TilingMode::Texture_MicroTiled); + image.type = u64(ImageType::Color2D); + return image; + } + bool Valid() const { return (type & 0x8u) != 0; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 4caa781b9..e7b42a34b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -618,8 +618,9 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin if (instance.IsNullDescriptorSupported()) { image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); } else { - auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); - image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, + auto& null_image_view = + texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc.view_info); + image_infos.emplace_back(VK_NULL_HANDLE, *null_image_view.image_view, vk::ImageLayout::eGeneral); } } else { diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 23c703d23..6a17490bf 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -34,8 +34,6 @@ struct ImageViewInfo { struct Image; -constexpr Common::SlotId NULL_IMAGE_VIEW_ID{0}; - struct ImageView { ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image, ImageId image_id); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 047bb3dfe..82f4d6413 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -8,6 +8,7 @@ #include "common/debug.h" #include "video_core/buffer_cache/buffer_cache.h" 
#include "video_core/page_manager.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/host_compatibility.h" @@ -23,31 +24,41 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& BufferCache& buffer_cache_, PageManager& tracker_) : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_}, tile_manager{instance, scheduler} { + // Create basic null image at fixed image ID. + const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm); + ASSERT(null_id.index == NULL_IMAGE_ID.index); +} + +TextureCache::~TextureCache() = default; + +ImageId TextureCache::GetNullImage(const vk::Format format) { + const auto existing_image = null_images.find(format); + if (existing_image != null_images.end()) { + return existing_image->second; + } + ImageInfo info{}; - info.pixel_format = vk::Format::eR8G8B8A8Unorm; + info.pixel_format = format; info.type = vk::ImageType::e2D; - info.tiling_idx = u32(AmdGpu::TilingMode::Texture_MicroTiled); + info.tiling_idx = static_cast(AmdGpu::TilingMode::Texture_MicroTiled); info.num_bits = 32; info.UpdateSize(); + const ImageId null_id = slot_images.insert(instance, scheduler, info); - ASSERT(null_id.index == NULL_IMAGE_ID.index); auto& img = slot_images[null_id]; + const vk::Image& null_image = img.image; - Vulkan::SetObjectName(instance.GetDevice(), null_image, "Null Image"); + Vulkan::SetObjectName(instance.GetDevice(), null_image, + fmt::format("Null Image ({})", vk::to_string(format))); + img.flags = ImageFlagBits::Empty; img.track_addr = img.info.guest_address; img.track_addr_end = img.info.guest_address + img.info.guest_size; - ImageViewInfo view_info; - const auto null_view_id = - slot_image_views.insert(instance, view_info, slot_images[null_id], null_id); - ASSERT(null_view_id.index == NULL_IMAGE_VIEW_ID.index); - const vk::ImageView& null_image_view = slot_image_views[null_view_id].image_view.get(); - Vulkan::SetObjectName(instance.GetDevice(), null_image_view, "Null Image View"); + null_images.emplace(format, null_id); + return null_id; } -TextureCache::~TextureCache() = default; - void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { if (image.hash == 0) { // Initialize hash @@ -296,7 +307,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) { const auto& info = desc.info; if (info.guest_address == 0) [[unlikely]] { - return NULL_IMAGE_ID; + return GetNullImage(info.pixel_format); } std::scoped_lock lock{mutex}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f262768ea..b6bf88958 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -246,6 +246,9 @@ private: } } + /// Gets or creates a null image for a particular format. 
+ ImageId GetNullImage(vk::Format format); + /// Create an image from the given parameters [[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr); @@ -285,6 +288,7 @@ private: Common::SlotVector slot_images; Common::SlotVector slot_image_views; tsl::robin_map samplers; + tsl::robin_map null_images; PageTable page_table; std::mutex mutex; From 8d7cbf9943f1b8476bee7bde758b77d0d4d4edff Mon Sep 17 00:00:00 2001 From: Missake212 Date: Fri, 9 May 2025 17:01:34 +0100 Subject: [PATCH 4/6] Adding opcode IMAGE_SAMPLE_B_O (#2894) * Adding opcode IMAGE_SAMPLE_B_O: * fix clang (my first time !) --- src/shader_recompiler/frontend/translate/vector_memory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index cfc01c58f..5639bc56a 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -143,6 +143,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::IMAGE_SAMPLE_C_LZ: case Opcode::IMAGE_SAMPLE_O: case Opcode::IMAGE_SAMPLE_L_O: + case Opcode::IMAGE_SAMPLE_B_O: case Opcode::IMAGE_SAMPLE_LZ_O: case Opcode::IMAGE_SAMPLE_C_O: case Opcode::IMAGE_SAMPLE_C_LZ_O: From a1439b15cf572a862dfd01dea1dbe71c66b473d7 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Fri, 9 May 2025 10:04:37 -0700 Subject: [PATCH 5/6] gnm: Implement sceGnmDrawIndexIndirectMulti (#2889) --- src/core/libraries/gnmdriver/gnmdriver.cpp | 38 +++++++++++++++---- src/core/libraries/gnmdriver/gnmdriver.h | 4 +- src/video_core/amdgpu/liverpool.cpp | 37 ++++++++++++++---- src/video_core/amdgpu/pm4_cmds.h | 26 +++++++++++-- .../renderer_vulkan/vk_instance.cpp | 1 + 5 files changed, 86 insertions(+), 20 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 25ac4921c..f2f40e0e3 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -505,9 +505,10 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da u32 flags) { LOG_TRACE(Lib_GnmDriver, "called"); - if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && !cmdbuf && (size == 16) && - (shader_stage < ShaderStages::Max) && (vertex_sgpr_offset < 0x10u) && - (instance_sgpr_offset < 0x10u)) { + if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && cmdbuf && (size == 16) && + (vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u) && + (shader_stage == ShaderStages::Vs || shader_stage == ShaderStages::Es || + shader_stage == ShaderStages::Ls)) { cmdbuf = WriteHeader(cmdbuf, 2); cmdbuf = WriteBody(cmdbuf, 0u); @@ -535,10 +536,33 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da return -1; } -int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - UNREACHABLE(); - return ORBIS_OK; +int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(u32* cmdbuf, u32 size, u32 data_offset, u32 max_count, + u32 shader_stage, u32 vertex_sgpr_offset, + u32 instance_sgpr_offset, u32 flags) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (cmdbuf && (size == 11) && (vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u) && + (shader_stage == ShaderStages::Vs || shader_stage == ShaderStages::Es || + shader_stage == ShaderStages::Ls)) { + + const auto predicate = flags & 1 ? 
PM4Predicate::PredEnable : PM4Predicate::PredDisable; + cmdbuf = WriteHeader( + cmdbuf, 6, PM4ShaderType::ShaderGraphics, predicate); + + const auto sgpr_offset = indirect_sgpr_offsets[shader_stage]; + + cmdbuf[0] = data_offset; + cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset; + cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset; + cmdbuf[3] = max_count; + cmdbuf[4] = sizeof(DrawIndexedIndirectArgs); + cmdbuf[5] = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0; + + cmdbuf += 6; + WriteTrailingNop<3>(cmdbuf); + return ORBIS_OK; + } + return -1; } int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced() { diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 94d06c85f..a3d4968d3 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -51,7 +51,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da u32 max_count, u64 count_addr, u32 shader_stage, u32 vertex_sgpr_offset, u32 instance_sgpr_offset, u32 flags); -int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(); +int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(u32* cmdbuf, u32 size, u32 data_offset, u32 max_count, + u32 shader_stage, u32 vertex_sgpr_offset, + u32 instance_sgpr_offset, u32 flags); int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced(); s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, u32 index_count, u32 flags); diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 967b952c6..4c8e3367a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -455,14 +455,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); const auto offset = draw_indirect->data_offset; - const auto size = sizeof(DrawIndirectArgs); + const auto stride = sizeof(DrawIndirectArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("gfx:{}:DrawIndirect", cmd_address)); - rasterizer->DrawIndirect(false, indirect_args_addr, offset, size, 1, 0); + rasterizer->DrawIndirect(false, indirect_args_addr, offset, stride, 1, 0); rasterizer->ScopeMarkerEnd(); } break; @@ -471,7 +471,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); const auto offset = draw_index_indirect->data_offset; - const auto size = sizeof(DrawIndexedIndirectArgs); + const auto stride = sizeof(DrawIndexedIndirectArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } @@ -479,25 +479,46 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin( fmt::format("gfx:{}:DrawIndexIndirect", cmd_address)); - rasterizer->DrawIndirect(true, indirect_args_addr, offset, size, 1, 0); + rasterizer->DrawIndirect(true, indirect_args_addr, offset, stride, 1, 0); rasterizer->ScopeMarkerEnd(); } break; } - case PM4ItOpcode::DrawIndexIndirectCountMulti: { + case PM4ItOpcode::DrawIndexIndirectMulti: { const auto* draw_index_indirect = reinterpret_cast(header); const auto offset = draw_index_indirect->data_offset; if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } + if (rasterizer) { + const auto cmd_address = reinterpret_cast(header); + 
rasterizer->ScopeMarkerBegin( + fmt::format("gfx:{}:DrawIndexIndirectMulti", cmd_address)); + rasterizer->DrawIndirect(true, indirect_args_addr, offset, + draw_index_indirect->stride, + draw_index_indirect->count, 0); + rasterizer->ScopeMarkerEnd(); + } + break; + } + case PM4ItOpcode::DrawIndexIndirectCountMulti: { + const auto* draw_index_indirect = + reinterpret_cast(header); + const auto offset = draw_index_indirect->data_offset; + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); + } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin( fmt::format("gfx:{}:DrawIndexIndirectCountMulti", cmd_address)); - rasterizer->DrawIndirect( - true, indirect_args_addr, offset, draw_index_indirect->stride, - draw_index_indirect->count, draw_index_indirect->countAddr); + rasterizer->DrawIndirect(true, indirect_args_addr, offset, + draw_index_indirect->stride, + draw_index_indirect->count, + draw_index_indirect->count_indirect_enable.Value() + ? draw_index_indirect->count_addr + : 0); rasterizer->ScopeMarkerEnd(); } break; diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index ae1d32e00..6b55f5b65 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -860,6 +860,24 @@ struct PM4CmdDrawIndexIndirect { }; struct PM4CmdDrawIndexIndirectMulti { + PM4Type3Header header; ///< header + u32 data_offset; ///< Byte aligned offset where the required data structure starts + union { + u32 dw2; + BitField<0, 16, u32> base_vtx_loc; ///< Offset where the CP will write the + ///< BaseVertexLocation it fetched from memory + }; + union { + u32 dw3; + BitField<0, 16, u32> start_inst_loc; ///< Offset where the CP will write the + ///< StartInstanceLocation it fetched from memory + }; + u32 count; ///< Count of data structures to loop through before going to next packet + u32 stride; ///< Stride in memory from one data structure to the next + u32 draw_initiator; ///< Draw Initiator Register +}; + +struct PM4CmdDrawIndexIndirectCountMulti { PM4Type3Header header; ///< header u32 data_offset; ///< Byte aligned offset where the required data structure starts union { @@ -874,14 +892,14 @@ struct PM4CmdDrawIndexIndirectMulti { }; union { u32 dw4; - BitField<0, 16, u32> drawIndexLoc; ///< register offset to write the Draw Index count + BitField<0, 16, u32> draw_index_loc; ///< register offset to write the Draw Index count BitField<30, 1, u32> - countIndirectEnable; ///< Indicates the data structure count is in memory + count_indirect_enable; ///< Indicates the data structure count is in memory BitField<31, 1, u32> - drawIndexEnable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC + draw_index_enable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC }; u32 count; ///< Count of data structures to loop through before going to next packet - u64 countAddr; ///< DWord aligned Address[31:2]; Valid if countIndirectEnable is set + u64 count_addr; ///< DWord aligned Address[31:2]; Valid if countIndirectEnable is set u32 stride; ///< Stride in memory from one data structure to the next u32 draw_initiator; ///< Draw Initiator Register }; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 99f225d79..1004d850f 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -338,6 +338,7 @@ bool Instance::CreateDevice() { .geometryShader = 
features.geometryShader, .tessellationShader = features.tessellationShader, .logicOp = features.logicOp, + .multiDrawIndirect = features.multiDrawIndirect, .depthBiasClamp = features.depthBiasClamp, .fillModeNonSolid = features.fillModeNonSolid, .depthBounds = features.depthBounds, From 6477dc4f1e699981919022ac69fef59813a9ad94 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Fri, 9 May 2025 14:33:04 -0500 Subject: [PATCH 6/6] Core: Memory Fixes (#2872) * Fix VirtualQuery behavior on low addresses. * Fix VirtualQuery struct Somewhere in our BitField and array use, the size of our VirtualQuery struct became larger than the struct used on real hardware. Fixing this fixes some data corruption visible in the name parameter during my tests. * Default name to anon On real hardware, nameless mappings are given the name "anon:address" where address appears to be the address that made the memory call. For simplicity sake, I'll stick to the name "anon" for now. * Place an upper bound on returns from SearchFree Right now, this upper bound is set based on the limitations of our GPU buffer cache and page table. Someone with more experience in that area of code should probably fix that at some point. * More anons * Clang * Fix name in sceKernelMapNamedDirectMemory * strncpy instead of strcpy Hardcoded the constant size for now, I need to review how real hardware behaves here to determine if anything else is necessary for this to be accurate. * Fix name behavior All memory naming functions restrict the name size to a 31 character limit, and return `ORBIS_KERNEL_ERROR_ENAMETOOLONG` if that limit is exceeded. Since this value is constant for all functions involving names, I've defined it as a constant in kernel's memory.h, and used that in place of any hardcoded 32 character limits. * Error logging Hopefully this helps in catching the UFC regression? * Increase address space upper bound Probably needs heavy testing, especially on Mac/Windows. This increases the address space, as needed to accommodate strange memory behaviors seen in UFC. * VirtualQuery fix Due to limitations of certain platforms, we initialize our vma_map with 3 separate free mappings. As such, we need to use a while loop here to accurately query mappings with high addresses * Fix mappings to high addresses The PS4's GPU can only handle 40bit addresses. Our texture cache and buffer cache were designed around these limits, and mapping to higher addresses would cause segmentation faults and access violations. To fix these crashes, only map to the GPU if the mapping is fully contained within the address space the GPU should access. I'm open to suggestions on how to make this cleaner * Revert "Increase address space upper bound" This reverts commit 3d50eeeebb6aa40e38d6f87e6480235c917843f3. * Revert VirtualQuery while loop Windows wasn't happy with this, again. Will try to debug and properly fix this when I have a good chance. * Fix asserts FindVMA, due to the way it's programmed, never actually returns vma_map.end(), the furthest it ever returns is the last valid memory area. All those asserts we involving vma_map.end() never actually trigger due to this. This commit removes redundant asserts, adds messages to asserts that were lacking them, and fixes all asserts designed to detect out of bounds memory accesses so they actually trigger. I've also fixed some potential memory safety issues. * Proper error behavior in QueryProtection Might as well handle this properly while I'm here. 
* Clang * More information about ReserveVirtualRange results Should help debug issues like the one in The Order: 1886 (CUSA00076) * Fix assert message * Update assert message Extra space * Fix my bug Oh hey, finally something that's my fault. * Fix rasterizer unmaps Should use adjusted_size here, otherwise we could unmap too much. Thanks to diegolix29 for spotting this. * Fix edge case in MapMemory Code comments explain everything. This should fix some memory asserts. * Fix fix Avoid running the code path if it's unnecessary, since there are many additional edge cases to handle when the VMA map is small. * Fix fix fix Should prevent infinite loops, haven't tested properly yet though. * Split logging for inputs and out_addr in ReserveVirtualRange Addresses review comments. --- src/core/libraries/kernel/memory.cpp | 54 ++++---- src/core/libraries/kernel/memory.h | 16 +-- src/core/memory.cpp | 177 ++++++++++++++++++++------- src/core/memory.h | 8 +- 4 files changed, 179 insertions(+), 76 deletions(-) diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 8a0c91479..495ddc52f 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -126,9 +126,6 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info, size_t infoSize) { LOG_INFO(Kernel_Vmm, "called addr = {}, flags = {:#x}", fmt::ptr(addr), flags); - if (!addr) { - return ORBIS_KERNEL_ERROR_EACCES; - } auto* memory = Core::Memory::Instance(); return memory->VirtualQuery(std::bit_cast(addr), flags, info); } @@ -136,7 +133,6 @@ s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtual s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment) { LOG_INFO(Kernel_Vmm, "addr = {}, len = {:#x}, flags = {:#x}, alignment = {:#x}", fmt::ptr(*addr), len, flags, alignment); - if (addr == nullptr) { LOG_ERROR(Kernel_Vmm, "Address is invalid!"); return ORBIS_KERNEL_ERROR_EINVAL; @@ -155,9 +151,12 @@ s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u auto* memory = Core::Memory::Instance(); const VAddr in_addr = reinterpret_cast(*addr); const auto map_flags = static_cast(flags); - memory->Reserve(addr, in_addr, len, map_flags, alignment); - return ORBIS_OK; + s32 result = memory->Reserve(addr, in_addr, len, map_flags, alignment); + if (result == 0) { + LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr)); + } + return result; } int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, int flags, @@ -172,10 +171,12 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i LOG_ERROR(Kernel_Vmm, "Map size is either zero or not 16KB aligned!"); return ORBIS_KERNEL_ERROR_EINVAL; } + if (!Common::Is16KBAligned(directMemoryStart)) { LOG_ERROR(Kernel_Vmm, "Start address is not 16KB aligned!"); return ORBIS_KERNEL_ERROR_EINVAL; } + if (alignment != 0) { if ((!std::has_single_bit(alignment) && !Common::Is16KBAligned(alignment))) { LOG_ERROR(Kernel_Vmm, "Alignment value is invalid!"); @@ -183,14 +184,19 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i } } + if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + const VAddr in_addr = reinterpret_cast(*addr); const auto mem_prot = static_cast(prot); 
const auto map_flags = static_cast(flags); auto* memory = Core::Memory::Instance(); const auto ret = - memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, "", false, - directMemoryStart, alignment); + memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, name, + false, directMemoryStart, alignment); LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr)); return ret; @@ -199,7 +205,8 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i int PS4_SYSV_ABI sceKernelMapDirectMemory(void** addr, u64 len, int prot, int flags, s64 directMemoryStart, u64 alignment) { LOG_INFO(Kernel_Vmm, "called, redirected to sceKernelMapNamedDirectMemory"); - return sceKernelMapNamedDirectMemory(addr, len, prot, flags, directMemoryStart, alignment, ""); + return sceKernelMapNamedDirectMemory(addr, len, prot, flags, directMemoryStart, alignment, + "anon"); } s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot, @@ -210,17 +217,16 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t return ORBIS_KERNEL_ERROR_EINVAL; } - static constexpr size_t MaxNameSize = 32; - if (std::strlen(name) > MaxNameSize) { - LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); - return ORBIS_KERNEL_ERROR_ENAMETOOLONG; - } - if (name == nullptr) { LOG_ERROR(Kernel_Vmm, "name is invalid!"); return ORBIS_KERNEL_ERROR_EFAULT; } + if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + const VAddr in_addr = reinterpret_cast(*addr_in_out); const auto mem_prot = static_cast(prot); const auto map_flags = static_cast(flags); @@ -236,7 +242,7 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot, int flags) { - return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, ""); + return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon"); } int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) { @@ -304,7 +310,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_DIRECT: { result = sceKernelMapNamedDirectMemory(&entries[i].start, entries[i].length, entries[i].protection, flags, - static_cast(entries[i].offset), 0, ""); + static_cast(entries[i].offset), 0, "anon"); LOG_INFO(Kernel_Vmm, "entry = {}, operation = {}, len = {:#x}, offset = {:#x}, type = {}, " "result = {}", @@ -326,7 +332,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn } case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_FLEXIBLE: { result = sceKernelMapNamedFlexibleMemory(&entries[i].start, entries[i].length, - entries[i].protection, flags, ""); + entries[i].protection, flags, "anon"); LOG_INFO(Kernel_Vmm, "entry = {}, operation = {}, len = {:#x}, type = {}, " "result = {}", @@ -356,16 +362,16 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn } s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, const char* name) { - static constexpr size_t MaxNameSize = 32; - if (std::strlen(name) > MaxNameSize) { - LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); - return ORBIS_KERNEL_ERROR_ENAMETOOLONG; - } - if (name == nullptr) { LOG_ERROR(Kernel_Vmm, "name is invalid!"); return 
ORBIS_KERNEL_ERROR_EFAULT; } + + if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + auto* memory = Core::Memory::Instance(); memory->NameVirtualRange(std::bit_cast(addr), len, name); return ORBIS_OK; diff --git a/src/core/libraries/kernel/memory.h b/src/core/libraries/kernel/memory.h index 400b6c3fc..6acb559d1 100644 --- a/src/core/libraries/kernel/memory.h +++ b/src/core/libraries/kernel/memory.h @@ -47,6 +47,8 @@ enum MemoryOpTypes : u32 { ORBIS_KERNEL_MAP_OP_TYPE_PROTECT = 4 }; +constexpr u32 ORBIS_KERNEL_MAXIMUM_NAME_LENGTH = 32; + struct OrbisQueryInfo { uintptr_t start; uintptr_t end; @@ -59,14 +61,12 @@ struct OrbisVirtualQueryInfo { size_t offset; s32 protection; s32 memory_type; - union { - BitField<0, 1, u32> is_flexible; - BitField<1, 1, u32> is_direct; - BitField<2, 1, u32> is_stack; - BitField<3, 1, u32> is_pooled; - BitField<4, 1, u32> is_committed; - }; - std::array name; + u32 is_flexible : 1; + u32 is_direct : 1; + u32 is_stack : 1; + u32 is_pooled : 1; + u32 is_committed : 1; + char name[ORBIS_KERNEL_MAXIMUM_NAME_LENGTH]; }; struct OrbisKernelBatchMapEntry { diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 494ffa70c..9861e813a 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -75,7 +75,8 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) { // Clamp size to the remaining size of the current VMA. auto vma = FindVMA(virtual_addr); - ASSERT_MSG(vma != vma_map.end(), "Attempted to access invalid GPU address {:#x}", virtual_addr); + ASSERT_MSG(vma->second.Contains(virtual_addr, 0), + "Attempted to access invalid GPU address {:#x}", virtual_addr); u64 clamped_size = vma->second.base + vma->second.size - virtual_addr; ++vma; @@ -96,6 +97,8 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) { bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) { const VAddr virtual_addr = std::bit_cast(address); const auto& vma = FindVMA(virtual_addr)->second; + ASSERT_MSG(vma.Contains(virtual_addr, 0), + "Attempting to access out of bounds memory at address {:#x}", virtual_addr); if (vma.type != VMAType::Direct) { return false; } @@ -145,10 +148,12 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, auto mapping_end = mapping_start + size; // Find the first free, large enough dmem area in the range. - while ((!dmem_area->second.is_free || dmem_area->second.GetEnd() < mapping_end) && - dmem_area != dmem_map.end()) { + while (!dmem_area->second.is_free || dmem_area->second.GetEnd() < mapping_end) { // The current dmem_area isn't suitable, move to the next one. dmem_area++; + if (dmem_area == dmem_map.end()) { + break; + } // Update local variables based on the new dmem_area mapping_start = Common::AlignUp(dmem_area->second.base, alignment); @@ -172,7 +177,6 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) { std::scoped_lock lk{mutex}; auto dmem_area = CarveDmemArea(phys_addr, size); - ASSERT(dmem_area != dmem_map.end() && dmem_area->second.size >= size); // Release any dmem mappings that reference this physical block. 
std::vector> remove_list; @@ -216,12 +220,18 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size, vma = FindVMA(mapped_addr)->second; } const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); + ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size, + "Memory region {:#x} to {:#x} is not large enough to reserve {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size, alignment); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } // Add virtual memory area @@ -229,7 +239,7 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size, auto& new_vma = new_vma_handle->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = MemoryProt::NoAccess; - new_vma.name = ""; + new_vma.name = "anon"; new_vma.type = VMAType::PoolReserved; MergeAdjacent(vma_map, new_vma_handle); @@ -247,19 +257,25 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem // Fixed mapping means the virtual address must exactly match the provided one. if (True(flags & MemoryMapFlags::Fixed)) { - auto& vma = FindVMA(mapped_addr)->second; + auto vma = FindVMA(mapped_addr)->second; // If the VMA is mapped, unmap the region first. if (vma.IsMapped()) { UnmapMemoryImpl(mapped_addr, size); vma = FindVMA(mapped_addr)->second; } const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); + ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size, + "Memory region {:#x} to {:#x} is not large enough to reserve {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size, alignment); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } // Add virtual memory area @@ -267,7 +283,7 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem auto& new_vma = new_vma_handle->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = MemoryProt::NoAccess; - new_vma.name = ""; + new_vma.name = "anon"; new_vma.type = VMAType::Reserved; MergeAdjacent(vma_map, new_vma_handle); @@ -288,7 +304,9 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot) // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen. const auto& vma = FindVMA(mapped_addr)->second; const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size, + "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); // Perform the mapping. 
void* out_addr = impl.Map(mapped_addr, size, alignment, -1, false); @@ -302,7 +320,10 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot) new_vma.is_exec = false; new_vma.phys_base = 0; - rasterizer->MapMemory(mapped_addr, size); + if (IsValidGpuMapping(mapped_addr, size)) { + rasterizer->MapMemory(mapped_addr, size); + } + return ORBIS_OK; } @@ -325,15 +346,34 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M // Fixed mapping means the virtual address must exactly match the provided one. if (True(flags & MemoryMapFlags::Fixed)) { - // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen. - const auto& vma = FindVMA(mapped_addr)->second; - const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); + auto vma = FindVMA(mapped_addr)->second; + size_t remaining_size = vma.base + vma.size - mapped_addr; + // There's a possible edge case where we're mapping to a partially reserved range. + // To account for this, unmap any reserved areas within this mapping range first. + auto unmap_addr = mapped_addr; + auto unmap_size = size; + while (!vma.IsMapped() && unmap_addr < mapped_addr + size && remaining_size < size) { + auto unmapped = UnmapBytesFromEntry(unmap_addr, vma, unmap_size); + unmap_addr += unmapped; + unmap_size -= unmapped; + vma = FindVMA(unmap_addr)->second; + } + + // This should return SCE_KERNEL_ERROR_ENOMEM but rarely happens. + vma = FindVMA(mapped_addr)->second; + remaining_size = vma.base + vma.size - mapped_addr; + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size, + "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size, alignment); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } // Perform the mapping. @@ -353,7 +393,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M if (type == VMAType::Flexible) { flexible_usage += size; } - rasterizer->MapMemory(mapped_addr, size); + + if (IsValidGpuMapping(mapped_addr, size)) { + rasterizer->MapMemory(mapped_addr, size); + } return ORBIS_OK; } @@ -366,12 +409,18 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem // Find first free area to map the file. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size_aligned, 1); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } if (True(flags & MemoryMapFlags::Fixed)) { const auto& vma = FindVMA(virtual_addr)->second; const size_t remaining_size = vma.base + vma.size - virtual_addr; - ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size, + "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Map the file. 
@@ -404,7 +453,9 @@ void MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) { const auto start_in_vma = virtual_addr - vma_base_addr; const auto type = vma_base.type; - rasterizer->UnmapMemory(virtual_addr, size); + if (IsValidGpuMapping(virtual_addr, size)) { + rasterizer->UnmapMemory(virtual_addr, size); + } // Mark region as free and attempt to coalesce it with neighbours. const auto new_it = CarveVMA(virtual_addr, size); @@ -444,7 +495,10 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma if (type == VMAType::Flexible) { flexible_usage -= adjusted_size; } - rasterizer->UnmapMemory(virtual_addr, adjusted_size); + + if (IsValidGpuMapping(virtual_addr, adjusted_size)) { + rasterizer->UnmapMemory(virtual_addr, adjusted_size); + } // Mark region as free and attempt to coalesce it with neighbours. const auto new_it = CarveVMA(virtual_addr, adjusted_size); @@ -471,6 +525,8 @@ s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, u64 size) { do { auto it = FindVMA(virtual_addr + unmapped_bytes); auto& vma_base = it->second; + ASSERT_MSG(vma_base.Contains(virtual_addr + unmapped_bytes, 0), + "Address {:#x} is out of bounds", virtual_addr + unmapped_bytes); auto unmapped = UnmapBytesFromEntry(virtual_addr + unmapped_bytes, vma_base, size - unmapped_bytes); ASSERT_MSG(unmapped > 0, "Failed to unmap memory, progress is impossible"); @@ -485,7 +541,10 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr const auto it = FindVMA(addr); const auto& vma = it->second; - ASSERT_MSG(vma.type != VMAType::Free, "Provided address is not mapped"); + if (!vma.Contains(addr, 0) || vma.IsFree()) { + LOG_ERROR(Kernel_Vmm, "Address {:#x} is not mapped", addr); + return ORBIS_KERNEL_ERROR_EACCES; + } if (start != nullptr) { *start = reinterpret_cast(vma.base); @@ -555,6 +614,8 @@ s32 MemoryManager::Protect(VAddr addr, size_t size, MemoryProt prot) { do { auto it = FindVMA(addr + protected_bytes); auto& vma_base = it->second; + ASSERT_MSG(vma_base.Contains(addr + protected_bytes, 0), "Address {:#x} is out of bounds", + addr + protected_bytes); auto result = 0; result = ProtectBytes(addr + protected_bytes, vma_base, size - protected_bytes, prot); if (result < 0) { @@ -571,8 +632,16 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, ::Libraries::Kernel::OrbisVirtualQueryInfo* info) { std::scoped_lock lk{mutex}; - auto it = FindVMA(addr); - if (it->second.type == VMAType::Free && flags == 1) { + // FindVMA on addresses before the vma_map return garbage data. + auto query_addr = + addr < impl.SystemManagedVirtualBase() ? 
impl.SystemManagedVirtualBase() : addr; + if (addr < query_addr && flags == 0) { + LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region"); + return ORBIS_KERNEL_ERROR_EACCES; + } + auto it = FindVMA(query_addr); + + while (it->second.type == VMAType::Free && flags == 1 && it != --vma_map.end()) { ++it; } if (it->second.type == VMAType::Free) { @@ -585,15 +654,17 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, info->end = vma.base + vma.size; info->offset = vma.phys_base; info->protection = static_cast(vma.prot); - info->is_flexible.Assign(vma.type == VMAType::Flexible); - info->is_direct.Assign(vma.type == VMAType::Direct); - info->is_stack.Assign(vma.type == VMAType::Stack); - info->is_pooled.Assign(vma.type == VMAType::PoolReserved || vma.type == VMAType::Pooled); - info->is_committed.Assign(vma.IsMapped()); - vma.name.copy(info->name.data(), std::min(info->name.size(), vma.name.size())); + info->is_flexible = vma.type == VMAType::Flexible ? 1 : 0; + info->is_direct = vma.type == VMAType::Direct ? 1 : 0; + info->is_stack = vma.type == VMAType::Stack ? 1 : 0; + info->is_pooled = vma.type == VMAType::PoolReserved || vma.type == VMAType::Pooled ? 1 : 0; + info->is_committed = vma.IsMapped() ? 1 : 0; + + strncpy(info->name, vma.name.data(), ::Libraries::Kernel::ORBIS_KERNEL_MAXIMUM_NAME_LENGTH); + if (vma.type == VMAType::Direct) { const auto dmem_it = FindDmemArea(vma.phys_base); - ASSERT(dmem_it != dmem_map.end()); + ASSERT_MSG(vma.phys_base <= dmem_it->second.GetEnd(), "vma.phys_base is not in dmem_map!"); info->memory_type = dmem_it->second.memory_type; } else { info->memory_type = ::Libraries::Kernel::SCE_KERNEL_WB_ONION; @@ -607,11 +678,11 @@ int MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next, std::scoped_lock lk{mutex}; auto dmem_area = FindDmemArea(addr); - while (dmem_area != dmem_map.end() && dmem_area->second.is_free && find_next) { + while (dmem_area != --dmem_map.end() && dmem_area->second.is_free && find_next) { dmem_area++; } - if (dmem_area == dmem_map.end() || dmem_area->second.is_free) { + if (dmem_area->second.is_free) { LOG_ERROR(Core, "Unable to find allocated direct memory region to query!"); return ORBIS_KERNEL_ERROR_EACCES; } @@ -691,36 +762,56 @@ VAddr MemoryManager::SearchFree(VAddr virtual_addr, size_t size, u32 alignment) virtual_addr = min_search_address; } + // If the requested address is beyond the maximum our code can handle, throw an assert + auto max_search_address = impl.UserVirtualBase() + impl.UserVirtualSize(); + ASSERT_MSG(virtual_addr <= max_search_address, "Input address {:#x} is out of bounds", + virtual_addr); + auto it = FindVMA(virtual_addr); - ASSERT_MSG(it != vma_map.end(), "Specified mapping address was not found!"); // If the VMA is free and contains the requested mapping we are done. if (it->second.IsFree() && it->second.Contains(virtual_addr, size)) { return virtual_addr; } + // Search for the first free VMA that fits our mapping. - const auto is_suitable = [&] { + while (it != vma_map.end()) { if (!it->second.IsFree()) { - return false; + it++; + continue; } + const auto& vma = it->second; virtual_addr = Common::AlignUp(vma.base, alignment); // Sometimes the alignment itself might be larger than the VMA. if (virtual_addr > vma.base + vma.size) { - return false; + it++; + continue; } + + // Make sure the address is within our defined bounds + if (virtual_addr >= max_search_address) { + // There are no free mappings within our safely usable address space. 
+ break; + } + + // If there's enough space in the VMA, return the address. const size_t remaining_size = vma.base + vma.size - virtual_addr; - return remaining_size >= size; - }; - while (!is_suitable()) { - ++it; + if (remaining_size >= size) { + return virtual_addr; + } + it++; } - return virtual_addr; + + // Couldn't find a suitable VMA, return an error. + LOG_ERROR(Kernel_Vmm, "Couldn't find a free mapping for address {:#x}, size {:#x}", + virtual_addr, size); + return -1; } MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size) { auto vma_handle = FindVMA(virtual_addr); - ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); + ASSERT_MSG(vma_handle->second.Contains(virtual_addr, 0), "Virtual address not in vm_map"); const VirtualMemoryArea& vma = vma_handle->second; ASSERT_MSG(vma.base <= virtual_addr, "Adding a mapping to already mapped region"); @@ -749,7 +840,7 @@ MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, size_t size) { auto dmem_handle = FindDmemArea(addr); - ASSERT_MSG(dmem_handle != dmem_map.end(), "Physical address not in dmem_map"); + ASSERT_MSG(addr <= dmem_handle->second.GetEnd(), "Physical address not in dmem_map"); const DirectMemoryArea& area = dmem_handle->second; ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region"); @@ -804,7 +895,7 @@ int MemoryManager::GetDirectMemoryType(PAddr addr, int* directMemoryTypeOut, auto dmem_area = FindDmemArea(addr); - if (dmem_area == dmem_map.end() || dmem_area->second.is_free) { + if (addr > dmem_area->second.GetEnd() || dmem_area->second.is_free) { LOG_ERROR(Core, "Unable to find allocated direct memory region to check type!"); return ORBIS_KERNEL_ERROR_ENOENT; } diff --git a/src/core/memory.h b/src/core/memory.h index a6a55e288..3a204eb96 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -157,6 +157,12 @@ public: return impl.SystemReservedVirtualBase(); } + bool IsValidGpuMapping(VAddr virtual_addr, u64 size) { + // The PS4's GPU can only handle 40 bit addresses. + const VAddr max_gpu_address{0x10000000000}; + return virtual_addr + size < max_gpu_address; + } + bool IsValidAddress(const void* addr) const noexcept { const VAddr virtual_addr = reinterpret_cast(addr); const auto end_it = std::prev(vma_map.end()); @@ -186,7 +192,7 @@ public: int PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot); int MapMemory(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, - MemoryMapFlags flags, VMAType type, std::string_view name = "", + MemoryMapFlags flags, VMAType type, std::string_view name = "anon", bool is_exec = false, PAddr phys_addr = -1, u64 alignment = 0); int MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,