From 6206986914314ebb9318ded5c2c4d9d6bd0fe9c3 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Sun, 11 May 2025 21:51:03 -0500 Subject: [PATCH 1/5] libkernel: Implement sceKernelMemoryPoolBatch (#2909) * Implement sceKernelMemoryPoolBatch I've tested Commit and Decommit on real hardware, haven't tested Protect or TypeProtect yet. Implementation is primarily based on our sceKernelBatchMap implementation. * Clang --- src/core/libraries/kernel/memory.cpp | 54 ++++++++++++++++++++++++++++ src/core/libraries/kernel/memory.h | 44 +++++++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 9fcaa2439..dd0e07302 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -481,6 +481,59 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, size_t len, int flags) return memory->PoolDecommit(pool_addr, len); } +s32 PS4_SYSV_ABI sceKernelMemoryPoolBatch(const OrbisKernelMemoryPoolBatchEntry* entries, s32 count, + s32* num_processed, s32 flags) { + if (entries == nullptr) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + s32 result = ORBIS_OK; + s32 processed = 0; + + for (s32 i = 0; i < count; i++, processed++) { + OrbisKernelMemoryPoolBatchEntry entry = entries[i]; + switch (entry.opcode) { + case OrbisKernelMemoryPoolOpcode::Commit: { + result = sceKernelMemoryPoolCommit(entry.commit_params.addr, entry.commit_params.len, + entry.commit_params.type, entry.commit_params.prot, + entry.flags); + break; + } + case OrbisKernelMemoryPoolOpcode::Decommit: { + result = sceKernelMemoryPoolDecommit(entry.decommit_params.addr, + entry.decommit_params.len, entry.flags); + break; + } + case OrbisKernelMemoryPoolOpcode::Protect: { + result = sceKernelMProtect(entry.protect_params.addr, entry.protect_params.len, + entry.protect_params.prot); + break; + } + case OrbisKernelMemoryPoolOpcode::TypeProtect: { + result = sceKernelMTypeProtect( + entry.type_protect_params.addr, entry.type_protect_params.len, + entry.type_protect_params.type, entry.type_protect_params.prot); + break; + } + case OrbisKernelMemoryPoolOpcode::Move: { + UNREACHABLE_MSG("Unimplemented sceKernelMemoryPoolBatch opcode Move"); + } + default: { + result = ORBIS_KERNEL_ERROR_EINVAL; + break; + } + } + + if (result != ORBIS_OK) { + break; + } + } + + if (num_processed != nullptr) { + *num_processed = processed; + } + return result; +} + int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, size_t offset, void** res) { LOG_INFO(Kernel_Vmm, "called addr = {}, len = {}, prot = {}, flags = {}, fd = {}, offset = {}", @@ -612,6 +665,7 @@ void RegisterMemory(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("pU-QydtGcGY", "libkernel", 1, "libkernel", 1, 1, sceKernelMemoryPoolReserve); LIB_FUNCTION("Vzl66WmfLvk", "libkernel", 1, "libkernel", 1, 1, sceKernelMemoryPoolCommit); LIB_FUNCTION("LXo1tpFqJGs", "libkernel", 1, "libkernel", 1, 1, sceKernelMemoryPoolDecommit); + LIB_FUNCTION("YN878uKRBbE", "libkernel", 1, "libkernel", 1, 1, sceKernelMemoryPoolBatch); LIB_FUNCTION("BPE9s9vQQXo", "libkernel", 1, "libkernel", 1, 1, posix_mmap); LIB_FUNCTION("BPE9s9vQQXo", "libScePosix", 1, "libkernel", 1, 1, posix_mmap); diff --git a/src/core/libraries/kernel/memory.h b/src/core/libraries/kernel/memory.h index 2ca7f2931..3e2bf8de5 100644 --- a/src/core/libraries/kernel/memory.h +++ b/src/core/libraries/kernel/memory.h @@ -81,6 +81,48 @@ struct OrbisKernelBatchMapEntry { int operation; }; +enum class OrbisKernelMemoryPoolOpcode : u32 { + Commit = 1, + Decommit = 2, + Protect = 3, + TypeProtect = 4, + Move = 5, +}; + +struct OrbisKernelMemoryPoolBatchEntry { + OrbisKernelMemoryPoolOpcode opcode; + u32 flags; + union { + struct { + void* addr; + u64 len; + u8 prot; + u8 type; + } commit_params; + struct { + void* addr; + u64 len; + } decommit_params; + struct { + void* addr; + u64 len; + u8 prot; + } protect_params; + struct { + void* addr; + u64 len; + u8 prot; + u8 type; + } type_protect_params; + struct { + void* dest_addr; + void* src_addr; + u64 len; + } move_params; + uintptr_t padding[3]; + }; +}; + u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize(); int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len, u64 alignment, int memoryType, s64* physAddrOut); @@ -130,6 +172,8 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addrIn, size_t len, size_t ali void** addrOut); s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, size_t len, int type, int prot, int flags); s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, size_t len, int flags); +s32 PS4_SYSV_ABI sceKernelMemoryPoolBatch(const OrbisKernelMemoryPoolBatchEntry* entries, s32 count, + s32* num_processed, s32 flags); int PS4_SYSV_ABI sceKernelMunmap(void* addr, size_t len); From 02d3ed4973960e8a4a2ac454dc3e46d5d804a743 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 11 May 2025 20:27:43 -0700 Subject: [PATCH 2/5] liverpool: Log more information on SetQueueReg. (#2912) --- src/video_core/amdgpu/liverpool.cpp | 5 +++++ src/video_core/amdgpu/pm4_cmds.h | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 4c8e3367a..598288085 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -864,6 +864,11 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq } break; } + case PM4ItOpcode::SetQueueReg: { + const auto* set_data = reinterpret_cast(header); + UNREACHABLE_MSG("Encountered compute SetQueueReg: vqid = {}, reg_offset = {:#x}", + set_data->vqid.Value(), set_data->reg_offset.Value()); + } case PM4ItOpcode::DispatchDirect: { const auto* dispatch_direct = reinterpret_cast(header); auto& cs_program = GetCsRegs(); diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 6b55f5b65..cd175f6c9 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -211,6 +211,21 @@ struct PM4CmdSetData { } }; +struct PM4CmdSetQueueReg { + PM4Type3Header header; + union { + u32 raw; + BitField<0, 8, u32> reg_offset; ///< Offset in DWords from the register base address + BitField<15, 1, u32> defer_exec; ///< Defer execution + BitField<16, 10, u32> vqid; ///< Queue ID + }; + u32 data[0]; + + [[nodiscard]] u32 Size() const { + return header.count << 2u; + } +}; + struct PM4CmdNop { PM4Type3Header header; u32 data_block[0]; From 678f18ddb95773186877504b1b668e2a3c8d8785 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 11 May 2025 20:27:54 -0700 Subject: [PATCH 3/5] core: Introduce host call wrapper. (#2913) --- src/core/libraries/kernel/threads.h | 2 +- src/core/libraries/libs.h | 3 ++- src/core/tls.h | 12 ++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/core/libraries/kernel/threads.h b/src/core/libraries/kernel/threads.h index 409136968..7b75d54bf 100644 --- a/src/core/libraries/kernel/threads.h +++ b/src/core/libraries/kernel/threads.h @@ -35,7 +35,7 @@ public: this->func = std::move(func); PthreadAttrT attr{}; posix_pthread_attr_init(&attr); - posix_pthread_create(&thread, &attr, RunWrapper, this); + posix_pthread_create(&thread, &attr, HOST_CALL(RunWrapper), this); posix_pthread_attr_destroy(&attr); } diff --git a/src/core/libraries/libs.h b/src/core/libraries/libs.h index 7e073db8e..d9c8216a5 100644 --- a/src/core/libraries/libs.h +++ b/src/core/libraries/libs.h @@ -5,6 +5,7 @@ #include "core/loader/elf.h" #include "core/loader/symbols_resolver.h" +#include "core/tls.h" #define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \ { \ @@ -16,7 +17,7 @@ sr.module_version_major = moduleVersionMajor; \ sr.module_version_minor = moduleVersionMinor; \ sr.type = Core::Loader::SymbolType::Function; \ - auto func = reinterpret_cast(function); \ + auto func = reinterpret_cast(HOST_CALL(function)); \ sym->AddSymbol(sr, func); \ } diff --git a/src/core/tls.h b/src/core/tls.h index 46ca8153b..d1d490465 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -58,4 +58,16 @@ ReturnType ExecuteGuest(PS4_SYSV_ABI ReturnType (*func)(FuncArgs...), CallArgs&& return func(std::forward(args)...); } +template +struct HostCallWrapperImpl; + +template +struct HostCallWrapperImpl { + static ReturnType PS4_SYSV_ABI wrap(Args... args) { + return func(args...); + } +}; + +#define HOST_CALL(func) (Core::HostCallWrapperImpl::wrap) + } // namespace Core From 8909d9bb891a8167e33f7de0aca5eae649b4ec4d Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Mon, 12 May 2025 10:46:40 -0700 Subject: [PATCH 4/5] shader_recompiler: Always mark buffers as storage buffers. (#2914) --- src/shader_recompiler/info.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 12e48c8e4..ba28d7e43 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -62,7 +62,14 @@ struct BufferResource { } bool IsStorage(const AmdGpu::Buffer& buffer, const Profile& profile) const noexcept { - return buffer.GetSize() > profile.max_ubo_size || is_written; + // When using uniform buffers, a size is required at compilation time, so we need to + // either compile a lot of shader specializations to handle each size or just force it to + // the maximum possible size always. However, for some vendors the shader-supplied size is + // used for bounds checking uniform buffer accesses, so the latter would effectively turn + // off buffer robustness behavior. Instead, force storage buffers which are bounds checked + // using the actual buffer size. We are assuming the performance hit from this is + // acceptable. + return true; // buffer.GetSize() > profile.max_ubo_size || is_written; } [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; From f94c7e52b7d687e89c06a7dd2fe159a7070d4492 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Mon, 12 May 2025 10:46:53 -0700 Subject: [PATCH 5/5] kernel: Implement scePthreadGetaffinity (#2916) --- src/core/libraries/kernel/threads.h | 6 ++++ src/core/libraries/kernel/threads/pthread.cpp | 30 ++++++++++++++++--- .../libraries/kernel/threads/pthread_attr.cpp | 6 ++-- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/core/libraries/kernel/threads.h b/src/core/libraries/kernel/threads.h index 7b75d54bf..bcccf1695 100644 --- a/src/core/libraries/kernel/threads.h +++ b/src/core/libraries/kernel/threads.h @@ -17,6 +17,12 @@ int PS4_SYSV_ABI posix_pthread_attr_init(PthreadAttrT* attr); int PS4_SYSV_ABI posix_pthread_attr_destroy(PthreadAttrT* attr); +int PS4_SYSV_ABI posix_pthread_attr_getaffinity_np(const PthreadAttrT* pattr, size_t cpusetsize, + Cpuset* cpusetp); + +int PS4_SYSV_ABI posix_pthread_attr_setaffinity_np(PthreadAttrT* pattr, size_t cpusetsize, + const Cpuset* cpusetp); + int PS4_SYSV_ABI posix_pthread_create(PthreadT* thread, const PthreadAttrT* attr, PthreadEntryFunc start_routine, void* arg); diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index a51f1f6e8..61310bfb5 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -6,6 +6,7 @@ #include "core/debug_state.h" #include "core/libraries/kernel/kernel.h" #include "core/libraries/kernel/posix_error.h" +#include "core/libraries/kernel/threads.h" #include "core/libraries/kernel/threads/pthread.h" #include "core/libraries/kernel/threads/thread_state.h" #include "core/libraries/libs.h" @@ -535,8 +536,6 @@ int Pthread::SetAffinity(const Cpuset* cpuset) { return POSIX_EINVAL; } - u64 mask = cpuset->bits; - uintptr_t handle = native_thr.GetHandle(); if (handle == 0) { return POSIX_ESRCH; @@ -545,6 +544,7 @@ int Pthread::SetAffinity(const Cpuset* cpuset) { // We don't use this currently because some games gets performance problems // when applying affinity even on strong hardware /* + u64 mask = cpuset->bits; #ifdef _WIN64 DWORD_PTR affinity_mask = static_cast(mask); if (!SetThreadAffinityMask(reinterpret_cast(handle), affinity_mask)) { @@ -572,13 +572,33 @@ int Pthread::SetAffinity(const Cpuset* cpuset) { return 0; } +int PS4_SYSV_ABI posix_pthread_getaffinity_np(PthreadT thread, size_t cpusetsize, Cpuset* cpusetp) { + if (thread == nullptr || cpusetp == nullptr) { + return POSIX_EINVAL; + } + auto* attr_ptr = &thread->attr; + return posix_pthread_attr_getaffinity_np(&attr_ptr, cpusetsize, cpusetp); +} + int PS4_SYSV_ABI posix_pthread_setaffinity_np(PthreadT thread, size_t cpusetsize, const Cpuset* cpusetp) { if (thread == nullptr || cpusetp == nullptr) { return POSIX_EINVAL; } - thread->attr.cpusetsize = cpusetsize; - return thread->SetAffinity(cpusetp); + auto* attr_ptr = &thread->attr; + if (const auto ret = posix_pthread_attr_setaffinity_np(&attr_ptr, cpusetsize, cpusetp)) { + return ret; + } + return thread->SetAffinity(thread->attr.cpuset); +} + +int PS4_SYSV_ABI scePthreadGetaffinity(PthreadT thread, u64* mask) { + Cpuset cpuset; + const int ret = posix_pthread_getaffinity_np(thread, sizeof(Cpuset), &cpuset); + if (ret == 0) { + *mask = cpuset.bits; + } + return ret; } int PS4_SYSV_ABI scePthreadSetaffinity(PthreadT thread, const u64 mask) { @@ -609,6 +629,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("Z4QosVuAsA0", "libkernel", 1, "libkernel", 1, 1, posix_pthread_once); LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, posix_pthread_self); LIB_FUNCTION("OxhIB8LB-PQ", "libkernel", 1, "libkernel", 1, 1, posix_pthread_create); + LIB_FUNCTION("Jb2uGFMr688", "libkernel", 1, "libkernel", 1, 1, posix_pthread_getaffinity_np); LIB_FUNCTION("5KWrg7-ZqvE", "libkernel", 1, "libkernel", 1, 1, posix_pthread_setaffinity_np); // Orbis @@ -632,6 +653,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("W0Hpm2X0uPE", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_setprio)); LIB_FUNCTION("rNhWz+lvOMU", "libkernel", 1, "libkernel", 1, 1, _sceKernelSetThreadDtors); LIB_FUNCTION("6XG4B33N09g", "libkernel", 1, "libkernel", 1, 1, sched_yield); + LIB_FUNCTION("rcrVFJsQWRY", "libkernel", 1, "libkernel", 1, 1, ORBIS(scePthreadGetaffinity)); LIB_FUNCTION("bt3CTBKmGyI", "libkernel", 1, "libkernel", 1, 1, ORBIS(scePthreadSetaffinity)); } diff --git a/src/core/libraries/kernel/threads/pthread_attr.cpp b/src/core/libraries/kernel/threads/pthread_attr.cpp index 02a8cb1c7..71f6438a6 100644 --- a/src/core/libraries/kernel/threads/pthread_attr.cpp +++ b/src/core/libraries/kernel/threads/pthread_attr.cpp @@ -268,13 +268,13 @@ int PS4_SYSV_ABI posix_pthread_attr_setaffinity_np(PthreadAttrT* pattr, size_t c attr->cpuset = static_cast(calloc(1, sizeof(Cpuset))); attr->cpusetsize = sizeof(Cpuset); } - memcpy(attr->cpuset, cpusetp, cpusetsize); + memcpy(attr->cpuset, cpusetp, std::min(cpusetsize, sizeof(Cpuset))); return 0; } -int PS4_SYSV_ABI scePthreadAttrGetaffinity(PthreadAttrT* param_1, u64* mask) { +int PS4_SYSV_ABI scePthreadAttrGetaffinity(PthreadAttrT* attr, u64* mask) { Cpuset cpuset; - const int ret = posix_pthread_attr_getaffinity_np(param_1, sizeof(Cpuset), &cpuset); + const int ret = posix_pthread_attr_getaffinity_np(attr, sizeof(Cpuset), &cpuset); if (ret == 0) { *mask = cpuset.bits; }