From 851995d4440b65f34b81995c638ce73b4ee92489 Mon Sep 17 00:00:00 2001 From: polybiusproxy <47796739+polybiusproxy@users.noreply.github.com> Date: Sat, 28 Dec 2024 17:33:40 +0100 Subject: [PATCH 01/53] libraries/fiber: implement context switching (#1950) --- src/core/libraries/fiber/fiber.cpp | 98 +++++++++++++++++++++++++++--- src/core/libraries/fiber/fiber.h | 2 + 2 files changed, 91 insertions(+), 9 deletions(-) diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp index 7bb81b61e..6d3f546f2 100644 --- a/src/core/libraries/fiber/fiber.cpp +++ b/src/core/libraries/fiber/fiber.cpp @@ -41,6 +41,39 @@ void PS4_SYSV_ABI _sceFiberCheckStackOverflow(OrbisFiberContext* ctx) { } } +s32 PS4_SYSV_ABI _sceFiberAttachContext(OrbisFiber* fiber, void* addr_context, u64 size_context) { + if (size_context && size_context < ORBIS_FIBER_CONTEXT_MINIMUM_SIZE) { + return ORBIS_FIBER_ERROR_RANGE; + } + if (size_context & 15) { + return ORBIS_FIBER_ERROR_INVALID; + } + if (!addr_context || !size_context) { + return ORBIS_FIBER_ERROR_INVALID; + } + if (fiber->addr_context) { + return ORBIS_FIBER_ERROR_INVALID; + } + + fiber->addr_context = addr_context; + fiber->size_context = size_context; + fiber->context_start = addr_context; + fiber->context_end = reinterpret_cast(addr_context) + size_context; + + /* Apply signature to start of stack */ + *(u64*)addr_context = kFiberStackSignature; + + if (fiber->flags & FiberFlags::ContextSizeCheck) { + u64* stack_start = reinterpret_cast(fiber->context_start); + u64* stack_end = reinterpret_cast(fiber->context_end); + + u64* stack_ptr = stack_start + 1; + while (stack_ptr < stack_end) { + *stack_ptr++ = kFiberStackSizeCheck; + } + } +} + void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to, OrbisFiberContext* ctx) { OrbisFiberContext* fiber_ctx = fiber->context; @@ -62,8 +95,7 @@ void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to, data.entry = fiber->entry; 
data.arg_on_initialize = fiber->arg_on_initialize; data.arg_on_run_to = arg_on_run_to; - data.stack_addr = - reinterpret_cast(reinterpret_cast(fiber->addr_context) + fiber->size_context); + data.stack_addr = reinterpret_cast(fiber->addr_context) + fiber->size_context; if (fiber->flags & FiberFlags::SetFpuRegs) { data.fpucw = 0x037f; data.mxcsr = 0x9fc0; @@ -169,8 +201,7 @@ s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFi if (addr_context != nullptr) { fiber->context_start = addr_context; - fiber->context_end = - reinterpret_cast(reinterpret_cast(addr_context) + size_context); + fiber->context_end = reinterpret_cast(addr_context) + size_context; /* Apply signature to start of stack */ *(u64*)addr_context = kFiberStackSignature; @@ -221,11 +252,12 @@ s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber) { return ORBIS_OK; } -s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) { +s32 PS4_SYSV_ABI sceFiberRunImpl(OrbisFiber* fiber, void* addr_context, u64 size_context, + u64 arg_on_run_to, u64* arg_on_return) { if (!fiber) { return ORBIS_FIBER_ERROR_NULL; } - if ((u64)fiber & 7) { + if ((u64)fiber & 7 || (u64)addr_context & 15) { return ORBIS_FIBER_ERROR_ALIGNMENT; } if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { @@ -237,6 +269,14 @@ s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_r return ORBIS_FIBER_ERROR_PERMISSION; } + /* Caller wants to attach context and run. 
*/ + if (addr_context != nullptr || size_context != 0) { + s32 res = _sceFiberAttachContext(fiber, addr_context, size_context); + if (res < 0) { + return res; + } + } + FiberState expected = FiberState::Idle; if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) { return ORBIS_FIBER_ERROR_STATE; @@ -288,11 +328,12 @@ s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_r return ORBIS_OK; } -s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run) { +s32 PS4_SYSV_ABI sceFiberSwitchImpl(OrbisFiber* fiber, void* addr_context, u64 size_context, + u64 arg_on_run_to, u64* arg_on_run) { if (!fiber) { return ORBIS_FIBER_ERROR_NULL; } - if ((u64)fiber & 7) { + if ((u64)fiber & 7 || (u64)addr_context & 15) { return ORBIS_FIBER_ERROR_ALIGNMENT; } if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { @@ -304,6 +345,14 @@ s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_o return ORBIS_FIBER_ERROR_PERMISSION; } + /* Caller wants to attach context and switch. 
*/ + if (addr_context != nullptr || size_context != 0) { + s32 res = _sceFiberAttachContext(fiber, addr_context, size_context); + if (res < 0) { + return res; + } + } + FiberState expected = FiberState::Idle; if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) { return ORBIS_FIBER_ERROR_STATE; @@ -462,9 +511,32 @@ s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name) { return ORBIS_OK; } +s32 PS4_SYSV_ABI sceFiberGetThreadFramePointerAddress(u64* addr_frame_pointer) { + if (!addr_frame_pointer) { + return ORBIS_FIBER_ERROR_NULL; + } + + OrbisFiberContext* g_ctx = GetFiberContext(); + if (!g_ctx) { + return ORBIS_FIBER_ERROR_PERMISSION; + } + + *addr_frame_pointer = g_ctx->rbp; + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) { + return sceFiberRunImpl(fiber, nullptr, 0, arg_on_run_to, arg_on_return); +} + +s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run) { + return sceFiberSwitchImpl(fiber, nullptr, 0, arg_on_run_to, arg_on_run); +} + void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("hVYD7Ou2pCQ", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize); - LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize); + LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberInitialize); // _sceFiberInitializeWithInternalOptionImpl LIB_FUNCTION("asjUJJ+aa8s", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberOptParamInitialize); LIB_FUNCTION("JeNX5F-NzQU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberFinalize); @@ -473,12 +545,20 @@ void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("p+zLIOg27zU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetSelf); LIB_FUNCTION("B0ZX2hx9DMw", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberReturnToThread); + LIB_FUNCTION("avfGJ94g36Q", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberRunImpl); // 
_sceFiberAttachContextAndRun + LIB_FUNCTION("ZqhZFuzKT6U", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberSwitchImpl); // _sceFiberAttachContextAndSwitch + LIB_FUNCTION("uq2Y5BFz0PE", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetInfo); LIB_FUNCTION("Lcqty+QNWFc", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberStartContextSizeCheck); LIB_FUNCTION("Kj4nXMpnM8Y", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberStopContextSizeCheck); LIB_FUNCTION("JzyT91ucGDc", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRename); + + LIB_FUNCTION("0dy4JtMUcMQ", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberGetThreadFramePointerAddress); } } // namespace Libraries::Fiber diff --git a/src/core/libraries/fiber/fiber.h b/src/core/libraries/fiber/fiber.h index 3c4e3b70e..edcd9afe8 100644 --- a/src/core/libraries/fiber/fiber.h +++ b/src/core/libraries/fiber/fiber.h @@ -114,5 +114,7 @@ s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck(void); s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name); +s32 PS4_SYSV_ABI sceFiberGetThreadFramePointerAddress(u64* addr_frame_pointer); + void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::Fiber \ No newline at end of file From ee974414d28b4da47fda57024987057c928e2872 Mon Sep 17 00:00:00 2001 From: polyproxy <47796739+polybiusproxy@users.noreply.github.com> Date: Sat, 28 Dec 2024 17:43:29 +0100 Subject: [PATCH 02/53] hotfix: fix fiber initialization --- src/common/elf_info.h | 1 + src/core/libraries/fiber/fiber.cpp | 29 ++++++++++++++++++++--------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/common/elf_info.h b/src/common/elf_info.h index 5a2c914e0..6eb144e9a 100644 --- a/src/common/elf_info.h +++ b/src/common/elf_info.h @@ -34,6 +34,7 @@ public: static constexpr u32 FW_20 = 0x2000000; static constexpr u32 FW_25 = 0x2500000; static constexpr u32 FW_30 = 0x3000000; + static constexpr u32 FW_35 = 0x3500000; static constexpr u32 FW_40 = 0x4000000; static constexpr u32 FW_45 = 
0x4500000; static constexpr u32 FW_50 = 0x5000000; diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp index 6d3f546f2..b77b5b5b6 100644 --- a/src/core/libraries/fiber/fiber.cpp +++ b/src/core/libraries/fiber/fiber.cpp @@ -3,6 +3,7 @@ #include "fiber.h" +#include "common/elf_info.h" #include "common/logging/log.h" #include "core/libraries/fiber/fiber_error.h" #include "core/libraries/libs.h" @@ -72,6 +73,8 @@ s32 PS4_SYSV_ABI _sceFiberAttachContext(OrbisFiber* fiber, void* addr_context, u *stack_ptr++ = kFiberStackSizeCheck; } } + + return ORBIS_OK; } void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to, @@ -143,9 +146,10 @@ void PS4_SYSV_ABI _sceFiberTerminate(OrbisFiber* fiber, u64 arg_on_return, Orbis __builtin_trap(); } -s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, - u64 arg_on_initialize, void* addr_context, u64 size_context, - const OrbisFiberOptParam* opt_param, u32 build_ver) { +s32 PS4_SYSV_ABI sceFiberInitializeImpl(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, + u64 arg_on_initialize, void* addr_context, u64 size_context, + const OrbisFiberOptParam* opt_param, u32 flags, + u32 build_ver) { if (!fiber || !name || !entry) { return ORBIS_FIBER_ERROR_NULL; } @@ -171,12 +175,12 @@ s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFi return ORBIS_FIBER_ERROR_INVALID; } - u32 flags = FiberFlags::None; - if (build_ver >= 0x3500000) { - flags |= FiberFlags::SetFpuRegs; + u32 user_flags = flags; + if (build_ver >= Common::ElfInfo::FW_35) { + user_flags |= FiberFlags::SetFpuRegs; } if (context_size_check) { - flags |= FiberFlags::ContextSizeCheck; + user_flags |= FiberFlags::ContextSizeCheck; } strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH); @@ -186,7 +190,7 @@ s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFi fiber->addr_context = addr_context; fiber->size_context = 
size_context; fiber->context = nullptr; - fiber->flags = flags; + fiber->flags = user_flags; /* A low stack area is problematic, as we can easily @@ -525,6 +529,13 @@ s32 PS4_SYSV_ABI sceFiberGetThreadFramePointerAddress(u64* addr_frame_pointer) { return ORBIS_OK; } +s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, + u64 arg_on_initialize, void* addr_context, u64 size_context, + const OrbisFiberOptParam* opt_param, u32 build_ver) { + return sceFiberInitializeImpl(fiber, name, entry, arg_on_initialize, addr_context, size_context, + opt_param, 0, build_ver); +} + s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) { return sceFiberRunImpl(fiber, nullptr, 0, arg_on_run_to, arg_on_return); } @@ -536,7 +547,7 @@ s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_o void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("hVYD7Ou2pCQ", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize); LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, - sceFiberInitialize); // _sceFiberInitializeWithInternalOptionImpl + sceFiberInitializeImpl); // _sceFiberInitializeWithInternalOptionImpl LIB_FUNCTION("asjUJJ+aa8s", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberOptParamInitialize); LIB_FUNCTION("JeNX5F-NzQU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberFinalize); From ab7e794f23881c4fb704ceedcaad6133fc187a7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Sun, 29 Dec 2024 17:35:52 +0700 Subject: [PATCH 03/53] sdl: Limit minimum window size (#1966) --- src/sdl_window.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index 4b13844b8..50c3e93ee 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -92,6 +92,7 @@ WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_ UNREACHABLE_MSG("Failed to create window handle: {}", SDL_GetError()); } + 
SDL_SetWindowMinimumSize(window, 640, 360); SDL_SetWindowFullscreen(window, Config::isFullscreenMode()); SDL_InitSubSystem(SDL_INIT_GAMEPAD); From 468d7ea80edd5c370e9de67f3a3f3b5b03b3ddf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Sun, 29 Dec 2024 17:36:16 +0700 Subject: [PATCH 04/53] config: Don't load config in the Emulator class (#1965) Allows overriding of configs in frontends. Fix set fullscreen not working when specified in the CLI. --- src/emulator.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/emulator.cpp b/src/emulator.cpp index 10d17a2db..dbe21a141 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -45,10 +45,6 @@ Frontend::WindowSDL* g_window = nullptr; namespace Core { Emulator::Emulator() { - // Read configuration file. - const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - Config::load(config_dir / "config.toml"); - // Initialize NT API functions and set high priority #ifdef _WIN32 Common::NtApi::Initialize(); From 202c1046a139a413f6b37265c89a9ebc6c24ca97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Sun, 29 Dec 2024 17:36:29 +0700 Subject: [PATCH 05/53] Fix loading RenderDoc in offline mode for Linux (#1968) --- src/video_core/renderdoc.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/video_core/renderdoc.cpp b/src/video_core/renderdoc.cpp index 7e0994992..b082fd1ca 100644 --- a/src/video_core/renderdoc.cpp +++ b/src/video_core/renderdoc.cpp @@ -65,11 +65,18 @@ void LoadRenderDoc() { #else static constexpr const char RENDERDOC_LIB[] = "librenderdoc.so"; #endif - if (void* mod = dlopen(RENDERDOC_LIB, RTLD_NOW | RTLD_NOLOAD)) { - const auto RENDERDOC_GetAPI = - reinterpret_cast(dlsym(mod, "RENDERDOC_GetAPI")); - const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api); - ASSERT(ret == 1); + // Check if we are running by RDoc GUI + void* mod = dlopen(RENDERDOC_LIB, RTLD_NOW | RTLD_NOLOAD); + if 
(!mod && Config::isRdocEnabled()) { + // If enabled in config, try to load RDoc runtime in offline mode + if ((mod = dlopen(RENDERDOC_LIB, RTLD_NOW))) { + const auto RENDERDOC_GetAPI = + reinterpret_cast(dlsym(mod, "RENDERDOC_GetAPI")); + const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api); + ASSERT(ret == 1); + } else { + LOG_ERROR(Render, "Cannot load RenderDoc: {}", dlerror()); + } } #endif if (rdoc_api) { From da9e45b5828a9a7c54b10b6d41e346cde0e2d535 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 29 Dec 2024 02:36:41 -0800 Subject: [PATCH 06/53] build: Update MoltenVK and fix missing add_dependencies for copy. (#1970) * build: Fix missing add_dependencies for MoltenVK copy target. * externals: Update MoltenVK --- CMakeLists.txt | 1 + externals/MoltenVK/MoltenVK | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cd3894719..d63bd1951 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -917,6 +917,7 @@ if (APPLE) DEPENDS ${MVK_DYLIB_SRC} COMMAND cmake -E copy ${MVK_DYLIB_SRC} ${MVK_DYLIB_DST}) add_custom_target(CopyMoltenVK DEPENDS ${MVK_DYLIB_DST}) + add_dependencies(CopyMoltenVK MoltenVK) add_dependencies(shadps4 CopyMoltenVK) set_property(TARGET shadps4 APPEND PROPERTY BUILD_RPATH "@executable_path/../Frameworks") else() diff --git a/externals/MoltenVK/MoltenVK b/externals/MoltenVK/MoltenVK index 5ad3ee5d2..9f0b616d9 160000 --- a/externals/MoltenVK/MoltenVK +++ b/externals/MoltenVK/MoltenVK @@ -1 +1 @@ -Subproject commit 5ad3ee5d2f84342950c3fe93dec97719574d1932 +Subproject commit 9f0b616d9e2c39464d2a859b79dbc655c4a30e7e From 62c47cb1b74c22812c566e7a2aeb2968e7fcb999 Mon Sep 17 00:00:00 2001 From: baggins183 Date: Sun, 29 Dec 2024 02:37:15 -0800 Subject: [PATCH 07/53] recompiler: handle reads of output variables in hull shaders (#1962) * Handle output control point reads in hull shader. 
Might need additional barriers * output storage class --- .../spirv/emit_spirv_context_get_set.cpp | 15 +++++----- .../backend/spirv/emit_spirv_instructions.h | 2 ++ .../frontend/translate/vector_memory.cpp | 4 --- src/shader_recompiler/ir/ir_emitter.cpp | 6 ++++ src/shader_recompiler/ir/ir_emitter.h | 3 ++ src/shader_recompiler/ir/opcodes.inc | 2 ++ .../ir/passes/hull_shader_transform.cpp | 28 ++++++++++++------- 7 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f3db6af56..4550440bb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -217,14 +217,6 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { const auto pointer{ ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))}; return ctx.OpLoad(ctx.F32[1], pointer); - } else if (IR::IsParam(attr)) { - const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; - const auto param = ctx.input_params.at(param_id).id; - const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; - const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); } UNREACHABLE(); } @@ -351,6 +343,13 @@ Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, vertex_index, attr_index, comp_index)); } +Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, + Id comp_index) { + const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.output_attr_array, + vertex_index, attr_index, 
comp_index)); +} + void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) { // Implied vertex index is invocation_id const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 85bed589b..d26cf6662 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -89,6 +89,8 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index); void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index); +Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, + Id comp_index); Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 7c3db9551..79d46cd42 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -255,10 +255,6 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst "Non immediate offset not supported"); } - if (info.stage == Stage::Hull) { - // printf("here\n"); // break - } - IR::Value address = [&] -> IR::Value { if (is_ring) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index c9d97679f..20e6eae0b 100644 --- 
a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -288,6 +288,12 @@ void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index, Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index); } +F32 IREmitter::ReadTcsGenericOuputAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index) { + return Inst(IR::Opcode::ReadTcsGenericOuputAttribute, vertex_index, attr_index, + comp_index); +} + F32 IREmitter::GetPatch(Patch patch) { return Inst(Opcode::GetPatch, patch); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 4679a0133..f65baee2a 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -90,6 +90,9 @@ public: const U32& comp_index); void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index); + [[nodiscard]] F32 ReadTcsGenericOuputAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index); + [[nodiscard]] F32 GetPatch(Patch patch); void SetPatch(Patch patch, const F32& value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index cf2c3b67e..1194c3792 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -64,6 +64,8 @@ OPCODE(GetPatch, F32, Patc OPCODE(SetPatch, Void, Patch, F32, ) OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, ) OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, ) +OPCODE(ReadTcsGenericOuputAttribute, F32, U32, U32, U32, ) + // Flags OPCODE(GetScc, U1, Void, ) diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index 895c9823e..6164fec12 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -343,8 +343,8 @@ static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 
stride, IR::IREmitter& // TODO: can optimize div in control point index similarly to mod // Read a TCS input (InputCP region) or TES input (OutputCP region) -static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir, - u32 off_dw) { +static IR::F32 ReadTessControlPointAttribute(IR::U32 addr, const u32 stride, IR::IREmitter& ir, + u32 off_dw, bool is_output_read_in_tcs) { if (off_dw > 0) { addr = ir.IAdd(addr, ir.Imm32(off_dw)); } @@ -354,7 +354,11 @@ static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmit ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u)); const IR::U32 comp_index = ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); - return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); + if (is_output_read_in_tcs) { + return ir.ReadTcsGenericOuputAttribute(control_point_index, attr_index, comp_index); + } else { + return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); + } } } // namespace @@ -481,21 +485,25 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { case IR::Opcode::LoadSharedU128: IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::U32 addr{inst.Arg(0)}; - AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + const AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32 ? 1 : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4); - ASSERT_MSG(region == AttributeRegion::InputCP, - "Unhandled read of output or patchconst attribute in hull shader"); + ASSERT_MSG(region == AttributeRegion::InputCP || + region == AttributeRegion::OutputCP, + "Unhandled read of patchconst attribute in hull shader"); + const bool is_tcs_output_read = region == AttributeRegion::OutputCP; + const u32 stride = is_tcs_output_read ? 
runtime_info.hs_info.hs_output_cp_stride + : runtime_info.hs_info.ls_stride; IR::Value attr_read; if (num_dwords == 1) { attr_read = ir.BitCast( - ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0)); + ReadTessControlPointAttribute(addr, stride, ir, 0, is_tcs_output_read)); } else { boost::container::static_vector read_components; for (auto i = 0; i < num_dwords; i++) { const IR::F32 component = - ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i); + ReadTessControlPointAttribute(addr, stride, ir, i, is_tcs_output_read); read_components.push_back(ir.BitCast(component)); } attr_read = ir.CompositeConstruct(read_components); @@ -565,8 +573,8 @@ void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4); const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 { if (region == AttributeRegion::OutputCP) { - return ReadTessInputComponent( - addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw); + return ReadTessControlPointAttribute( + addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw, false); } else { ASSERT(region == AttributeRegion::PatchConst); return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw)); From 248220fef333557f6a59450072da9555f68e9109 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 29 Dec 2024 02:37:37 -0800 Subject: [PATCH 08/53] pthread: Change minimum stack for HLE to additional stack. 
(#1960) --- src/core/libraries/kernel/threads/pthread.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 761d13346..e81207a0d 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -244,10 +244,9 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt new_thread->tid = ++TidCounter; if (new_thread->attr.stackaddr_attr == 0) { - /* Enforce minimum stack size of 128 KB */ - static constexpr size_t MinimumStack = 128_KB; - auto& stacksize = new_thread->attr.stacksize_attr; - stacksize = std::max(stacksize, MinimumStack); + /* Add additional stack space for HLE */ + static constexpr size_t AdditionalStack = 128_KB; + new_thread->attr.stacksize_attr += AdditionalStack; } if (thread_state->CreateStack(&new_thread->attr) != 0) { From 4b2db61120b893840baeedf1e03500e4d32b1699 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sun, 29 Dec 2024 12:45:18 +0200 Subject: [PATCH 09/53] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..02017dddf --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,30 @@ +--- +name: Bug report +about: Report a bug in the emulator +title: '' +labels: '' +assignees: '' + +--- + +Checklist: +[ ] I have searched for a similar issue in this repository and did not find one. +[ ] I have asked for support on shadPS4 discord server. +[ ] I am using an official build obtained from [releases](https://github.com/shadps4-emu/shadPS4/releases) or updated it using its in-app updater. 
+[ ] I have re-dumped the game and performed a clean install without mods. +[ ] I have disabled all patches and cheats. + +Description: + + +Steps To Reproduce: + + +Logs: + + +System Information: + + +Additional Information: + From f8177902a5e4a2f99bd166b9d9f85f468dfce7d5 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 29 Dec 2024 02:46:59 -0800 Subject: [PATCH 10/53] cubeb_audio: Make sure COM is initialized on Windows. (#1958) --- src/core/libraries/audio/audioout_backend.h | 3 +++ src/core/libraries/audio/cubeb_audio.cpp | 23 +++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/core/libraries/audio/audioout_backend.h b/src/core/libraries/audio/audioout_backend.h index ecc4cf7c6..f423d4963 100644 --- a/src/core/libraries/audio/audioout_backend.h +++ b/src/core/libraries/audio/audioout_backend.h @@ -34,6 +34,9 @@ public: private: cubeb* ctx = nullptr; +#ifdef _WIN32 + bool owns_com = false; +#endif }; class SDLAudioOut final : public AudioOutBackend { diff --git a/src/core/libraries/audio/cubeb_audio.cpp b/src/core/libraries/audio/cubeb_audio.cpp index e1195558a..4127931b7 100644 --- a/src/core/libraries/audio/cubeb_audio.cpp +++ b/src/core/libraries/audio/cubeb_audio.cpp @@ -10,9 +10,11 @@ #include "core/libraries/audio/audioout.h" #include "core/libraries/audio/audioout_backend.h" -namespace Libraries::AudioOut { +#ifdef _WIN32 +#include +#endif -constexpr int AUDIO_STREAM_BUFFER_THRESHOLD = 65536; // Define constant for buffer threshold +namespace Libraries::AudioOut { class CubebPortBackend : public PortBackend { public: @@ -143,17 +145,26 @@ private: }; CubebAudioOut::CubebAudioOut() { +#ifdef _WIN32 + // Need to initialize COM for this thread on Windows, in case WASAPI backend is used. 
+ owns_com = CoInitializeEx(nullptr, COINIT_MULTITHREADED) == S_OK; +#endif if (const auto ret = cubeb_init(&ctx, "shadPS4", nullptr); ret != CUBEB_OK) { LOG_CRITICAL(Lib_AudioOut, "Failed to create cubeb context: {}", ret); } } CubebAudioOut::~CubebAudioOut() { - if (!ctx) { - return; + if (ctx) { + cubeb_destroy(ctx); + ctx = nullptr; } - cubeb_destroy(ctx); - ctx = nullptr; +#ifdef _WIN32 + if (owns_com) { + CoUninitialize(); + owns_com = false; + } +#endif } std::unique_ptr CubebAudioOut::Open(PortOut& port) { From e952013fe06b6d040d3be091e3c7e50e3456770f Mon Sep 17 00:00:00 2001 From: Mahmoud Adel <94652220+AboMedoz@users.noreply.github.com> Date: Sun, 29 Dec 2024 12:47:15 +0200 Subject: [PATCH 11/53] add EventWrite and DispatchIndirect to ProcessCompute (#1948) * add EventWrite and DispatchIndirect to ProcessCompute helps Alienation go Ingame * apply review changes Co-authored-by: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> --------- Co-authored-by: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> --- src/video_core/amdgpu/liverpool.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 5dd3edd6d..43adff8d2 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -821,6 +821,24 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { } break; } + case PM4ItOpcode::DispatchIndirect: { + const auto* dispatch_indirect = reinterpret_cast(header); + auto& cs_program = GetCsRegs(); + const auto offset = dispatch_indirect->data_offset; + const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; + const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); + } + if (rasterizer && (cs_program.dispatch_initiator & 1)) { + const auto cmd_address 
= reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("acb:{}:DispatchIndirect", cmd_address)); + rasterizer->DispatchIndirect(ib_address, offset, size); + rasterizer->ScopeMarkerEnd(); + } + break; + } case PM4ItOpcode::WriteData: { const auto* write_data = reinterpret_cast(header); ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5); @@ -845,6 +863,10 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { release_mem->SignalFence(static_cast(queue.pipe_id)); break; } + case PM4ItOpcode::EventWrite: { + // const auto* event = reinterpret_cast(header); + break; + } default: UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}", static_cast(opcode), count); From f09a95453ee875e0a6492343f7a27ba91525ab23 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sun, 29 Dec 2024 12:48:45 +0200 Subject: [PATCH 12/53] hot-fix: Correct queue id in dispatch indirect I missed this --- src/video_core/amdgpu/liverpool.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 43adff8d2..2926bcc69 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -825,7 +825,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { const auto* dispatch_indirect = reinterpret_cast(header); auto& cs_program = GetCsRegs(); const auto offset = dispatch_indirect->data_offset; - const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; + const auto ib_address = mapped_queues[vqid].indirect_args_addr; const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), @@ -833,7 +833,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { } if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto 
cmd_address = reinterpret_cast(header); - rasterizer->ScopeMarkerBegin(fmt::format("acb:{}:DispatchIndirect", cmd_address)); + rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); rasterizer->DispatchIndirect(ib_address, offset, size); rasterizer->ScopeMarkerEnd(); } From ee72d99947382dac8f4fbd56f1915a3eb6b65cde Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Sun, 29 Dec 2024 13:53:06 +0300 Subject: [PATCH 13/53] ajm: added stubbed statistics instance (#1924) * ajm: added stubbed statistics instance * fixed a typo, thanks poly * fixed clang-format * removed unused struct * small fixes * fixed typedefs, added per codec statistics --- CMakeLists.txt | 2 + src/core/libraries/ajm/ajm.cpp | 10 +- src/core/libraries/ajm/ajm.h | 49 ++++++- src/core/libraries/ajm/ajm_batch.cpp | 129 ++++++++++++++---- src/core/libraries/ajm/ajm_batch.h | 4 + src/core/libraries/ajm/ajm_context.cpp | 21 +-- src/core/libraries/ajm/ajm_instance.cpp | 4 +- .../libraries/ajm/ajm_instance_statistics.cpp | 37 +++++ .../libraries/ajm/ajm_instance_statistics.h | 17 +++ 9 files changed, 231 insertions(+), 42 deletions(-) create mode 100644 src/core/libraries/ajm/ajm_instance_statistics.cpp create mode 100644 src/core/libraries/ajm/ajm_instance_statistics.h diff --git a/CMakeLists.txt b/CMakeLists.txt index d63bd1951..af811e9fb 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -189,6 +189,8 @@ set(AJM_LIB src/core/libraries/ajm/ajm.cpp src/core/libraries/ajm/ajm_context.cpp src/core/libraries/ajm/ajm_context.h src/core/libraries/ajm/ajm_error.h + src/core/libraries/ajm/ajm_instance_statistics.cpp + src/core/libraries/ajm/ajm_instance_statistics.h src/core/libraries/ajm/ajm_instance.cpp src/core/libraries/ajm/ajm_instance.h src/core/libraries/ajm/ajm_mp3.cpp diff --git a/src/core/libraries/ajm/ajm.cpp b/src/core/libraries/ajm/ajm.cpp index 3184fa64f..5c55d2c06 100644 --- a/src/core/libraries/ajm/ajm.cpp +++ b/src/core/libraries/ajm/ajm.cpp @@ -183,13 +183,15 @@ 
int PS4_SYSV_ABI sceAjmInstanceSwitch() { return ORBIS_OK; } -int PS4_SYSV_ABI sceAjmMemoryRegister() { - LOG_ERROR(Lib_Ajm, "(STUBBED) called"); +int PS4_SYSV_ABI sceAjmMemoryRegister(u32 context_id, void* ptr, size_t num_pages) { + // All memory is already shared with our implementation since we do not use any hardware. + LOG_TRACE(Lib_Ajm, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceAjmMemoryUnregister() { - LOG_ERROR(Lib_Ajm, "(STUBBED) called"); +int PS4_SYSV_ABI sceAjmMemoryUnregister(u32 context_id, void* ptr) { + // All memory is already shared with our implementation since we do not use any hardware. + LOG_TRACE(Lib_Ajm, "(STUBBED) called"); return ORBIS_OK; } diff --git a/src/core/libraries/ajm/ajm.h b/src/core/libraries/ajm/ajm.h index 1ac7c7629..34aeb9aa4 100644 --- a/src/core/libraries/ajm/ajm.h +++ b/src/core/libraries/ajm/ajm.h @@ -74,6 +74,26 @@ union AjmJobFlags { }; }; +enum class AjmStatisticsFlags : u64 { + Memory = 1 << 0, + EnginePerCodec = 1 << 15, + Engine = 1 << 16, +}; +DECLARE_ENUM_FLAG_OPERATORS(AjmStatisticsFlags) + +union AjmStatisticsJobFlags { + AjmStatisticsJobFlags(AjmJobFlags job_flags) : raw(job_flags.raw) {} + + u64 raw; + struct { + u64 version : 3; + u64 : 12; + AjmStatisticsFlags statistics_flags : 17; + u64 : 32; + }; +}; +static_assert(sizeof(AjmStatisticsJobFlags) == 8); + struct AjmSidebandResult { s32 result; s32 internal_result; @@ -126,6 +146,31 @@ union AjmSidebandInitParameters { u8 reserved[8]; }; +struct AjmSidebandStatisticsEngine { + float usage_batch; + float usage_interval[3]; +}; + +struct AjmSidebandStatisticsEnginePerCodec { + u8 codec_count; + u8 codec_id[3]; + float codec_percentage[3]; +}; + +struct AjmSidebandStatisticsMemory { + u32 instance_free; + u32 buffer_free; + u32 batch_size; + u32 input_size; + u32 output_size; + u32 small_size; +}; + +struct AjmSidebandStatisticsEngineParameters { + u32 interval_count; + float interval[3]; +}; + union AjmInstanceFlags { u64 raw; struct { @@ 
-178,8 +223,8 @@ int PS4_SYSV_ABI sceAjmInstanceCreate(u32 context, AjmCodecType codec_type, AjmI int PS4_SYSV_ABI sceAjmInstanceDestroy(u32 context, u32 instance); int PS4_SYSV_ABI sceAjmInstanceExtend(); int PS4_SYSV_ABI sceAjmInstanceSwitch(); -int PS4_SYSV_ABI sceAjmMemoryRegister(); -int PS4_SYSV_ABI sceAjmMemoryUnregister(); +int PS4_SYSV_ABI sceAjmMemoryRegister(u32 context_id, void* ptr, size_t num_pages); +int PS4_SYSV_ABI sceAjmMemoryUnregister(u32 context_id, void* ptr); int PS4_SYSV_ABI sceAjmModuleRegister(u32 context, AjmCodecType codec_type, s64 reserved); int PS4_SYSV_ABI sceAjmModuleUnregister(); int PS4_SYSV_ABI sceAjmStrError(); diff --git a/src/core/libraries/ajm/ajm_batch.cpp b/src/core/libraries/ajm/ajm_batch.cpp index b1cec88b3..30e1deb71 100644 --- a/src/core/libraries/ajm/ajm_batch.cpp +++ b/src/core/libraries/ajm/ajm_batch.cpp @@ -54,6 +54,8 @@ public: : m_p_begin(begin), m_p_current(m_p_begin), m_size(size) {} AjmBatchBuffer(std::span data) : m_p_begin(data.data()), m_p_current(m_p_begin), m_size(data.size()) {} + AjmBatchBuffer(AjmChunkBuffer& buffer) + : AjmBatchBuffer(reinterpret_cast(buffer.p_address), buffer.size) {} AjmBatchBuffer SubBuffer(size_t size = s_dynamic_extent) { auto current = m_p_current; @@ -113,6 +115,88 @@ private: size_t m_size{}; }; +AjmJob AjmStatisticsJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { + std::optional job_flags = {}; + std::optional input_control_buffer = {}; + std::optional output_control_buffer = {}; + + AjmJob job; + job.instance_id = instance_id; + + while (!batch_buffer.IsEmpty()) { + auto& header = batch_buffer.Peek(); + switch (header.ident) { + case Identifier::AjmIdentInputControlBuf: { + ASSERT_MSG(!input_control_buffer.has_value(), + "Only one instance of input control buffer is allowed per job"); + const auto& buffer = batch_buffer.Consume(); + if (buffer.p_address != nullptr && buffer.size != 0) { + input_control_buffer = buffer; + } + break; + } + case 
Identifier::AjmIdentControlFlags: { + ASSERT_MSG(!job_flags.has_value(), "Only one instance of job flags is allowed per job"); + auto& chunk = batch_buffer.Consume(); + job_flags = AjmJobFlags{ + .raw = (u64(chunk.header.payload) << 32) + chunk.flags_low, + }; + break; + } + case Identifier::AjmIdentReturnAddressBuf: { + // Ignore return address buffers. + batch_buffer.Skip(); + break; + } + case Identifier::AjmIdentOutputControlBuf: { + ASSERT_MSG(!output_control_buffer.has_value(), + "Only one instance of output control buffer is allowed per job"); + const auto& buffer = batch_buffer.Consume(); + if (buffer.p_address != nullptr && buffer.size != 0) { + output_control_buffer = buffer; + } + break; + } + default: + UNREACHABLE_MSG("Unknown chunk: {}", header.ident); + } + } + + ASSERT(job_flags.has_value()); + job.flags = job_flags.value(); + + AjmStatisticsJobFlags flags(job.flags); + if (input_control_buffer.has_value()) { + AjmBatchBuffer input_batch(input_control_buffer.value()); + if (True(flags.statistics_flags & AjmStatisticsFlags::Engine)) { + job.input.statistics_engine_parameters = + input_batch.Consume(); + } + } + + if (output_control_buffer.has_value()) { + AjmBatchBuffer output_batch(output_control_buffer.value()); + job.output.p_result = &output_batch.Consume(); + *job.output.p_result = AjmSidebandResult{}; + + if (True(flags.statistics_flags & AjmStatisticsFlags::Engine)) { + job.output.p_engine = &output_batch.Consume(); + *job.output.p_engine = AjmSidebandStatisticsEngine{}; + } + if (True(flags.statistics_flags & AjmStatisticsFlags::EnginePerCodec)) { + job.output.p_engine_per_codec = + &output_batch.Consume(); + *job.output.p_engine_per_codec = AjmSidebandStatisticsEnginePerCodec{}; + } + if (True(flags.statistics_flags & AjmStatisticsFlags::Memory)) { + job.output.p_memory = &output_batch.Consume(); + *job.output.p_memory = AjmSidebandStatisticsMemory{}; + } + } + + return job; +} + AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer 
batch_buffer) { std::optional job_flags = {}; std::optional input_control_buffer = {}; @@ -155,15 +239,6 @@ AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { batch_buffer.Skip(); break; } - case Identifier::AjmIdentInlineBuf: { - ASSERT_MSG(!output_control_buffer.has_value(), - "Only one instance of inline buffer is allowed per job"); - const auto& buffer = batch_buffer.Consume(); - if (buffer.p_address != nullptr && buffer.size != 0) { - inline_buffer = buffer; - } - break; - } case Identifier::AjmIdentOutputRunBuf: { auto& buffer = batch_buffer.Consume(); u8* p_begin = reinterpret_cast(buffer.p_address); @@ -186,13 +261,12 @@ AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { } } + ASSERT(job_flags.has_value()); job.flags = job_flags.value(); // Initialize sideband input parameters if (input_control_buffer.has_value()) { - AjmBatchBuffer input_batch(reinterpret_cast(input_control_buffer->p_address), - input_control_buffer->size); - + AjmBatchBuffer input_batch(input_control_buffer.value()); const auto sideband_flags = job_flags->sideband_flags; if (True(sideband_flags & AjmJobSidebandFlags::Format) && !input_batch.IsEmpty()) { job.input.format = input_batch.Consume(); @@ -202,6 +276,9 @@ AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { } const auto control_flags = job_flags.value().control_flags; + if (True(control_flags & AjmJobControlFlags::Resample)) { + job.input.resample_parameters = input_batch.Consume(); + } if (True(control_flags & AjmJobControlFlags::Initialize)) { job.input.init_params = AjmDecAt9InitializeParameters{}; std::memcpy(&job.input.init_params.value(), input_batch.GetCurrent(), @@ -209,21 +286,9 @@ AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { } } - if (inline_buffer.has_value()) { - AjmBatchBuffer inline_batch(reinterpret_cast(inline_buffer->p_address), - inline_buffer->size); - - const auto control_flags = 
job_flags.value().control_flags; - if (True(control_flags & AjmJobControlFlags::Resample)) { - job.input.resample_parameters = inline_batch.Consume(); - } - } - // Initialize sideband output parameters if (output_control_buffer.has_value()) { - AjmBatchBuffer output_batch(reinterpret_cast(output_control_buffer->p_address), - output_control_buffer->size); - + AjmBatchBuffer output_batch(output_control_buffer.value()); job.output.p_result = &output_batch.Consume(); *job.output.p_result = AjmSidebandResult{}; @@ -260,9 +325,21 @@ std::shared_ptr AjmBatch::FromBatchBuffer(std::span data) { AjmBatchBuffer buffer(data); while (!buffer.IsEmpty()) { auto& job_chunk = buffer.Consume(); + if (job_chunk.header.ident == AjmIdentInlineBuf) { + // Inline buffers are used to store sideband input data. + // We should just skip them as they do not require any special handling. + buffer.Advance(job_chunk.size); + continue; + } ASSERT(job_chunk.header.ident == AjmIdentJob); auto instance_id = job_chunk.header.payload; - batch->jobs.push_back(AjmJobFromBatchBuffer(instance_id, buffer.SubBuffer(job_chunk.size))); + if (instance_id == AJM_INSTANCE_STATISTICS) { + batch->jobs.push_back( + AjmStatisticsJobFromBatchBuffer(instance_id, buffer.SubBuffer(job_chunk.size))); + } else { + batch->jobs.push_back( + AjmJobFromBatchBuffer(instance_id, buffer.SubBuffer(job_chunk.size))); + } } return batch; diff --git a/src/core/libraries/ajm/ajm_batch.h b/src/core/libraries/ajm/ajm_batch.h index 3c586b773..09daa630d 100644 --- a/src/core/libraries/ajm/ajm_batch.h +++ b/src/core/libraries/ajm/ajm_batch.h @@ -23,6 +23,7 @@ struct AjmJob { struct Input { std::optional init_params; std::optional resample_parameters; + std::optional statistics_engine_parameters; std::optional format; std::optional gapless_decode; std::vector buffer; @@ -33,6 +34,9 @@ struct AjmJob { AjmSidebandResult* p_result = nullptr; AjmSidebandStream* p_stream = nullptr; AjmSidebandFormat* p_format = nullptr; + 
AjmSidebandStatisticsMemory* p_memory = nullptr; + AjmSidebandStatisticsEnginePerCodec* p_engine_per_codec = nullptr; + AjmSidebandStatisticsEngine* p_engine = nullptr; AjmSidebandGaplessDecode* p_gapless_decode = nullptr; AjmSidebandMFrame* p_mframe = nullptr; u8* p_codec_info = nullptr; diff --git a/src/core/libraries/ajm/ajm_context.cpp b/src/core/libraries/ajm/ajm_context.cpp index 09255110c..8992dd83b 100644 --- a/src/core/libraries/ajm/ajm_context.cpp +++ b/src/core/libraries/ajm/ajm_context.cpp @@ -9,6 +9,7 @@ #include "core/libraries/ajm/ajm_context.h" #include "core/libraries/ajm/ajm_error.h" #include "core/libraries/ajm/ajm_instance.h" +#include "core/libraries/ajm/ajm_instance_statistics.h" #include "core/libraries/ajm/ajm_mp3.h" #include "core/libraries/error_codes.h" @@ -70,15 +71,19 @@ void AjmContext::ProcessBatch(u32 id, std::span jobs) { LOG_TRACE(Lib_Ajm, "Processing job {} for instance {}. flags = {:#x}", id, job.instance_id, job.flags.raw); - std::shared_ptr instance; - { - std::shared_lock lock(instances_mutex); - auto* p_instance = instances.Get(job.instance_id); - ASSERT_MSG(p_instance != nullptr, "Attempting to execute job on null instance"); - instance = *p_instance; - } + if (job.instance_id == AJM_INSTANCE_STATISTICS) { + AjmInstanceStatistics::Getinstance().ExecuteJob(job); + } else { + std::shared_ptr instance; + { + std::shared_lock lock(instances_mutex); + auto* p_instance = instances.Get(job.instance_id); + ASSERT_MSG(p_instance != nullptr, "Attempting to execute job on null instance"); + instance = *p_instance; + } - instance->ExecuteJob(job); + instance->ExecuteJob(job); + } } } diff --git a/src/core/libraries/ajm/ajm_instance.cpp b/src/core/libraries/ajm/ajm_instance.cpp index ea7fd5617..8af105c77 100644 --- a/src/core/libraries/ajm/ajm_instance.cpp +++ b/src/core/libraries/ajm/ajm_instance.cpp @@ -68,11 +68,11 @@ void AjmInstance::ExecuteJob(AjmJob& job) { m_codec->Initialize(¶ms, sizeof(params)); } if 
(job.input.resample_parameters.has_value()) { - UNREACHABLE_MSG("Unimplemented: resample parameters"); + LOG_ERROR(Lib_Ajm, "Unimplemented: resample parameters"); m_resample_parameters = job.input.resample_parameters.value(); } if (job.input.format.has_value()) { - UNREACHABLE_MSG("Unimplemented: format parameters"); + LOG_ERROR(Lib_Ajm, "Unimplemented: format parameters"); m_format = job.input.format.value(); } if (job.input.gapless_decode.has_value()) { diff --git a/src/core/libraries/ajm/ajm_instance_statistics.cpp b/src/core/libraries/ajm/ajm_instance_statistics.cpp new file mode 100644 index 000000000..c0c1af8bb --- /dev/null +++ b/src/core/libraries/ajm/ajm_instance_statistics.cpp @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "core/libraries/ajm/ajm.h" +#include "core/libraries/ajm/ajm_instance_statistics.h" + +namespace Libraries::Ajm { + +void AjmInstanceStatistics::ExecuteJob(AjmJob& job) { + if (job.output.p_engine) { + job.output.p_engine->usage_batch = 0.01; + const auto ic = job.input.statistics_engine_parameters->interval_count; + for (u32 idx = 0; idx < ic; ++idx) { + job.output.p_engine->usage_interval[idx] = 0.01; + } + } + if (job.output.p_engine_per_codec) { + job.output.p_engine_per_codec->codec_count = 1; + job.output.p_engine_per_codec->codec_id[0] = static_cast(AjmCodecType::At9Dec); + job.output.p_engine_per_codec->codec_percentage[0] = 0.01; + } + if (job.output.p_memory) { + job.output.p_memory->instance_free = 0x400000; + job.output.p_memory->buffer_free = 0x400000; + job.output.p_memory->batch_size = 0x4200; + job.output.p_memory->input_size = 0x2000; + job.output.p_memory->output_size = 0x2000; + job.output.p_memory->small_size = 0x200; + } +} + +AjmInstanceStatistics& AjmInstanceStatistics::Getinstance() { + static AjmInstanceStatistics instance; + return instance; +} + +} // namespace Libraries::Ajm diff --git 
a/src/core/libraries/ajm/ajm_instance_statistics.h b/src/core/libraries/ajm/ajm_instance_statistics.h new file mode 100644 index 000000000..ea70c9d56 --- /dev/null +++ b/src/core/libraries/ajm/ajm_instance_statistics.h @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "core/libraries/ajm/ajm_batch.h" + +namespace Libraries::Ajm { + +class AjmInstanceStatistics { +public: + void ExecuteJob(AjmJob& job); + + static AjmInstanceStatistics& Getinstance(); +}; + +} // namespace Libraries::Ajm From 1bc27135e33b057702d0fe673f48b9eb29cc8e36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Sun, 29 Dec 2024 18:22:35 +0700 Subject: [PATCH 14/53] renderer_vulkan: fix deadlock when resizing the SDL window (#1860) * renderer_vulkan: Fix deadlock when resizing the SDL window * Address review comment --- .../renderer_vulkan/vk_presenter.cpp | 27 +++++++++++++------ .../renderer_vulkan/vk_swapchain.cpp | 1 + 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_presenter.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp index bc55cde23..93129842f 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -628,6 +628,13 @@ Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) } void Presenter::Present(Frame* frame) { + // Free the frame for reuse + const auto free_frame = [&] { + std::scoped_lock fl{free_mutex}; + free_queue.push(frame); + free_cv.notify_one(); + }; + // Recreate the swapchain if the window was resized. 
if (window.GetWidth() != swapchain.GetExtent().width || window.GetHeight() != swapchain.GetExtent().height) { @@ -636,8 +643,19 @@ void Presenter::Present(Frame* frame) { if (!swapchain.AcquireNextImage()) { swapchain.Recreate(window.GetWidth(), window.GetHeight()); + if (!swapchain.AcquireNextImage()) { + // User resizes the window too fast and GPU can't keep up. Skip this frame. + LOG_WARNING(Render_Vulkan, "Skipping frame!"); + free_frame(); + return; + } } + // Reset fence for queue submission. Do it here instead of GetRenderFrame() because we may + // skip frame because of slow swapchain recreation. If a frame skip occurs, we skip signal + // the frame's present fence and future GetRenderFrame() call will hang waiting for this frame. + instance.GetDevice().resetFences(frame->present_done); + ImGui::Core::NewFrame(); const vk::Image swapchain_image = swapchain.Image(); @@ -737,11 +755,7 @@ void Presenter::Present(Frame* frame) { swapchain.Recreate(window.GetWidth(), window.GetHeight()); } - // Free the frame for reuse - std::scoped_lock fl{free_mutex}; - free_queue.push(frame); - free_cv.notify_one(); - + free_frame(); DebugState.IncFlipFrameNum(); } @@ -776,9 +790,6 @@ Frame* Presenter::GetRenderFrame() { } } - // Reset fence for next queue submission. 
- device.resetFences(frame->present_done); - // If the window dimensions changed, recreate this frame if (frame->width != window.GetWidth() || frame->height != window.GetHeight()) { RecreateFrame(frame, window.GetWidth(), window.GetHeight()); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 380660a2f..44f4be6dd 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -84,6 +84,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) { } void Swapchain::Recreate(u32 width_, u32 height_) { + LOG_DEBUG(Render_Vulkan, "Recreate the swapchain: width={} height={}", width_, height_); Create(width_, height_, surface); } From 38f1cc265295e30c429a1d604d38599eccce5303 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 29 Dec 2024 03:30:37 -0800 Subject: [PATCH 15/53] renderer_vulkan: Render polygons using triangle fans. (#1969) --- src/video_core/buffer_cache/buffer_cache.cpp | 30 ++----------------- src/video_core/buffer_cache/buffer_cache.h | 2 +- .../renderer_vulkan/liverpool_to_vk.cpp | 4 +-- .../renderer_vulkan/liverpool_to_vk.h | 9 ------ .../renderer_vulkan/vk_rasterizer.cpp | 26 +++++++--------- 5 files changed, 15 insertions(+), 56 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 59c1e0bc3..0088ea4fa 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -234,46 +234,22 @@ bool BufferCache::BindVertexBuffers( return has_step_rate; } -u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { - // Emulate QuadList and Polygon primitive types with CPU made index buffer. 
+void BufferCache::BindIndexBuffer(u32 index_offset) { const auto& regs = liverpool->regs; - if (!is_indexed) { - if (regs.primitive_type != AmdGpu::PrimitiveType::Polygon) { - return regs.num_indices; - } - - // Emit indices. - const u32 index_size = 3 * regs.num_indices; - const auto [data, offset] = stream_buffer.Map(index_size); - Vulkan::LiverpoolToVK::EmitPolygonToTriangleListIndices(data, regs.num_indices); - stream_buffer.Commit(); - - // Bind index buffer. - is_indexed = true; - - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, vk::IndexType::eUint16); - return index_size / sizeof(u16); - } // Figure out index type and size. const bool is_index16 = regs.index_buffer_type.index_type == AmdGpu::Liverpool::IndexType::Index16; const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32; const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32); - VAddr index_address = regs.index_base_address.Address(); - index_address += index_offset * index_size; - - if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { - UNREACHABLE(); - } + const VAddr index_address = + regs.index_base_address.Address() + index_offset * index_size; // Bind index buffer. const u32 index_buffer_size = regs.num_indices * index_size; const auto [vk_buffer, offset] = ObtainBuffer(index_address, index_buffer_size, false); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindIndexBuffer(vk_buffer->Handle(), offset, index_type); - return regs.num_indices; } void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index bcbaa45dc..0c70fa10b 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -83,7 +83,7 @@ public: const std::optional& fetch_shader); /// Bind host index buffer for the current draw. 
- u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); + void BindIndexBuffer(u32 index_offset); /// Writes a value to GPU buffer. void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 25ff88b9d..6bd50ab06 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -103,6 +103,7 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) { case AmdGpu::PrimitiveType::TriangleList: return vk::PrimitiveTopology::eTriangleList; case AmdGpu::PrimitiveType::TriangleFan: + case AmdGpu::PrimitiveType::Polygon: return vk::PrimitiveTopology::eTriangleFan; case AmdGpu::PrimitiveType::TriangleStrip: return vk::PrimitiveTopology::eTriangleStrip; @@ -116,9 +117,6 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) { return vk::PrimitiveTopology::eTriangleStripWithAdjacency; case AmdGpu::PrimitiveType::PatchPrimitive: return vk::PrimitiveTopology::ePatchList; - case AmdGpu::PrimitiveType::Polygon: - // Needs to generate index buffer on the fly. 
- return vk::PrimitiveTopology::eTriangleList; case AmdGpu::PrimitiveType::QuadList: case AmdGpu::PrimitiveType::RectList: return vk::PrimitiveTopology::ePatchList; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index d5f8e693b..25a27e20e 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -70,15 +70,6 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags); -inline void EmitPolygonToTriangleListIndices(u8* out_ptr, u32 num_vertices) { - u16* out_data = reinterpret_cast(out_ptr); - for (u16 i = 1; i < num_vertices - 1; i++) { - *out_data++ = 0; - *out_data++ = i; - *out_data++ = i + 1; - } -} - static inline vk::Format PromoteFormatToDepth(vk::Format fmt) { if (fmt == vk::Format::eR32Sfloat) { return vk::Format::eD32Sfloat; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d458fa124..4384cdbea 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -12,7 +12,6 @@ #include "video_core/renderer_vulkan/vk_shader_hle.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/texture_cache.h" -#include "vk_rasterizer.h" #ifdef MemoryBarrier #undef MemoryBarrier @@ -252,7 +251,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); - const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); + if (is_indexed) { + buffer_cache.BindIndexBuffer(index_offset); + } BeginRendering(*pipeline, state); UpdateDynamicState(*pipeline); @@ -263,10 +264,11 @@ void 
Rasterizer::Draw(bool is_indexed, u32 index_offset) { cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); if (is_indexed) { - cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset), - instance_offset); + cmdbuf.drawIndexed(regs.num_indices, regs.num_instances.NumInstances(), 0, + s32(vertex_offset), instance_offset); } else { - cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), vertex_offset, instance_offset); + cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), vertex_offset, + instance_offset); } ResetBindings(); @@ -280,22 +282,12 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 return; } - const auto& regs = liverpool->regs; - if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { - // We use a generated index buffer to convert polygons to triangles. Since it - // changes type of the draw, arguments are not valid for this case. We need to run a - // conversion pass to repack the indirect arguments buffer first. 
- LOG_WARNING(Render_Vulkan, "Primitive type is not supported for indirect draw"); - return; - } - const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline(); if (!pipeline) { return; } auto state = PrepareRenderState(pipeline->GetMrtMask()); - if (!BindResources(pipeline)) { return; } @@ -303,7 +295,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); - buffer_cache.BindIndexBuffer(is_indexed, 0); + if (is_indexed) { + buffer_cache.BindIndexBuffer(0); + } const auto& [buffer, base] = buffer_cache.ObtainBuffer(arg_address + offset, stride * max_count, false); From ac2e8c26027059af7c80892f6245193890d3d1c2 Mon Sep 17 00:00:00 2001 From: Nenkai Date: Sun, 29 Dec 2024 19:15:04 +0100 Subject: [PATCH 16/53] gnmdriver: remove redundant EqEventType assert (#1975) --- src/core/libraries/gnmdriver/gnmdriver.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 91a1329e5..805c9124e 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1015,11 +1015,7 @@ int PS4_SYSV_ABI sceGnmGetDebugTimestamp() { int PS4_SYSV_ABI sceGnmGetEqEventType(const SceKernelEvent* ev) { LOG_TRACE(Lib_GnmDriver, "called"); - - auto data = sceKernelGetEventData(ev); - ASSERT(static_cast(data) == GnmEventType::GfxEop); - - return data; + return sceKernelGetEventData(ev); } int PS4_SYSV_ABI sceGnmGetEqTimeStamp() { From dd3f24614b47986ff2b921e6ff9e45c979ed4ed6 Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Sun, 29 Dec 2024 14:57:32 +0300 Subject: [PATCH 17/53] infra: updated github issue templates --- .github/ISSUE_TEMPLATE/app-bug-report.yaml | 55 +++++++++++++ .github/ISSUE_TEMPLATE/bug_report.md | 30 
------- .github/ISSUE_TEMPLATE/config.yml | 10 +++ .github/ISSUE_TEMPLATE/feature-request.yaml | 54 ++++++++++++ .github/ISSUE_TEMPLATE/game-bug-report.yaml | 91 +++++++++++++++++++++ 5 files changed, 210 insertions(+), 30 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/app-bug-report.yaml delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/feature-request.yaml create mode 100644 .github/ISSUE_TEMPLATE/game-bug-report.yaml diff --git a/.github/ISSUE_TEMPLATE/app-bug-report.yaml b/.github/ISSUE_TEMPLATE/app-bug-report.yaml new file mode 100644 index 000000000..c38bbb814 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/app-bug-report.yaml @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later +# Docs - https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema +name: Application Bug Report +description: Problem with the application itself (ie. bad file path handling, UX issue) +title: "[APP BUG]: " +body: + - type: markdown + attributes: + value: | + ## Important: Read First + + Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only + If you need help using the emulator or unsure about your issue please contact us on [discord](https://discord.gg/bFJxfftGW6). + + Please make an effort to make sure your issue isn't already reported. + + Do not create issues involving software piracy, our rules specifically prohibit this. Otherwise your issue will be closed and you will be banned in this repository. + - type: checkboxes + id: checklist + attributes: + label: Checklist + options: + - label: I have searched for a similar issue in this repository and did not find one. 
+ required: true + - label: I am using an official build obtained from [releases](https://github.com/shadps4-emu/shadPS4/releases) or updated one of those builds using its in-app updater. + required: true + - type: textarea + id: desc + attributes: + label: Describe the Bug + description: "A clear and concise description of what the bug is" + validations: + required: true + - type: textarea + id: repro + attributes: + label: Reproduction Steps + description: "Detailed steps to reproduce the behavior" + validations: + required: true + - type: textarea + id: expected + attributes: + label: Expected Behavior + description: "A clear and concise description of what you expected to happen" + validations: + required: false + - type: input + id: os + attributes: + label: Specify OS Version + placeholder: "Example: Windows 11, Arch Linux, MacOS 15" + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 02017dddf..000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -name: Bug report -about: Report a bug in the emulator -title: '' -labels: '' -assignees: '' - ---- - -Checklist: -[ ] I have searched for a similar issue in this repository and did not find one. -[ ] I have asked for support on shadPS4 discord server. -[ ] I am using an official build obtained from [releases](https://github.com/shadps4-emu/shadPS4/releases) or updated it using its in-app updater. -[ ] I have re-dumped the game and performed a clean install without mods. -[ ] I have disabled all patches and cheats. 
- -Description: - - -Steps To Reproduce: - - -Logs: - - -System Information: - - -Additional Information: - diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..5adcf1437 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later +blank_issues_enabled: false +contact_links: + - name: Discord + url: https://discord.gg/bFJxfftGW6 + about: Get direct support and hang out with us + - name: Wiki + url: https://github.com/shadps4-emu/shadPS4/wiki + about: Information, guides, etc. diff --git a/.github/ISSUE_TEMPLATE/feature-request.yaml b/.github/ISSUE_TEMPLATE/feature-request.yaml new file mode 100644 index 000000000..a1b49362a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.yaml @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later +# Docs - https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema +name: Feature Request +description: Suggest a new feature or improve an existing one +title: "[Feature Request]: " +body: + - type: markdown + attributes: + value: | + ## Important: Read First + + Please make an effort to make sure your issue isn't already reported. + + Do not create issues involving software piracy, our rules specifically prohibit this. Otherwise your issue will be closed and you will be banned in this repository. + + - type: checkboxes + id: checklist + attributes: + label: Checklist + options: + - label: I have searched for a similar issue in this repository and did not find one. 
+ required: true + - type: textarea + id: desc + attributes: + label: Description + description: | + A concise description of the feature you want + + Include step by step examples of how the feature should work under various circumstances + validations: + required: true + - type: textarea + id: reason + attributes: + label: Reason + description: | + Give a reason why you want this feature + - How will it make things easier for you? + - How does this feature help your enjoyment of the emulator? + - What does it provide that isn't being provided currently? + validations: + required: true + - type: textarea + id: examples + attributes: + label: Examples + description: | + Provide examples of the feature as implemented by other software + + Include screenshots or video if you like to help demonstrate how you'd like this feature to work + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/game-bug-report.yaml b/.github/ISSUE_TEMPLATE/game-bug-report.yaml new file mode 100644 index 000000000..7eb9441d2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/game-bug-report.yaml @@ -0,0 +1,91 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later +# Docs - https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema +name: Game Emulation Bug Report +description: Problem in a game (ie. graphical artifacts, crashes, etc.) +title: "[GAME BUG]: " +body: + - type: markdown + attributes: + value: | + ## Important: Read First + + Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only + If you need help using the emulator or unsure about your issue please contact us on [discord](https://discord.gg/bFJxfftGW6). + + You can also check the [Game Compatibility Repository](https://github.com/shadps4-emu/shadps4-game-compatibility) for the information about the status of the game. 
+ + Please make an effort to make sure your issue isn't already reported. + + Do not create issues involving software piracy, our rules specifically prohibit this. Otherwise your issue will be closed and you will be banned in this repository. + - type: checkboxes + id: checklist + attributes: + label: Checklist + options: + - label: I have searched for a similar issue in this repository and did not find one. + required: true + - label: I am using an official build obtained from [releases](https://github.com/shadps4-emu/shadPS4/releases) or updated one of those builds using its in-app updater. + required: true + - label: I have re-dumped the game and performed a clean install without mods. + required: true + - label: I have disabled all patches and cheats. + required: true + - label: I have all the required [system modules](https://github.com/shadps4-emu/shadps4-game-compatibility?tab=readme-ov-file#informations) installed. + required: true + - type: textarea + id: desc + attributes: + label: Describe the Bug + description: "A clear and concise description of what the bug is" + validations: + required: true + - type: textarea + id: repro + attributes: + label: Reproduction Steps + description: "Detailed steps to reproduce the behavior" + validations: + required: true + - type: input + id: os + attributes: + label: Specify OS Version + placeholder: "Example: Windows 11, Arch Linux, MacOS 15" + validations: + required: true + - type: input + id: cpu + attributes: + label: CPU + placeholder: "Example: Intel Core i7-8700" + validations: + required: true + - type: input + id: gpu + attributes: + label: GPU + placeholder: "Example: nVidia GTX 1650" + validations: + required: true + - type: input + id: ram + attributes: + label: Amount of RAM in GB + placeholder: "Example: 16 GB" + validations: + required: true + - type: input + id: vram + attributes: + label: Amount of VRAM in GB + placeholder: "Example: 8 GB" + validations: + required: true + - type: textarea + id: logs 
+ attributes: + label: "Logs" + description: Attach any logs here. Log can be found by right clicking on a game name -> Open Folder... -> Open Log Folder. Make sure that the log type is set to `sync`. + validations: + required: false From 90912233967f97b41d4ed8017eae15667adf1547 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Mon, 30 Dec 2024 21:59:14 -0600 Subject: [PATCH 18/53] Fix sceKernelGetEventFilter (#1987) --- src/core/libraries/kernel/equeue.cpp | 2 +- src/core/libraries/kernel/equeue.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/libraries/kernel/equeue.cpp b/src/core/libraries/kernel/equeue.cpp index 3ae77e46b..03259cd22 100644 --- a/src/core/libraries/kernel/equeue.cpp +++ b/src/core/libraries/kernel/equeue.cpp @@ -346,7 +346,7 @@ int PS4_SYSV_ABI sceKernelDeleteUserEvent(SceKernelEqueue eq, int id) { return ORBIS_OK; } -s16 PS4_SYSV_ABI sceKernelGetEventFilter(const SceKernelEvent* ev) { +int PS4_SYSV_ABI sceKernelGetEventFilter(const SceKernelEvent* ev) { return ev->filter; } diff --git a/src/core/libraries/kernel/equeue.h b/src/core/libraries/kernel/equeue.h index f8759137c..17900238f 100644 --- a/src/core/libraries/kernel/equeue.h +++ b/src/core/libraries/kernel/equeue.h @@ -21,7 +21,7 @@ class EqueueInternal; struct EqueueEvent; struct SceKernelEvent { - enum Filter : s16 { + enum Filter : int { None = 0, Read = -1, Write = -2, From 62780e4e431b42cacadeb63dfbd360b0891f4928 Mon Sep 17 00:00:00 2001 From: baggins183 Date: Mon, 30 Dec 2024 20:00:52 -0800 Subject: [PATCH 19/53] Initialize V0 to PrimitiveId in hull shader (#1985) --- src/shader_recompiler/frontend/translate/translate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index a14bff706..237acf309 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ 
b/src/shader_recompiler/frontend/translate/translate.cpp @@ -124,12 +124,12 @@ void Translator::EmitPrologue() { } break; case LogicalStage::TessellationControl: { + ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); // Should be laid out like: // [0:8]: patch id within VGT // [8:12]: output control point id ir.SetVectorReg(IR::VectorReg::V1, ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo)); - // TODO PrimitiveId is probably V2 but haven't seen it yet break; } case LogicalStage::TessellationEval: From 284f473a52456917e2cf7fd61a151f39abf09403 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Mon, 30 Dec 2024 20:10:29 -0800 Subject: [PATCH 20/53] shader_recompiler: Fix BitCount64 and FindILsb64 (#1978) --- .../backend/spirv/emit_spirv_integer.cpp | 19 +++++++++++++++++-- .../frontend/translate/scalar_alu.cpp | 5 ++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index def1f816e..70411ecec 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -202,7 +202,14 @@ Id EmitBitCount32(EmitContext& ctx, Id value) { } Id EmitBitCount64(EmitContext& ctx, Id value) { - return ctx.OpBitCount(ctx.U64, value); + // Vulkan restricts some bitwise operations to 32-bit only, so decompose into + // two 32-bit values and add the result. 
+ const Id unpacked{ctx.OpBitcast(ctx.U32[2], value)}; + const Id lo{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 0U)}; + const Id hi{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 1U)}; + const Id lo_count{ctx.OpBitCount(ctx.U32[1], lo)}; + const Id hi_count{ctx.OpBitCount(ctx.U32[1], hi)}; + return ctx.OpIAdd(ctx.U32[1], lo_count, hi_count); } Id EmitBitwiseNot32(EmitContext& ctx, Id value) { @@ -222,7 +229,15 @@ Id EmitFindILsb32(EmitContext& ctx, Id value) { } Id EmitFindILsb64(EmitContext& ctx, Id value) { - return ctx.OpFindILsb(ctx.U64, value); + // Vulkan restricts some bitwise operations to 32-bit only, so decompose into + // two 32-bit values and select the correct result. + const Id unpacked{ctx.OpBitcast(ctx.U32[2], value)}; + const Id lo{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 0U)}; + const Id hi{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 1U)}; + const Id lo_lsb{ctx.OpFindILsb(ctx.U32[1], lo)}; + const Id hi_lsb{ctx.OpFindILsb(ctx.U32[1], hi)}; + const Id found_lo{ctx.OpINotEqual(ctx.U32[1], lo_lsb, ctx.ConstU32(u32(-1)))}; + return ctx.OpSelect(ctx.U32[1], found_lo, lo_lsb, hi_lsb); } Id EmitSMin32(EmitContext& ctx, Id a, Id b) { diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 3a2b01a90..e18cda012 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -597,14 +597,13 @@ void Translator::S_BCNT1_I32_B64(const GcnInst& inst) { void Translator::S_FF1_I32_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 result{ir.Select(ir.IEqual(src0, ir.Imm32(0U)), ir.Imm32(-1), ir.FindILsb(src0))}; + const IR::U32 result{ir.FindILsb(src0)}; SetDst(inst.dst[0], result); } void Translator::S_FF1_I32_B64(const GcnInst& inst) { const IR::U64 src0{GetSrc64(inst.src[0])}; - const IR::U32 result{ - ir.Select(ir.IEqual(src0, ir.Imm64(u64(0))), ir.Imm32(-1), 
ir.FindILsb(src0))}; + const IR::U32 result{ir.FindILsb(src0)}; SetDst(inst.dst[0], result); } From 41d64a200dc6ac519b2db0df9d7692afef3a8344 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Mon, 30 Dec 2024 20:14:47 -0800 Subject: [PATCH 21/53] shader_recompiler: Add swizzle support for unsupported formats. (#1869) * shader_recompiler: Add swizzle support for unsupported formats. * renderer_vulkan: Rework MRT swizzles and add unsupported format swizzle support. * shader_recompiler: Clean up swizzle handling and handle ImageRead storage swizzle. * shader_recompiler: Fix type errors * liverpool_to_vk: Remove redundant clear color swizzles. * shader_recompiler: Reduce CompositeConstruct to constants where possible. * shader_recompiler: Fix ImageRead/Write and StoreBufferFormatF32 types. * amdgpu: Add a few more unsupported format remaps. --- CMakeLists.txt | 1 + .../backend/spirv/emit_spirv_composite.cpp | 98 +++++++++-- .../backend/spirv/emit_spirv_image.cpp | 6 +- .../backend/spirv/emit_spirv_instructions.h | 38 ++++- .../frontend/translate/export.cpp | 32 ++-- .../frontend/translate/translate.cpp | 25 +-- .../frontend/translate/vector_memory.cpp | 4 +- src/shader_recompiler/ir/ir_emitter.cpp | 80 +++++++++ src/shader_recompiler/ir/ir_emitter.h | 7 + src/shader_recompiler/ir/opcodes.inc | 18 +- .../ir/passes/resource_tracking_pass.cpp | 100 ++++++----- src/shader_recompiler/ir/reinterpret.h | 24 +++ src/shader_recompiler/runtime_info.h | 2 +- src/shader_recompiler/specialization.h | 10 +- src/video_core/amdgpu/liverpool.h | 50 +++++- src/video_core/amdgpu/resource.h | 156 ++++++++++++------ .../renderer_vulkan/liverpool_to_vk.cpp | 97 ++++------- .../renderer_vulkan/liverpool_to_vk.h | 7 +- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 14 +- src/video_core/texture_cache/image_info.cpp | 4 +- src/video_core/texture_cache/image_view.cpp | 29 +--- 22 files changed, 522 
insertions(+), 282 deletions(-) create mode 100644 src/shader_recompiler/ir/reinterpret.h diff --git a/CMakeLists.txt b/CMakeLists.txt index af811e9fb..833bbe3ce 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -701,6 +701,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/post_order.h src/shader_recompiler/ir/program.cpp src/shader_recompiler/ir/program.h + src/shader_recompiler/ir/reinterpret.h src/shader_recompiler/ir/reg.h src/shader_recompiler/ir/type.cpp src/shader_recompiler/ir/type.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 74e736cf6..d064b5d05 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -6,16 +6,22 @@ namespace Shader::Backend::SPIRV { -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { - return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); +template +Id EmitCompositeConstruct(EmitContext& ctx, IR::Inst* inst, Args&&... args) { + return inst->AreAllArgsImmediates() ? ctx.ConstantComposite(args...) 
+ : ctx.OpCompositeConstruct(args...); } -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { - return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3); +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.U32[2], e1, e2); } -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { - return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4); +Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.U32[3], e1, e2, e3); +} + +Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.U32[4], e1, e2, e3, e4); } Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) { @@ -42,16 +48,30 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); } -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { - return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); +Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.U32[2], composite1, composite2, comp0, comp1); } -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { - return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); +Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.U32[3], composite1, composite2, comp0, comp1, comp2); } -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { - return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); +Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.U32[4], 
composite1, composite2, comp0, comp1, comp2, comp3); +} + +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[2], e1, e2); +} + +Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[3], e1, e2, e3); +} + +Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[4], e1, e2, e3, e4); } Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { @@ -78,16 +98,30 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); } -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { - return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); +Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F16[2], composite1, composite2, comp0, comp1); } -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { - return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); +Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F16[3], composite1, composite2, comp0, comp1, comp2); } -Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { - return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); +Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F16[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[2], e1, e2); +} + +Id EmitCompositeConstructF32x3(EmitContext& ctx, 
IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[3], e1, e2, e3); +} + +Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2, e3, e4); } Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { @@ -114,6 +148,20 @@ Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); } +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F32[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F32[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F32[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + void EmitCompositeConstructF64x2(EmitContext&) { UNREACHABLE_MSG("SPIR-V Instruction"); } @@ -150,4 +198,18 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); } +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F64[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F64[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F64[4], composite1, composite2, 
comp0, comp1, comp2, comp3); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 2946edab3..c3d937fe7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -238,7 +238,7 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod } texel = ctx.OpImageRead(color_type, image, coords, operands.mask, operands.operands); } - return !texture.is_integer ? ctx.OpBitcast(ctx.U32[4], texel) : texel; + return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texel) : texel; } void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms, @@ -253,8 +253,8 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id } else if (Sirit::ValidId(lod)) { LOG_WARNING(Render, "Image write with LOD not supported by driver"); } - ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask, - operands.operands); + const Id texel = texture.is_integer ? 
ctx.OpBitcast(color_type, color) : color; + ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index d26cf6662..0d9fcff46 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -120,33 +120,48 @@ Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value); -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 
comp0, u32 comp1); +Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x4(EmitContext& 
ctx, Id composite, u32 index); Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); @@ -156,6 +171,11 @@ void EmitCompositeExtractF64x4(EmitContext& ctx); Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 5927aa696..83240e17f 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -25,34 +25,28 @@ void Translator::EmitExport(const GcnInst& inst) 
{ IR::VectorReg(inst.src[3].code), }; - const auto swizzle = [&](u32 comp) { + const auto set_attribute = [&](u32 comp, IR::F32 value) { if (!IR::IsMrt(attrib)) { - return comp; + ir.SetAttribute(attrib, value, comp); + return; } const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0); - switch (runtime_info.fs_info.color_buffers[index].mrt_swizzle) { - case MrtSwizzle::Identity: - return comp; - case MrtSwizzle::Alt: - static constexpr std::array AltSwizzle = {2, 1, 0, 3}; - return AltSwizzle[comp]; - case MrtSwizzle::Reverse: - static constexpr std::array RevSwizzle = {3, 2, 1, 0}; - return RevSwizzle[comp]; - case MrtSwizzle::ReverseAlt: - static constexpr std::array AltRevSwizzle = {3, 0, 1, 2}; - return AltRevSwizzle[comp]; - default: - UNREACHABLE(); + const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle; + const std::array swizzle_array = {r, g, b, a}; + const auto swizzled_comp = swizzle_array[comp]; + if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) { + ir.SetAttribute(attrib, value, comp); + return; } + ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red)); }; const auto unpack = [&](u32 idx) { const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx])); const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)}; const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)}; - ir.SetAttribute(attrib, r, swizzle(idx * 2)); - ir.SetAttribute(attrib, g, swizzle(idx * 2 + 1)); + set_attribute(idx * 2, r); + set_attribute(idx * 2 + 1, g); }; // Components are float16 packed into a VGPR @@ -73,7 +67,7 @@ void Translator::EmitExport(const GcnInst& inst) { continue; } const IR::F32 comp = ir.GetVectorReg(vsrc[i]); - ir.SetAttribute(attrib, comp, swizzle(i)); + set_attribute(i, comp); } } if (IR::IsMrt(attrib)) { diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 237acf309..7f5504663 100644 --- 
a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -10,6 +10,7 @@ #include "shader_recompiler/info.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/ir/reinterpret.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h" @@ -475,26 +476,12 @@ void Translator::EmitFetch(const GcnInst& inst) { // Read the V# of the attribute to figure out component number and type. const auto buffer = info.ReadUdReg(attrib.sgpr_base, attrib.dword_offset); + const auto values = + ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1), + ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3)); + const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect()); for (u32 i = 0; i < 4; i++) { - const IR::F32 comp = [&] { - switch (buffer.GetSwizzle(i)) { - case AmdGpu::CompSwizzle::One: - return ir.Imm32(1.f); - case AmdGpu::CompSwizzle::Zero: - return ir.Imm32(0.f); - case AmdGpu::CompSwizzle::Red: - return ir.GetAttribute(attr, 0); - case AmdGpu::CompSwizzle::Green: - return ir.GetAttribute(attr, 1); - case AmdGpu::CompSwizzle::Blue: - return ir.GetAttribute(attr, 2); - case AmdGpu::CompSwizzle::Alpha: - return ir.GetAttribute(attr, 3); - default: - UNREACHABLE(); - } - }(); - ir.SetVectorReg(dst_reg++, comp); + ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)}); } // In case of programmable step rates we need to fallback to instance data pulling in diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 79d46cd42..c5be08b7d 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -326,7 +326,7 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) { const IR::VectorReg 
src_reg{inst.src[1].code}; - std::array comps{}; + std::array comps{}; for (u32 i = 0; i < num_dwords; i++) { comps[i] = ir.GetVectorReg(src_reg + i); } @@ -424,7 +424,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) { if (((mimg.dmask >> i) & 1) == 0) { continue; } - IR::U32 value = IR::U32{ir.CompositeExtract(texel, i)}; + IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)}; ir.SetVectorReg(dest_reg++, value); } } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 20e6eae0b..823f9bdcd 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -663,6 +663,86 @@ Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_ } } +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 4 || comp1 >= 4) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}", comp0, comp1); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast(comp0)}, + Value{static_cast(comp1)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x2); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x2); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x2); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x2); + default: + ThrowInvalidType(vector1.Type()); + } +} + +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 6 || comp1 >= 6 || comp2 >= 6) { + UNREACHABLE_MSG("One or more out of bounds elements {}, 
{}, {}", comp0, comp1, comp2); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast(comp0)}, + Value{static_cast(comp1)}, Value{static_cast(comp2)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x3); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x3); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x3); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x3); + default: + ThrowInvalidType(vector1.Type()); + } +} + +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2, size_t comp3) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 8 || comp1 >= 8 || comp2 >= 8 || comp3 >= 8) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}, {}", comp0, comp1, comp2, + comp3); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast(comp0)}, + Value{static_cast(comp1)}, Value{static_cast(comp2)}, + Value{static_cast(comp3)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x4); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x4); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x4); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x4); + default: + ThrowInvalidType(vector1.Type()); + } +} + Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { if (true_value.Type() != false_value.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", true_value.Type(), false_value.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index f65baee2a..9aab9459b 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ 
b/src/shader_recompiler/ir/ir_emitter.h @@ -155,6 +155,13 @@ public: [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2, size_t comp3); + [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 1194c3792..6242a230e 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -99,7 +99,7 @@ OPCODE(StoreBufferU32, Void, Opaq OPCODE(StoreBufferU32x2, Void, Opaque, Opaque, U32x2, ) OPCODE(StoreBufferU32x3, Void, Opaque, Opaque, U32x3, ) OPCODE(StoreBufferU32x4, Void, Opaque, Opaque, U32x4, ) -OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, U32x4, ) +OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, ) // Buffer atomic operations OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) @@ -124,6 +124,9 @@ OPCODE(CompositeExtractU32x4, U32, U32x OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) +OPCODE(CompositeShuffleU32x2, U32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeShuffleU32x3, U32x3, U32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeShuffleU32x4, U32x4, U32x4, U32x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) @@ -133,6 +136,9 @@ 
OPCODE(CompositeExtractF16x4, F16, F16x OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) +OPCODE(CompositeShuffleF16x2, F16x2, F16x2, F16x2, U32, U32, ) +OPCODE(CompositeShuffleF16x3, F16x3, F16x3, F16x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF16x4, F16x4, F16x4, F16x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) @@ -142,6 +148,9 @@ OPCODE(CompositeExtractF32x4, F32, F32x OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) +OPCODE(CompositeShuffleF32x2, F32x2, F32x2, F32x2, U32, U32, ) +OPCODE(CompositeShuffleF32x3, F32x3, F32x3, F32x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF32x4, F32x4, F32x4, F32x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) @@ -151,6 +160,9 @@ OPCODE(CompositeExtractF64x4, F64, F64x OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) +OPCODE(CompositeShuffleF64x2, F64x2, F64x2, F64x2, U32, U32, ) +OPCODE(CompositeShuffleF64x3, F64x3, F64x3, F64x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF64x4, F64x4, F64x4, F64x4, U32, U32, U32, U32, ) // Select operations OPCODE(SelectU1, U1, U1, U1, U1, ) @@ -346,8 +358,8 @@ OPCODE(ImageGatherDref, F32x4, Opaq OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageRead, U32x4, Opaque, Opaque, U32, U32, ) -OPCODE(ImageWrite, Void, Opaque, Opaque, 
U32, U32, U32x4, ) +OPCODE(ImageRead, F32x4, Opaque, Opaque, U32, U32, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32, F32x4, ) // Image atomic operations OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index e6d23bfe7..636752912 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/ir/reinterpret.h" #include "video_core/amdgpu/resource.h" namespace Shader::Optimization { @@ -128,35 +129,6 @@ bool IsImageInstruction(const IR::Inst& inst) { } } -IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) { - boost::container::static_vector comps; - for (u32 i = 0; i < 4; i++) { - switch (sharp.GetSwizzle(i)) { - case AmdGpu::CompSwizzle::Zero: - comps.emplace_back(ir.Imm32(0.f)); - break; - case AmdGpu::CompSwizzle::One: - comps.emplace_back(ir.Imm32(1.f)); - break; - case AmdGpu::CompSwizzle::Red: - comps.emplace_back(ir.CompositeExtract(texel, 0)); - break; - case AmdGpu::CompSwizzle::Green: - comps.emplace_back(ir.CompositeExtract(texel, 1)); - break; - case AmdGpu::CompSwizzle::Blue: - comps.emplace_back(ir.CompositeExtract(texel, 2)); - break; - case AmdGpu::CompSwizzle::Alpha: - comps.emplace_back(ir.CompositeExtract(texel, 3)); - break; - default: - UNREACHABLE(); - } - } - return ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); -}; - class Descriptors { public: explicit Descriptors(Info& info_) @@ -409,15 +381,6 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; inst.SetArg(0, ir.Imm32(binding)); 
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); - - // Apply dst_sel swizzle on formatted buffer instructions - if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { - inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2))); - } else { - const auto inst_info = inst.Flags(); - const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); - inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel)); - } } IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, @@ -765,10 +728,6 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip }(); inst.SetArg(1, coords); - if (inst.GetOpcode() == IR::Opcode::ImageWrite) { - inst.SetArg(4, SwizzleVector(ir, image, inst.Arg(4))); - } - if (inst_info.has_lod) { ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead || inst.GetOpcode() == IR::Opcode::ImageWrite); @@ -783,6 +742,50 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } } +void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { + const auto binding = inst.Arg(0).U32(); + const auto buffer_res = info.texture_buffers[binding]; + const auto buffer = buffer_res.GetSharp(info); + if (!buffer.Valid()) { + // Don't need to swizzle invalid buffer. 
+ return; + } + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { + inst.SetArg(2, ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect())); + } else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) { + const auto inst_info = inst.Flags(); + const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); + const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect()); + inst.ReplaceUsesWith(swizzled); + } +} + +void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { + const auto binding = inst.Arg(0).U32(); + const auto image_res = info.images[binding & 0xFFFF]; + const auto image = image_res.GetSharp(info); + if (!image.Valid() || !image_res.IsStorage(image)) { + // Don't need to swizzle invalid or non-storage image. + return; + } + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::ImageWrite) { + inst.SetArg(4, ApplySwizzle(ir, inst.Arg(4), image.DstSelect())); + } else if (inst.GetOpcode() == IR::Opcode::ImageRead) { + const auto inst_info = inst.Flags(); + const auto lod = inst.Arg(2); + const auto ms = inst.Arg(3); + const auto texel = + ir.ImageRead(inst.Arg(0), inst.Arg(1), lod.IsEmpty() ? IR::U32{} : IR::U32{lod}, + ms.IsEmpty() ? IR::U32{} : IR::U32{ms}, inst_info); + const auto swizzled = ApplySwizzle(ir, texel, image.DstSelect()); + inst.ReplaceUsesWith(swizzled); + } +} + void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { // Insert gds binding in the shader if it doesn't exist already. @@ -852,6 +855,19 @@ void ResourceTrackingPass(IR::Program& program) { } } } + // Second pass to reinterpret format read/write where needed, since we now know + // the bindings and their properties. 
+ for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (IsTextureBufferInstruction(inst)) { + PatchTextureBufferInterpretation(*block, inst, info); + continue; + } + if (IsImageInstruction(inst)) { + PatchImageInterpretation(*block, inst, info); + } + } + } } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/reinterpret.h b/src/shader_recompiler/ir/reinterpret.h new file mode 100644 index 000000000..73d587a56 --- /dev/null +++ b/src/shader_recompiler/ir/reinterpret.h @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/ir/ir_emitter.h" +#include "video_core/amdgpu/resource.h" + +namespace Shader::IR { + +/// Applies a component swizzle to a vec4. +inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::CompMapping& swizzle) { + // Constants are indexed as 0 and 1, and components are 4-7. Thus we can apply a swizzle + // using two vectors and a shuffle, using one vector of constants and one of the components. 
+ const auto zero = ir.Imm32(0.f); + const auto one = ir.Imm32(1.f); + const auto constants_vec = ir.CompositeConstruct(zero, one, zero, zero); + const auto swizzled = + ir.CompositeShuffle(constants_vec, vector, size_t(swizzle.r), size_t(swizzle.g), + size_t(swizzle.b), size_t(swizzle.a)); + return swizzled; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index bbf74f5d3..781a0b14a 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -180,7 +180,7 @@ struct FragmentRuntimeInfo { std::array inputs; struct PsColorBuffer { AmdGpu::NumberFormat num_format; - MrtSwizzle mrt_swizzle; + AmdGpu::CompMapping swizzle; auto operator<=>(const PsColorBuffer&) const noexcept = default; }; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 5bf97ee51..f8a86c63b 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -31,7 +31,7 @@ struct BufferSpecialization { struct TextureBufferSpecialization { bool is_integer = false; - u32 dst_select = 0; + AmdGpu::CompMapping dst_select{}; auto operator<=>(const TextureBufferSpecialization&) const = default; }; @@ -40,13 +40,9 @@ struct ImageSpecialization { AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; bool is_integer = false; bool is_storage = false; - u32 dst_select = 0; + AmdGpu::CompMapping dst_select{}; - bool operator==(const ImageSpecialization& other) const { - return type == other.type && is_integer == other.is_integer && - is_storage == other.is_storage && - (dst_select != 0 ? 
dst_select == other.dst_select : true); - } + auto operator<=>(const ImageSpecialization&) const = default; }; struct FMaskSpecialization { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 83271a82d..f1607f03e 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -889,10 +889,54 @@ struct Liverpool { return !info.linear_general; } - NumberFormat NumFormat() const { + [[nodiscard]] DataFormat DataFormat() const { + return RemapDataFormat(info.format); + } + + [[nodiscard]] NumberFormat NumFormat() const { // There is a small difference between T# and CB number types, account for it. - return info.number_type == AmdGpu::NumberFormat::SnormNz ? AmdGpu::NumberFormat::Srgb - : info.number_type.Value(); + return RemapNumberFormat(info.number_type == NumberFormat::SnormNz + ? NumberFormat::Srgb + : info.number_type.Value()); + } + + [[nodiscard]] CompMapping Swizzle() const { + // clang-format off + static constexpr std::array, 4> mrt_swizzles{{ + // Standard + std::array{{ + {.r = CompSwizzle::Red, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Alpha}, + }}, + // Alternate + std::array{{ + {.r = CompSwizzle::Green, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Alpha, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Alpha, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Alpha}, + }}, + // StandardReverse + std::array{{ + {.r = CompSwizzle::Blue, .g = CompSwizzle::Zero, .b = 
CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Green, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Blue, .b = CompSwizzle::Green, .a = CompSwizzle::Red}, + }}, + // AlternateReverse + std::array{{ + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Green, .a = CompSwizzle::Blue}, + }}, + }}; + // clang-format on + const auto swap_idx = static_cast(info.comp_swap.Value()); + const auto components_idx = NumComponents(info.format) - 1; + const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx]; + return RemapComponents(info.format, mrt_swizzle); } }; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 6bbe1fb7e..4de25adbf 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -20,6 +20,85 @@ enum class CompSwizzle : u32 { Alpha = 7, }; +struct CompMapping { + CompSwizzle r : 3; + CompSwizzle g : 3; + CompSwizzle b : 3; + CompSwizzle a : 3; + + auto operator<=>(const CompMapping& other) const = default; + + template + [[nodiscard]] std::array Apply(const std::array& data) const { + return { + ApplySingle(data, r), + ApplySingle(data, g), + ApplySingle(data, b), + ApplySingle(data, a), + }; + } + +private: + template + T ApplySingle(const std::array& data, const CompSwizzle swizzle) const { + switch (swizzle) { + case CompSwizzle::Zero: + return T(0); + case CompSwizzle::One: + return T(1); + case CompSwizzle::Red: + return data[0]; + case CompSwizzle::Green: + return data[1]; + case 
CompSwizzle::Blue: + return data[2]; + case CompSwizzle::Alpha: + return data[3]; + default: + UNREACHABLE(); + } + } +}; + +inline DataFormat RemapDataFormat(const DataFormat format) { + switch (format) { + case DataFormat::Format11_11_10: + return DataFormat::Format10_11_11; + case DataFormat::Format10_10_10_2: + return DataFormat::Format2_10_10_10; + case DataFormat::Format5_5_5_1: + return DataFormat::Format1_5_5_5; + default: + return format; + } +} + +inline NumberFormat RemapNumberFormat(const NumberFormat format) { + return format; +} + +inline CompMapping RemapComponents(const DataFormat format, const CompMapping components) { + switch (format) { + case DataFormat::Format11_11_10: + return { + .r = components.b, + .g = components.g, + .b = components.r, + .a = components.a, + }; + case DataFormat::Format10_10_10_2: + case DataFormat::Format5_5_5_1: + return { + .r = components.a, + .g = components.b, + .b = components.g, + .a = components.r, + }; + default: + return components; + } +} + // Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture] struct Buffer { u64 base_address : 44; @@ -52,21 +131,22 @@ struct Buffer { return std::memcmp(this, &other, sizeof(Buffer)) == 0; } - u32 DstSelect() const { - return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); - } - - CompSwizzle GetSwizzle(u32 comp) const noexcept { - const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; - return static_cast(select[comp]); + CompMapping DstSelect() const { + const CompMapping dst_sel{ + .r = CompSwizzle(dst_sel_x), + .g = CompSwizzle(dst_sel_y), + .b = CompSwizzle(dst_sel_z), + .a = CompSwizzle(dst_sel_w), + }; + return RemapComponents(DataFormat(data_format), dst_sel); } NumberFormat GetNumberFmt() const noexcept { - return static_cast(num_format); + return RemapNumberFormat(NumberFormat(num_format)); } DataFormat GetDataFmt() const noexcept { - return static_cast(data_format); + return 
RemapDataFormat(DataFormat(data_format)); } u32 GetStride() const noexcept { @@ -186,10 +266,11 @@ struct Image { static constexpr Image Null() { Image image{}; image.data_format = u64(DataFormat::Format8_8_8_8); - image.dst_sel_x = 4; - image.dst_sel_y = 5; - image.dst_sel_z = 6; - image.dst_sel_w = 7; + image.num_format = u64(NumberFormat::Unorm); + image.dst_sel_x = u64(CompSwizzle::Red); + image.dst_sel_y = u64(CompSwizzle::Green); + image.dst_sel_z = u64(CompSwizzle::Blue); + image.dst_sel_w = u64(CompSwizzle::Alpha); image.tiling_index = u64(TilingMode::Texture_MicroTiled); image.type = u64(ImageType::Color2D); return image; @@ -207,43 +288,14 @@ struct Image { return base_address != 0; } - u32 DstSelect() const { - return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); - } - - CompSwizzle GetSwizzle(u32 comp) const noexcept { - const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; - return static_cast(select[comp]); - } - - static char SelectComp(u32 sel) { - switch (sel) { - case 0: - return '0'; - case 1: - return '1'; - case 4: - return 'R'; - case 5: - return 'G'; - case 6: - return 'B'; - case 7: - return 'A'; - default: - UNREACHABLE(); - } - } - - std::string DstSelectName() const { - std::string result = "["; - u32 dst_sel = DstSelect(); - for (u32 i = 0; i < 4; i++) { - result += SelectComp(dst_sel & 7); - dst_sel >>= 3; - } - result += ']'; - return result; + CompMapping DstSelect() const { + const CompMapping dst_sel{ + .r = CompSwizzle(dst_sel_x), + .g = CompSwizzle(dst_sel_y), + .b = CompSwizzle(dst_sel_z), + .a = CompSwizzle(dst_sel_w), + }; + return RemapComponents(DataFormat(data_format), dst_sel); } u32 Pitch() const { @@ -285,11 +337,11 @@ struct Image { } DataFormat GetDataFmt() const noexcept { - return static_cast(data_format); + return RemapDataFormat(DataFormat(data_format)); } NumberFormat GetNumberFmt() const noexcept { - return static_cast(num_format); + return 
RemapNumberFormat(NumberFormat(num_format)); } TilingMode GetTilingMode() const { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 6bd50ab06..c41b760ba 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -324,6 +324,34 @@ vk::BorderColor BorderColor(AmdGpu::BorderColor color) { } } +vk::ComponentSwizzle ComponentSwizzle(AmdGpu::CompSwizzle comp_swizzle) { + switch (comp_swizzle) { + case AmdGpu::CompSwizzle::Zero: + return vk::ComponentSwizzle::eZero; + case AmdGpu::CompSwizzle::One: + return vk::ComponentSwizzle::eOne; + case AmdGpu::CompSwizzle::Red: + return vk::ComponentSwizzle::eR; + case AmdGpu::CompSwizzle::Green: + return vk::ComponentSwizzle::eG; + case AmdGpu::CompSwizzle::Blue: + return vk::ComponentSwizzle::eB; + case AmdGpu::CompSwizzle::Alpha: + return vk::ComponentSwizzle::eA; + default: + UNREACHABLE(); + } +} + +vk::ComponentMapping ComponentMapping(AmdGpu::CompMapping comp_mapping) { + return vk::ComponentMapping{ + .r = ComponentSwizzle(comp_mapping.r), + .g = ComponentSwizzle(comp_mapping.g), + .b = ComponentSwizzle(comp_mapping.b), + .a = ComponentSwizzle(comp_mapping.a), + }; +} + static constexpr vk::FormatFeatureFlags2 BufferRead = vk::FormatFeatureFlagBits2::eUniformTexelBuffer | vk::FormatFeatureFlagBits2::eVertexBuffer; static constexpr vk::FormatFeatureFlags2 BufferWrite = @@ -538,10 +566,8 @@ std::span SurfaceFormats() { // 10_11_11 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format10_11_11, AmdGpu::NumberFormat::Float, vk::Format::eB10G11R11UfloatPack32), - // 11_11_10 - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format11_11_10, AmdGpu::NumberFormat::Float, - vk::Format::eB10G11R11UfloatPack32), - // 10_10_10_2 + // 11_11_10 - Remapped to 10_11_11. + // 10_10_10_2 - Remapped to 2_10_10_10. 
// 2_10_10_10 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Unorm, vk::Format::eA2B10G10R10UnormPack32), @@ -614,7 +640,7 @@ std::span SurfaceFormats() { // 1_5_5_5 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format1_5_5_5, AmdGpu::NumberFormat::Unorm, vk::Format::eR5G5B5A1UnormPack16), - // 5_5_5_1 + // 5_5_5_1 - Remapped to 1_5_5_5. // 4_4_4_4 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format4_4_4_4, AmdGpu::NumberFormat::Unorm, vk::Format::eR4G4B4A4UnormPack16), @@ -677,31 +703,6 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu return format->vk_format; } -vk::Format AdjustColorBufferFormat(vk::Format base_format, - Liverpool::ColorBuffer::SwapMode comp_swap) { - const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate; - const bool comp_swap_reverse = comp_swap == Liverpool::ColorBuffer::SwapMode::StandardReverse; - const bool comp_swap_alt_reverse = - comp_swap == Liverpool::ColorBuffer::SwapMode::AlternateReverse; - if (comp_swap_alt) { - switch (base_format) { - case vk::Format::eR8G8B8A8Unorm: - return vk::Format::eB8G8R8A8Unorm; - case vk::Format::eB8G8R8A8Unorm: - return vk::Format::eR8G8B8A8Unorm; - case vk::Format::eR8G8B8A8Srgb: - return vk::Format::eB8G8R8A8Srgb; - case vk::Format::eB8G8R8A8Srgb: - return vk::Format::eR8G8B8A8Srgb; - case vk::Format::eA2B10G10R10UnormPack32: - return vk::Format::eA2R10G10B10UnormPack32; - default: - break; - } - } - return base_format; -} - static constexpr DepthFormatInfo CreateDepthFormatInfo( const DepthBuffer::ZFormat z_format, const DepthBuffer::StencilFormat stencil_format, const vk::Format vk_format) { @@ -744,21 +745,12 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat } vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { - const auto comp_swap = color_buffer.info.comp_swap.Value(); - const auto format = color_buffer.info.format.Value(); - 
const auto number_type = color_buffer.info.number_type.Value(); + const auto comp_swizzle = color_buffer.Swizzle(); + const auto format = color_buffer.DataFormat(); + const auto number_type = color_buffer.NumFormat(); const auto& c0 = color_buffer.clear_word0; const auto& c1 = color_buffer.clear_word1; - const auto num_bits = AmdGpu::NumBits(color_buffer.info.format); - const auto num_components = AmdGpu::NumComponents(format); - - const bool comp_swap_alt = - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::Alternate || - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::AlternateReverse; - const bool comp_swap_reverse = - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::StandardReverse || - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::AlternateReverse; vk::ClearColorValue color{}; @@ -1079,26 +1071,7 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color break; } - if (num_components == 1) { - if (comp_swap != Liverpool::ColorBuffer::SwapMode::Standard) { - color.float32[static_cast(comp_swap)] = color.float32[0]; - color.float32[0] = 0.0f; - } - } else { - if (comp_swap_alt && num_components == 4) { - std::swap(color.float32[0], color.float32[2]); - } - - if (comp_swap_reverse) { - std::reverse(std::begin(color.float32), std::begin(color.float32) + num_components); - } - - if (comp_swap_alt && num_components != 4) { - color.float32[3] = color.float32[num_components - 1]; - color.float32[num_components - 1] = 0.0f; - } - } - + color.float32 = comp_swizzle.Apply(color.float32); return {.color = color}; } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 25a27e20e..a68280e7d 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -42,6 +42,10 @@ vk::SamplerMipmapMode MipFilter(AmdGpu::MipFilter filter); vk::BorderColor BorderColor(AmdGpu::BorderColor color); +vk::ComponentSwizzle 
ComponentSwizzle(AmdGpu::CompSwizzle comp_swizzle); + +vk::ComponentMapping ComponentMapping(AmdGpu::CompMapping comp_mapping); + struct SurfaceFormatInfo { AmdGpu::DataFormat data_format; AmdGpu::NumberFormat number_format; @@ -52,9 +56,6 @@ std::span SurfaceFormats(); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); -vk::Format AdjustColorBufferFormat(vk::Format base_format, - Liverpool::ColorBuffer::SwapMode comp_swap); - struct DepthFormatInfo { Liverpool::DepthBuffer::ZFormat z_format; Liverpool::DepthBuffer::StencilFormat stencil_format; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ee8afa3e6..c8f4999b1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -32,7 +32,7 @@ struct GraphicsPipelineKey { u32 num_color_attachments; std::array color_formats; std::array color_num_formats; - std::array mrt_swizzles; + std::array color_swizzles; vk::Format depth_format; vk::Format stencil_format; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c880cad70..cd1b42b05 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -168,7 +168,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS for (u32 i = 0; i < Shader::MaxColorBuffers; i++) { info.fs_info.color_buffers[i] = { .num_format = graphics_key.color_num_formats[i], - .mrt_swizzle = static_cast(graphics_key.mrt_swizzles[i]), + .swizzle = graphics_key.color_swizzles[i], }; } break; @@ -304,7 +304,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm); key.blend_controls.fill({}); key.write_masks.fill({}); - key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); + 
key.color_swizzles.fill({}); key.vertex_buffer_formats.fill(vk::Format::eUndefined); key.patch_control_points = 0; @@ -327,14 +327,10 @@ bool PipelineCache::RefreshGraphicsKey() { continue; } - const auto base_format = - LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); key.color_formats[remapped_cb] = - LiverpoolToVK::AdjustColorBufferFormat(base_format, col_buf.info.comp_swap.Value()); + LiverpoolToVK::SurfaceFormat(col_buf.DataFormat(), col_buf.NumFormat()); key.color_num_formats[remapped_cb] = col_buf.NumFormat(); - if (base_format == key.color_formats[remapped_cb]) { - key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); - } + key.color_swizzles[remapped_cb] = col_buf.Swizzle(); } fetch_shader = std::nullopt; @@ -450,7 +446,7 @@ bool PipelineCache::RefreshGraphicsKey() { // of the latter we need to change format to undefined, and either way we need to // increment the index for the null attachment binding. key.color_formats[remapped_cb] = vk::Format::eUndefined; - key.mrt_swizzles[remapped_cb] = Liverpool::ColorBuffer::SwapMode::Standard; + key.color_swizzles[remapped_cb] = {}; ++remapped_cb; continue; } diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 2cc4aab38..0559f1be3 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -265,9 +265,9 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept { props.is_tiled = buffer.IsTiled(); tiling_mode = buffer.GetTilingMode(); - pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); + pixel_format = LiverpoolToVK::SurfaceFormat(buffer.DataFormat(), buffer.NumFormat()); num_samples = buffer.NumSamples(); - num_bits = NumBits(buffer.info.format); + num_bits = NumBits(buffer.DataFormat()); type = vk::ImageType::e2D; size.width = hint.Valid() ? 
hint.width : buffer.Pitch(); size.height = hint.Valid() ? hint.height : buffer.Height(); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 9e67b7f73..a9ae41dd1 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -31,25 +31,6 @@ vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) { } } -vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) { - switch (dst_sel) { - case 0: - return vk::ComponentSwizzle::eZero; - case 1: - return vk::ComponentSwizzle::eOne; - case 4: - return vk::ComponentSwizzle::eR; - case 5: - return vk::ComponentSwizzle::eG; - case 6: - return vk::ComponentSwizzle::eB; - case 7: - return vk::ComponentSwizzle::eA; - default: - UNREACHABLE(); - } -} - ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept : is_storage{desc.IsStorage(image)} { const auto dfmt = image.GetDataFmt(); @@ -87,21 +68,15 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso } if (!is_storage) { - mapping.r = ConvertComponentSwizzle(image.dst_sel_x); - mapping.g = ConvertComponentSwizzle(image.dst_sel_y); - mapping.b = ConvertComponentSwizzle(image.dst_sel_z); - mapping.a = ConvertComponentSwizzle(image.dst_sel_w); + mapping = Vulkan::LiverpoolToVK::ComponentMapping(image.DstSelect()); } } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept { - const auto base_format = - Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); range.base.layer = col_buffer.view.slice_start; range.extent.layers = col_buffer.NumSlices() - range.base.layer; type = range.extent.layers > 1 ? 
vk::ImageViewType::e2DArray : vk::ImageViewType::e2D; - format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat(base_format, - col_buffer.info.comp_swap.Value()); + format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.DataFormat(), col_buffer.NumFormat()); } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, From e1cf1f500da9237eccd4c4a4352332e7ca8326cf Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 31 Dec 2024 00:04:26 -0800 Subject: [PATCH 22/53] native_clock: Remove unused process code. (#1989) --- src/common/native_clock.cpp | 11 ----------- src/common/native_clock.h | 1 - 2 files changed, 12 deletions(-) diff --git a/src/common/native_clock.cpp b/src/common/native_clock.cpp index c3fa637aa..0c05dbe84 100644 --- a/src/common/native_clock.cpp +++ b/src/common/native_clock.cpp @@ -4,11 +4,6 @@ #include "common/native_clock.h" #include "common/rdtsc.h" #include "common/uint128.h" -#ifdef _WIN64 -#include -#else -#include -#endif namespace Common { @@ -34,10 +29,4 @@ u64 NativeClock::GetUptime() const { return FencedRDTSC(); } -u64 NativeClock::GetProcessTimeUS() const { - timespec ret; - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ret); - return ret.tv_nsec / 1000 + ret.tv_sec * 1000000; -} - } // namespace Common diff --git a/src/common/native_clock.h b/src/common/native_clock.h index b5e389452..1542c2f3a 100644 --- a/src/common/native_clock.h +++ b/src/common/native_clock.h @@ -20,7 +20,6 @@ public: u64 GetTimeUS(u64 base_ptc = 0) const; u64 GetTimeMS(u64 base_ptc = 0) const; u64 GetUptime() const; - u64 GetProcessTimeUS() const; private: u64 rdtsc_frequency; From 052473e04882c871b1e9bc70769ef7db6045af7b Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Tue, 31 Dec 2024 12:02:33 +0300 Subject: [PATCH 23/53] infra: emphasize the contact information (#1990) --- .github/ISSUE_TEMPLATE/app-bug-report.yaml | 4 ++-- .github/ISSUE_TEMPLATE/game-bug-report.yaml | 4 ++-- 2 files 
changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/app-bug-report.yaml b/.github/ISSUE_TEMPLATE/app-bug-report.yaml index c38bbb814..cd540e06e 100644 --- a/.github/ISSUE_TEMPLATE/app-bug-report.yaml +++ b/.github/ISSUE_TEMPLATE/app-bug-report.yaml @@ -10,8 +10,8 @@ body: value: | ## Important: Read First - Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only - If you need help using the emulator or unsure about your issue please contact us on [discord](https://discord.gg/bFJxfftGW6). + **Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only. + If you have a support request or are unsure about the nature of your issue please contact us on [discord](https://discord.gg/bFJxfftGW6).** Please make an effort to make sure your issue isn't already reported. diff --git a/.github/ISSUE_TEMPLATE/game-bug-report.yaml b/.github/ISSUE_TEMPLATE/game-bug-report.yaml index 7eb9441d2..407ee2fe3 100644 --- a/.github/ISSUE_TEMPLATE/game-bug-report.yaml +++ b/.github/ISSUE_TEMPLATE/game-bug-report.yaml @@ -10,8 +10,8 @@ body: value: | ## Important: Read First - Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only - If you need help using the emulator or unsure about your issue please contact us on [discord](https://discord.gg/bFJxfftGW6). + **Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only. + If you have a support request or are unsure about the nature of your issue please contact us on [discord](https://discord.gg/bFJxfftGW6).** You can also check the [Game Compatibility Repository](https://github.com/shadps4-emu/shadps4-game-compatibility) for the information about the status of the game. 
From f41829707dfea64bf43e9f93d5737ee45fb2036d Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 31 Dec 2024 02:16:26 -0800 Subject: [PATCH 24/53] equeue: Fix regression from Filter type. (#1992) --- src/core/libraries/kernel/equeue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/libraries/kernel/equeue.h b/src/core/libraries/kernel/equeue.h index 17900238f..f8759137c 100644 --- a/src/core/libraries/kernel/equeue.h +++ b/src/core/libraries/kernel/equeue.h @@ -21,7 +21,7 @@ class EqueueInternal; struct EqueueEvent; struct SceKernelEvent { - enum Filter : int { + enum Filter : s16 { None = 0, Read = -1, Write = -2, From 927dc6d95c0b5b8ba0dc069cf655d8c58c230529 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 31 Dec 2024 02:38:30 -0800 Subject: [PATCH 25/53] vk_platform: Fix incorrect type for MVK debug flag. (#1993) --- src/video_core/renderer_vulkan/vk_platform.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index ab61af6a4..7f0bcb5d2 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -283,6 +283,9 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e Common::FS::GetUserPathString(Common::FS::PathType::LogDir); const char* log_path = crash_diagnostic_path.c_str(); vk::Bool32 enable_force_barriers = vk::True; +#ifdef __APPLE__ + const vk::Bool32 mvk_debug_mode = enable_crash_diagnostic ? 
vk::True : vk::False; +#endif const std::array layer_setings = { vk::LayerSettingEXT{ @@ -356,7 +359,7 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e .pSettingName = "MVK_CONFIG_DEBUG", .type = vk::LayerSettingTypeEXT::eBool32, .valueCount = 1, - .pValues = &enable_crash_diagnostic, + .pValues = &mvk_debug_mode, } #endif }; From 48c51bd9eff0dc94e84d7b75afb98f8f02a28832 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 31 Dec 2024 02:38:52 -0800 Subject: [PATCH 26/53] audio: Accurate audio output timing. (#1986) * audio: Accurate audio output timing. * audio: Handle SDL audio queue stalls. * audio: Format info cleanup. --- .gitmodules | 4 - CMakeLists.txt | 5 +- LICENSES/ISC.txt | 7 - externals/CMakeLists.txt | 10 - externals/cubeb | 1 - src/common/config.cpp | 17 - src/common/config.h | 2 - src/common/ringbuffer.h | 374 -------------------- src/core/libraries/audio/audioout.cpp | 240 +++++-------- src/core/libraries/audio/audioout.h | 45 ++- src/core/libraries/audio/audioout_backend.h | 21 +- src/core/libraries/audio/cubeb_audio.cpp | 174 --------- src/core/libraries/audio/sdl_audio.cpp | 62 +++- src/qt_gui/settings_dialog.cpp | 6 - src/qt_gui/settings_dialog.ui | 23 -- 15 files changed, 170 insertions(+), 821 deletions(-) delete mode 100644 LICENSES/ISC.txt delete mode 160000 externals/cubeb delete mode 100644 src/common/ringbuffer.h delete mode 100644 src/core/libraries/audio/cubeb_audio.cpp diff --git a/.gitmodules b/.gitmodules index 1c05ba6f3..3d0d21c5b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -119,7 +119,3 @@ path = externals/MoltenVK/cereal url = https://github.com/USCiLab/cereal shallow = true -[submodule "externals/cubeb"] - path = externals/cubeb - url = https://github.com/mozilla/cubeb - shallow = true diff --git a/CMakeLists.txt b/CMakeLists.txt index 833bbe3ce..c0f675266 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,7 +127,6 @@ find_package(xxHash 0.8.2 
MODULE) find_package(ZLIB 1.3 MODULE) find_package(Zydis 5.0.0 CONFIG) find_package(pugixml 1.14 CONFIG) -find_package(cubeb CONFIG) if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR NOT MSVC) find_package(cryptopp 8.9.0 MODULE) @@ -203,7 +202,6 @@ set(AUDIO_LIB src/core/libraries/audio/audioin.cpp src/core/libraries/audio/audioout.h src/core/libraries/audio/audioout_backend.h src/core/libraries/audio/audioout_error.h - src/core/libraries/audio/cubeb_audio.cpp src/core/libraries/audio/sdl_audio.cpp src/core/libraries/ngs2/ngs2.cpp src/core/libraries/ngs2/ngs2.h @@ -499,7 +497,6 @@ set(COMMON src/common/logging/backend.cpp src/common/polyfill_thread.h src/common/rdtsc.cpp src/common/rdtsc.h - src/common/ringbuffer.h src/common/signal_context.h src/common/signal_context.cpp src/common/singleton.h @@ -892,7 +889,7 @@ endif() create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) -target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers cubeb::cubeb) +target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") diff --git a/LICENSES/ISC.txt b/LICENSES/ISC.txt deleted file mode 100644 index b9bcfa3a4..000000000 --- a/LICENSES/ISC.txt +++ /dev/null @@ -1,7 +0,0 @@ -ISC License - - - -Permission to use, copy, modify, and /or distribute this software for any purpose with or without fee is hereby granted, 
provided that the above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 8bdf089f8..4350948b7 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -228,16 +228,6 @@ if (NOT TARGET stb::headers) add_library(stb::headers ALIAS stb) endif() -# cubeb -if (NOT TARGET cubeb::cubeb) - option(BUILD_TESTS "" OFF) - option(BUILD_TOOLS "" OFF) - option(BUNDLE_SPEEX "" ON) - option(USE_SANITIZERS "" OFF) - add_subdirectory(cubeb) - add_library(cubeb::cubeb ALIAS cubeb) -endif() - # Apple-only dependencies if (APPLE) # date diff --git a/externals/cubeb b/externals/cubeb deleted file mode 160000 index 9a9d034c5..000000000 --- a/externals/cubeb +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9a9d034c51859a045a34f201334f612c51e6c19d diff --git a/src/common/config.cpp b/src/common/config.cpp index 93627d8c9..deef0fa88 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -67,7 +67,6 @@ static int cursorHideTimeout = 5; // 5 seconds (default) static bool separateupdatefolder = false; static bool compatibilityData = false; static bool checkCompatibilityOnStartup = false; -static std::string audioBackend = "cubeb"; // Gui std::vector settings_install_dirs = {}; @@ -240,10 +239,6 @@ bool getCheckCompatibilityOnStartup() { return checkCompatibilityOnStartup; } -std::string getAudioBackend() { - return audioBackend; -} - void setGpuId(s32 selectedGpuId) { gpuId = selectedGpuId; } @@ -376,10 
+371,6 @@ void setCheckCompatibilityOnStartup(bool use) { checkCompatibilityOnStartup = use; } -void setAudioBackend(std::string backend) { - audioBackend = backend; -} - void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h) { main_window_geometry_x = x; main_window_geometry_y = y; @@ -620,12 +611,6 @@ void load(const std::filesystem::path& path) { vkCrashDiagnostic = toml::find_or(vk, "crashDiagnostic", false); } - if (data.contains("Audio")) { - const toml::value& audio = data.at("Audio"); - - audioBackend = toml::find_or(audio, "backend", "cubeb"); - } - if (data.contains("Debug")) { const toml::value& debug = data.at("Debug"); @@ -724,7 +709,6 @@ void save(const std::filesystem::path& path) { data["Vulkan"]["rdocEnable"] = rdocEnable; data["Vulkan"]["rdocMarkersEnable"] = vkMarkers; data["Vulkan"]["crashDiagnostic"] = vkCrashDiagnostic; - data["Audio"]["backend"] = audioBackend; data["Debug"]["DebugDump"] = isDebugDump; data["Debug"]["CollectShader"] = isShaderDebug; @@ -828,7 +812,6 @@ void setDefaultValues() { separateupdatefolder = false; compatibilityData = false; checkCompatibilityOnStartup = false; - audioBackend = "cubeb"; } } // namespace Config diff --git a/src/common/config.h b/src/common/config.h index 43ef5024b..701aadb12 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -24,7 +24,6 @@ bool getEnableDiscordRPC(); bool getSeparateUpdateEnabled(); bool getCompatibilityEnabled(); bool getCheckCompatibilityOnStartup(); -std::string getAudioBackend(); std::string getLogFilter(); std::string getLogType(); @@ -76,7 +75,6 @@ void setSeparateUpdateEnabled(bool use); void setGameInstallDirs(const std::vector& settings_install_dirs_config); void setCompatibilityEnabled(bool use); void setCheckCompatibilityOnStartup(bool use); -void setAudioBackend(std::string backend); void setCursorState(s16 cursorState); void setCursorHideTimeout(int newcursorHideTimeout); diff --git a/src/common/ringbuffer.h b/src/common/ringbuffer.h deleted file mode 100644 
index 6a71c2888..000000000 --- a/src/common/ringbuffer.h +++ /dev/null @@ -1,374 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2016 Mozilla Foundation -// SPDX-License-Identifier: ISC - -#pragma once - -#include -#include -#include -#include -#include -#include "common/assert.h" - -/** - * Single producer single consumer lock-free and wait-free ring buffer. - * - * This data structure allows producing data from one thread, and consuming it - * on another thread, safely and without explicit synchronization. If used on - * two threads, this data structure uses atomics for thread safety. It is - * possible to disable the use of atomics at compile time and only use this data - * structure on one thread. - * - * The role for the producer and the consumer must be constant, i.e., the - * producer should always be on one thread and the consumer should always be on - * another thread. - * - * Some words about the inner workings of this class: - * - Capacity is fixed. Only one allocation is performed, in the constructor. - * When reading and writing, the return value of the method allows checking if - * the ring buffer is empty or full. - * - We always keep the read index at least one element ahead of the write - * index, so we can distinguish between an empty and a full ring buffer: an - * empty ring buffer is when the write index is at the same position as the - * read index. A full buffer is when the write index is exactly one position - * before the read index. - * - We synchronize updates to the read index after having read the data, and - * the write index after having written the data. This means that the each - * thread can only touch a portion of the buffer that is not touched by the - * other thread. - * - Callers are expected to provide buffers. When writing to the queue, - * elements are copied into the internal storage from the buffer passed in. - * When reading from the queue, the user is expected to provide a buffer. 
- * Because this is a ring buffer, data might not be contiguous in memory, - * providing an external buffer to copy into is an easy way to have linear - * data for further processing. - */ -template -class RingBuffer { -public: - /** - * Constructor for a ring buffer. - * - * This performs an allocation, but is the only allocation that will happen - * for the life time of a `RingBuffer`. - * - * @param capacity The maximum number of element this ring buffer will hold. - */ - RingBuffer(int capacity) - /* One more element to distinguish from empty and full buffer. */ - : capacity_(capacity + 1) { - ASSERT(storage_capacity() < std::numeric_limits::max() / 2 && - "buffer too large for the type of index used."); - ASSERT(capacity_ > 0); - - data_.reset(new T[storage_capacity()]); - /* If this queue is using atomics, initializing those members as the last - * action in the constructor acts as a full barrier, and allow capacity() to - * be thread-safe. */ - write_index_ = 0; - read_index_ = 0; - } - /** - * Push `count` zero or default constructed elements in the array. - * - * Only safely called on the producer thread. - * - * @param count The number of elements to enqueue. - * @return The number of element enqueued. - */ - int enqueue_default(int count) { - return enqueue(nullptr, count); - } - /** - * @brief Put an element in the queue - * - * Only safely called on the producer thread. - * - * @param element The element to put in the queue. - * - * @return 1 if the element was inserted, 0 otherwise. - */ - int enqueue(T& element) { - return enqueue(&element, 1); - } - /** - * Push `count` elements in the ring buffer. - * - * Only safely called on the producer thread. - * - * @param elements a pointer to a buffer containing at least `count` elements. - * If `elements` is nullptr, zero or default constructed elements are - * enqueued. 
- * @param count The number of elements to read from `elements` - * @return The number of elements successfully coped from `elements` and - * inserted into the ring buffer. - */ - int enqueue(T* elements, int count) { -#ifndef NDEBUG - assert_correct_thread(producer_id); -#endif - - int wr_idx = write_index_.load(std::memory_order_relaxed); - int rd_idx = read_index_.load(std::memory_order_acquire); - - if (full_internal(rd_idx, wr_idx)) { - return 0; - } - - int to_write = std::min(available_write_internal(rd_idx, wr_idx), count); - - /* First part, from the write index to the end of the array. */ - int first_part = std::min(storage_capacity() - wr_idx, to_write); - /* Second part, from the beginning of the array */ - int second_part = to_write - first_part; - - if (elements) { - Copy(data_.get() + wr_idx, elements, first_part); - Copy(data_.get(), elements + first_part, second_part); - } else { - ConstructDefault(data_.get() + wr_idx, first_part); - ConstructDefault(data_.get(), second_part); - } - - write_index_.store(increment_index(wr_idx, to_write), std::memory_order_release); - - return to_write; - } - /** - * Retrieve at most `count` elements from the ring buffer, and copy them to - * `elements`, if non-null. - * - * Only safely called on the consumer side. - * - * @param elements A pointer to a buffer with space for at least `count` - * elements. If `elements` is `nullptr`, `count` element will be discarded. - * @param count The maximum number of elements to dequeue. - * @return The number of elements written to `elements`. 
- */ - int dequeue(T* elements, int count) { -#ifndef NDEBUG - assert_correct_thread(consumer_id); -#endif - - int rd_idx = read_index_.load(std::memory_order_relaxed); - int wr_idx = write_index_.load(std::memory_order_acquire); - - if (empty_internal(rd_idx, wr_idx)) { - return 0; - } - - int to_read = std::min(available_read_internal(rd_idx, wr_idx), count); - - int first_part = std::min(storage_capacity() - rd_idx, to_read); - int second_part = to_read - first_part; - - if (elements) { - Copy(elements, data_.get() + rd_idx, first_part); - Copy(elements + first_part, data_.get(), second_part); - } - - read_index_.store(increment_index(rd_idx, to_read), std::memory_order_release); - - return to_read; - } - /** - * Get the number of available element for consuming. - * - * Only safely called on the consumer thread. - * - * @return The number of available elements for reading. - */ - int available_read() const { -#ifndef NDEBUG - assert_correct_thread(consumer_id); -#endif - return available_read_internal(read_index_.load(std::memory_order_relaxed), - write_index_.load(std::memory_order_acquire)); - } - /** - * Get the number of available elements for consuming. - * - * Only safely called on the producer thread. - * - * @return The number of empty slots in the buffer, available for writing. - */ - int available_write() const { -#ifndef NDEBUG - assert_correct_thread(producer_id); -#endif - return available_write_internal(read_index_.load(std::memory_order_acquire), - write_index_.load(std::memory_order_relaxed)); - } - /** - * Get the total capacity, for this ring buffer. - * - * Can be called safely on any thread. - * - * @return The maximum capacity of this ring buffer. - */ - int capacity() const { - return storage_capacity() - 1; - } - /** - * Reset the consumer and producer thread identifier, in case the thread are - * being changed. This has to be externally synchronized. This is no-op when - * asserts are disabled. 
- */ - void reset_thread_ids() { -#ifndef NDEBUG - consumer_id = producer_id = std::thread::id(); -#endif - } - -private: - /** Return true if the ring buffer is empty. - * - * @param read_index the read index to consider - * @param write_index the write index to consider - * @return true if the ring buffer is empty, false otherwise. - **/ - bool empty_internal(int read_index, int write_index) const { - return write_index == read_index; - } - /** Return true if the ring buffer is full. - * - * This happens if the write index is exactly one element behind the read - * index. - * - * @param read_index the read index to consider - * @param write_index the write index to consider - * @return true if the ring buffer is full, false otherwise. - **/ - bool full_internal(int read_index, int write_index) const { - return (write_index + 1) % storage_capacity() == read_index; - } - /** - * Return the size of the storage. It is one more than the number of elements - * that can be stored in the buffer. - * - * @return the number of elements that can be stored in the buffer. - */ - int storage_capacity() const { - return capacity_; - } - /** - * Returns the number of elements available for reading. - * - * @return the number of available elements for reading. - */ - int available_read_internal(int read_index, int write_index) const { - if (write_index >= read_index) { - return write_index - read_index; - } else { - return write_index + storage_capacity() - read_index; - } - } - /** - * Returns the number of empty elements, available for writing. - * - * @return the number of elements that can be written into the array. - */ - int available_write_internal(int read_index, int write_index) const { - /* We substract one element here to always keep at least one sample - * free in the buffer, to distinguish between full and empty array. 
*/ - int rv = read_index - write_index - 1; - if (write_index >= read_index) { - rv += storage_capacity(); - } - return rv; - } - /** - * Increments an index, wrapping it around the storage. - * - * @param index a reference to the index to increment. - * @param increment the number by which `index` is incremented. - * @return the new index. - */ - int increment_index(int index, int increment) const { - ASSERT(increment >= 0); - return (index + increment) % storage_capacity(); - } - /** - * @brief This allows checking that enqueue (resp. dequeue) are always called - * by the right thread. - * - * @param id the id of the thread that has called the calling method first. - */ -#ifndef NDEBUG - static void assert_correct_thread(std::thread::id& id) { - if (id == std::thread::id()) { - id = std::this_thread::get_id(); - return; - } - ASSERT(id == std::this_thread::get_id()); - } -#endif - /** Similar to memcpy, but accounts for the size of an element. */ - template - void PodCopy(CopyT* destination, const CopyT* source, size_t count) { - static_assert(std::is_trivial::value, "Requires trivial type"); - ASSERT(destination && source); - memcpy(destination, source, count * sizeof(CopyT)); - } - /** Similar to a memset to zero, but accounts for the size of an element. 
*/ - template - void PodZero(ZeroT* destination, size_t count) { - static_assert(std::is_trivial::value, "Requires trivial type"); - ASSERT(destination); - memset(destination, 0, count * sizeof(ZeroT)); - } - template - void Copy(CopyT* destination, const CopyT* source, size_t count, Trait) { - for (size_t i = 0; i < count; i++) { - destination[i] = source[i]; - } - } - template - void Copy(CopyT* destination, const CopyT* source, size_t count, std::true_type) { - PodCopy(destination, source, count); - } - /** - * This allows copying a number of elements from a `source` pointer to a - * `destination` pointer, using `memcpy` if it is safe to do so, or a loop that - * calls the constructors and destructors otherwise. - */ - template - void Copy(CopyT* destination, const T* source, size_t count) { - ASSERT(destination && source); - Copy(destination, source, count, typename std::is_trivial::type()); - } - template - void ConstructDefault(ConstructT* destination, size_t count, Trait) { - for (size_t i = 0; i < count; i++) { - destination[i] = ConstructT(); - } - } - template - void ConstructDefault(ConstructT* destination, size_t count, std::true_type) { - PodZero(destination, count); - } - /** - * This allows zeroing (using memset) or default-constructing a number of - * elements calling the constructors and destructors if necessary. - */ - template - void ConstructDefault(ConstructT* destination, size_t count) { - ASSERT(destination); - ConstructDefault(destination, count, typename std::is_arithmetic::type()); - } - /** Index at which the oldest element is at, in samples. */ - std::atomic read_index_; - /** Index at which to write new elements. `write_index` is always at - * least one element ahead of `read_index_`. */ - std::atomic write_index_; - /** Maximum number of elements that can be stored in the ring buffer. 
*/ - const int capacity_; - /** Data storage */ - std::unique_ptr data_; -#ifndef NDEBUG - /** The id of the only thread that is allowed to read from the queue. */ - mutable std::thread::id consumer_id; - /** The id of the only thread that is allowed to write from the queue. */ - mutable std::thread::id producer_id; -#endif -}; diff --git a/src/core/libraries/audio/audioout.cpp b/src/core/libraries/audio/audioout.cpp index 89ea1d3f5..d69454c39 100644 --- a/src/core/libraries/audio/audioout.cpp +++ b/src/core/libraries/audio/audioout.cpp @@ -9,6 +9,8 @@ #include "common/assert.h" #include "common/config.h" #include "common/logging/log.h" +#include "common/polyfill_thread.h" +#include "common/thread.h" #include "core/libraries/audio/audioout.h" #include "core/libraries/audio/audioout_backend.h" #include "core/libraries/audio/audioout_error.h" @@ -21,111 +23,28 @@ std::array ports_out{}; static std::unique_ptr audio; -static std::string_view GetAudioOutPort(OrbisAudioOutPort port) { - switch (port) { - case OrbisAudioOutPort::Main: - return "MAIN"; - case OrbisAudioOutPort::Bgm: - return "BGM"; - case OrbisAudioOutPort::Voice: - return "VOICE"; - case OrbisAudioOutPort::Personal: - return "PERSONAL"; - case OrbisAudioOutPort::Padspk: - return "PADSPK"; - case OrbisAudioOutPort::Aux: - return "AUX"; - default: - return "INVALID"; - } -} - -static std::string_view GetAudioOutParamFormat(OrbisAudioOutParamFormat param) { - switch (param) { - case OrbisAudioOutParamFormat::S16Mono: - return "S16_MONO"; - case OrbisAudioOutParamFormat::S16Stereo: - return "S16_STEREO"; - case OrbisAudioOutParamFormat::S16_8CH: - return "S16_8CH"; - case OrbisAudioOutParamFormat::FloatMono: - return "FLOAT_MONO"; - case OrbisAudioOutParamFormat::FloatStereo: - return "FLOAT_STEREO"; - case OrbisAudioOutParamFormat::Float_8CH: - return "FLOAT_8CH"; - case OrbisAudioOutParamFormat::S16_8CH_Std: - return "S16_8CH_STD"; - case OrbisAudioOutParamFormat::Float_8CH_Std: - return "FLOAT_8CH_STD"; - 
default: - return "INVALID"; - } -} - -static std::string_view GetAudioOutParamAttr(OrbisAudioOutParamAttr attr) { - switch (attr) { - case OrbisAudioOutParamAttr::None: - return "NONE"; - case OrbisAudioOutParamAttr::Restricted: - return "RESTRICTED"; - case OrbisAudioOutParamAttr::MixToMain: - return "MIX_TO_MAIN"; - default: - return "INVALID"; - } -} - -static bool IsFormatFloat(const OrbisAudioOutParamFormat format) { - switch (format) { - case OrbisAudioOutParamFormat::S16Mono: - case OrbisAudioOutParamFormat::S16Stereo: - case OrbisAudioOutParamFormat::S16_8CH: - case OrbisAudioOutParamFormat::S16_8CH_Std: - return false; - case OrbisAudioOutParamFormat::FloatMono: - case OrbisAudioOutParamFormat::FloatStereo: - case OrbisAudioOutParamFormat::Float_8CH: - case OrbisAudioOutParamFormat::Float_8CH_Std: - return true; - default: - UNREACHABLE_MSG("Unknown format"); - } -} - -static u8 GetFormatNumChannels(const OrbisAudioOutParamFormat format) { - switch (format) { - case OrbisAudioOutParamFormat::S16Mono: - case OrbisAudioOutParamFormat::FloatMono: - return 1; - case OrbisAudioOutParamFormat::S16Stereo: - case OrbisAudioOutParamFormat::FloatStereo: - return 2; - case OrbisAudioOutParamFormat::S16_8CH: - case OrbisAudioOutParamFormat::Float_8CH: - case OrbisAudioOutParamFormat::S16_8CH_Std: - case OrbisAudioOutParamFormat::Float_8CH_Std: - return 8; - default: - UNREACHABLE_MSG("Unknown format"); - } -} - -static u8 GetFormatSampleSize(const OrbisAudioOutParamFormat format) { - switch (format) { - case OrbisAudioOutParamFormat::S16Mono: - case OrbisAudioOutParamFormat::S16Stereo: - case OrbisAudioOutParamFormat::S16_8CH: - case OrbisAudioOutParamFormat::S16_8CH_Std: - return 2; - case OrbisAudioOutParamFormat::FloatMono: - case OrbisAudioOutParamFormat::FloatStereo: - case OrbisAudioOutParamFormat::Float_8CH: - case OrbisAudioOutParamFormat::Float_8CH_Std: - return 4; - default: - UNREACHABLE_MSG("Unknown format"); - } +static AudioFormatInfo 
GetFormatInfo(const OrbisAudioOutParamFormat format) { + static constexpr std::array format_infos = {{ + // S16Mono + {false, 2, 1, {0}}, + // S16Stereo + {false, 2, 2, {0, 1}}, + // S16_8CH + {false, 2, 8, {0, 1, 2, 3, 4, 5, 6, 7}}, + // FloatMono + {true, 4, 1, {0}}, + // FloatStereo + {true, 4, 2, {0, 1}}, + // Float_8CH + {true, 4, 8, {0, 1, 2, 3, 4, 5, 6, 7}}, + // S16_8CH_Std + {false, 2, 8, {0, 1, 2, 3, 6, 7, 4, 5}}, + // Float_8CH_Std + {true, 4, 8, {0, 1, 2, 3, 6, 7, 4, 5}}, + }}; + const auto index = static_cast(format); + ASSERT_MSG(index < format_infos.size(), "Unknown audio format {}", index); + return format_infos[index]; } int PS4_SYSV_ABI sceAudioOutDeviceIdOpen() { @@ -180,6 +99,10 @@ int PS4_SYSV_ABI sceAudioOutClose(s32 handle) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } + port.output_thread.Stop(); + std::free(port.output_buffer); + port.output_buffer = nullptr; + port.output_ready = false; port.impl = nullptr; return ORBIS_OK; } @@ -263,7 +186,7 @@ int PS4_SYSV_ABI sceAudioOutGetPortState(s32 handle, OrbisAudioOutPortState* sta case OrbisAudioOutPort::Bgm: case OrbisAudioOutPort::Voice: state->output = 1; - state->channel = port.channels_num > 2 ? 2 : port.channels_num; + state->channel = port.format_info.num_channels > 2 ? 2 : port.format_info.num_channels; break; case OrbisAudioOutPort::Personal: case OrbisAudioOutPort::Padspk: @@ -311,16 +234,7 @@ int PS4_SYSV_ABI sceAudioOutInit() { if (audio != nullptr) { return ORBIS_AUDIO_OUT_ERROR_ALREADY_INIT; } - const auto backend = Config::getAudioBackend(); - if (backend == "cubeb") { - audio = std::make_unique(); - } else if (backend == "sdl") { - audio = std::make_unique(); - } else { - // Cubeb as a default fallback. 
- LOG_ERROR(Lib_AudioOut, "Invalid audio backend '{}', defaulting to cubeb.", backend); - audio = std::make_unique(); - } + audio = std::make_unique(); return ORBIS_OK; } @@ -354,6 +268,30 @@ int PS4_SYSV_ABI sceAudioOutMbusInit() { return ORBIS_OK; } +static void AudioOutputThread(PortOut* port, const std::stop_token& stop) { + { + const auto thread_name = fmt::format("shadPS4:AudioOutputThread:{}", fmt::ptr(port)); + Common::SetCurrentThreadName(thread_name.c_str()); + } + + Common::AccurateTimer timer( + std::chrono::nanoseconds(1000000000ULL * port->buffer_frames / port->sample_rate)); + while (true) { + timer.Start(); + { + std::unique_lock lock{port->output_mutex}; + Common::CondvarWait(port->output_cv, lock, stop, [&] { return port->output_ready; }); + if (stop.stop_requested()) { + break; + } + port->impl->Output(port->output_buffer); + port->output_ready = false; + } + port->output_cv.notify_one(); + timer.End(); + } +} + s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, OrbisAudioOutPort port_type, s32 index, u32 length, u32 sample_rate, @@ -361,9 +299,9 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, LOG_INFO(Lib_AudioOut, "id = {} port_type = {} index = {} length = {} sample_rate = {} " "param_type = {} attr = {}", - user_id, GetAudioOutPort(port_type), index, length, sample_rate, - GetAudioOutParamFormat(param_type.data_format), - GetAudioOutParamAttr(param_type.attributes)); + user_id, magic_enum::enum_name(port_type), index, length, sample_rate, + magic_enum::enum_name(param_type.data_format.Value()), + magic_enum::enum_name(param_type.attributes.Value())); if ((port_type < OrbisAudioOutPort::Main || port_type > OrbisAudioOutPort::Padspk) && (port_type != OrbisAudioOutPort::Aux)) { LOG_ERROR(Lib_AudioOut, "Invalid port type"); @@ -403,17 +341,18 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, } port->type = port_type; - port->format = format; - 
port->is_float = IsFormatFloat(format); - port->sample_size = GetFormatSampleSize(format); - port->channels_num = GetFormatNumChannels(format); - port->samples_num = length; - port->frame_size = port->sample_size * port->channels_num; - port->buffer_size = port->frame_size * port->samples_num; - port->freq = sample_rate; + port->format_info = GetFormatInfo(format); + port->sample_rate = sample_rate; + port->buffer_frames = length; port->volume.fill(SCE_AUDIO_OUT_VOLUME_0DB); + port->impl = audio->Open(*port); + port->output_buffer = std::malloc(port->BufferSize()); + port->output_ready = false; + port->output_thread.Run( + [port](const std::stop_token& stop) { AudioOutputThread(&*port, stop); }); + return std::distance(ports_out.begin(), port) + 1; } @@ -426,24 +365,30 @@ s32 PS4_SYSV_ABI sceAudioOutOutput(s32 handle, void* ptr) { if (handle < 1 || handle > SCE_AUDIO_OUT_NUM_PORTS) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - if (ptr == nullptr) { - // Nothing to output - return ORBIS_OK; - } auto& port = ports_out.at(handle - 1); if (!port.impl) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - port.impl->Output(ptr, port.buffer_size); + { + std::unique_lock lock{port.output_mutex}; + port.output_cv.wait(lock, [&] { return !port.output_ready; }); + if (ptr != nullptr) { + std::memcpy(port.output_buffer, ptr, port.BufferSize()); + port.output_ready = true; + } + } + port.output_cv.notify_one(); return ORBIS_OK; } int PS4_SYSV_ABI sceAudioOutOutputs(OrbisAudioOutOutputParam* param, u32 num) { for (u32 i = 0; i < num; i++) { - if (const auto err = sceAudioOutOutput(param[i].handle, param[i].ptr); err != 0) - return err; + const auto [handle, ptr] = param[i]; + if (const auto ret = sceAudioOutOutput(handle, ptr); ret != ORBIS_OK) { + return ret; + } } return ORBIS_OK; } @@ -549,30 +494,9 @@ s32 PS4_SYSV_ABI sceAudioOutSetVolume(s32 handle, s32 flag, s32* vol) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - for (int i = 0; i < port.channels_num; i++, flag >>= 1u) { 
- auto bit = flag & 0x1u; - if (bit == 1) { - int src_index = i; - if (port.format == OrbisAudioOutParamFormat::Float_8CH_Std || - port.format == OrbisAudioOutParamFormat::S16_8CH_Std) { - switch (i) { - case 4: - src_index = 6; - break; - case 5: - src_index = 7; - break; - case 6: - src_index = 4; - break; - case 7: - src_index = 5; - break; - default: - break; - } - } - port.volume[i] = vol[src_index]; + for (int i = 0; i < port.format_info.num_channels; i++, flag >>= 1u) { + if (flag & 0x1u) { + port.volume[i] = vol[i]; } } diff --git a/src/core/libraries/audio/audioout.h b/src/core/libraries/audio/audioout.h index 58c77db99..4f7378dcd 100644 --- a/src/core/libraries/audio/audioout.h +++ b/src/core/libraries/audio/audioout.h @@ -6,6 +6,7 @@ #include #include "common/bit_field.h" +#include "core/libraries/kernel/threads.h" #include "core/libraries/system/userservice.h" namespace Libraries::AudioOut { @@ -14,12 +15,12 @@ class PortBackend; // Main up to 8 ports, BGM 1 port, voice up to 4 ports, // personal up to 4 ports, padspk up to 5 ports, aux 1 port -constexpr int SCE_AUDIO_OUT_NUM_PORTS = 22; -constexpr int SCE_AUDIO_OUT_VOLUME_0DB = 32768; // max volume value +constexpr s32 SCE_AUDIO_OUT_NUM_PORTS = 22; +constexpr s32 SCE_AUDIO_OUT_VOLUME_0DB = 32768; // max volume value enum class OrbisAudioOutPort { Main = 0, Bgm = 1, Voice = 2, Personal = 3, Padspk = 4, Aux = 127 }; -enum class OrbisAudioOutParamFormat { +enum class OrbisAudioOutParamFormat : u32 { S16Mono = 0, S16Stereo = 1, S16_8CH = 2, @@ -30,7 +31,7 @@ enum class OrbisAudioOutParamFormat { Float_8CH_Std = 7 }; -enum class OrbisAudioOutParamAttr { +enum class OrbisAudioOutParamAttr : u32 { None = 0, Restricted = 1, MixToMain = 2, @@ -59,19 +60,37 @@ struct OrbisAudioOutPortState { u64 reserved64[2]; }; +struct AudioFormatInfo { + bool is_float; + u8 sample_size; + u8 num_channels; + /// Layout array remapping channel indices, specified in this order: + /// FL, FR, FC, LFE, BL, BR, SL, SR + std::array 
channel_layout; + + [[nodiscard]] u16 FrameSize() const { + return sample_size * num_channels; + } +}; + struct PortOut { std::unique_ptr impl{}; + void* output_buffer; + std::mutex output_mutex; + std::condition_variable_any output_cv; + bool output_ready; + Kernel::Thread output_thread{}; + OrbisAudioOutPort type; - OrbisAudioOutParamFormat format; - bool is_float; - u8 sample_size; - u8 channels_num; - u32 samples_num; - u32 frame_size; - u32 buffer_size; - u32 freq; - std::array volume; + AudioFormatInfo format_info; + u32 sample_rate; + u32 buffer_frames; + std::array volume; + + [[nodiscard]] u32 BufferSize() const { + return buffer_frames * format_info.FrameSize(); + } }; int PS4_SYSV_ABI sceAudioOutDeviceIdOpen(); diff --git a/src/core/libraries/audio/audioout_backend.h b/src/core/libraries/audio/audioout_backend.h index f423d4963..0f36f19c8 100644 --- a/src/core/libraries/audio/audioout_backend.h +++ b/src/core/libraries/audio/audioout_backend.h @@ -3,8 +3,6 @@ #pragma once -typedef struct cubeb cubeb; - namespace Libraries::AudioOut { struct PortOut; @@ -13,7 +11,10 @@ class PortBackend { public: virtual ~PortBackend() = default; - virtual void Output(void* ptr, size_t size) = 0; + /// Guaranteed to be called in intervals of at least port buffer time, + /// with size equal to port buffer size. 
+ virtual void Output(void* ptr) = 0; + virtual void SetVolume(const std::array& ch_volumes) = 0; }; @@ -25,20 +26,6 @@ public: virtual std::unique_ptr Open(PortOut& port) = 0; }; -class CubebAudioOut final : public AudioOutBackend { -public: - CubebAudioOut(); - ~CubebAudioOut() override; - - std::unique_ptr Open(PortOut& port) override; - -private: - cubeb* ctx = nullptr; -#ifdef _WIN32 - bool owns_com = false; -#endif -}; - class SDLAudioOut final : public AudioOutBackend { public: std::unique_ptr Open(PortOut& port) override; diff --git a/src/core/libraries/audio/cubeb_audio.cpp b/src/core/libraries/audio/cubeb_audio.cpp deleted file mode 100644 index 4127931b7..000000000 --- a/src/core/libraries/audio/cubeb_audio.cpp +++ /dev/null @@ -1,174 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include - -#include "common/logging/log.h" -#include "common/ringbuffer.h" -#include "core/libraries/audio/audioout.h" -#include "core/libraries/audio/audioout_backend.h" - -#ifdef _WIN32 -#include -#endif - -namespace Libraries::AudioOut { - -class CubebPortBackend : public PortBackend { -public: - CubebPortBackend(cubeb* ctx, const PortOut& port) - : frame_size(port.frame_size), buffer(static_cast(port.buffer_size) * 4) { - if (!ctx) { - return; - } - const auto get_channel_layout = [&port] -> cubeb_channel_layout { - switch (port.channels_num) { - case 1: - return CUBEB_LAYOUT_MONO; - case 2: - return CUBEB_LAYOUT_STEREO; - case 8: - return CUBEB_LAYOUT_3F4_LFE; - default: - UNREACHABLE(); - } - }; - cubeb_stream_params stream_params = { - .format = port.is_float ? 
CUBEB_SAMPLE_FLOAT32LE : CUBEB_SAMPLE_S16LE, - .rate = port.freq, - .channels = port.channels_num, - .layout = get_channel_layout(), - .prefs = CUBEB_STREAM_PREF_NONE, - }; - u32 latency_frames = 512; - if (const auto ret = cubeb_get_min_latency(ctx, &stream_params, &latency_frames); - ret != CUBEB_OK) { - LOG_WARNING(Lib_AudioOut, - "Could not get minimum cubeb audio latency, falling back to default: {}", - ret); - } - char stream_name[64]; - snprintf(stream_name, sizeof(stream_name), "shadPS4 stream %p", this); - if (const auto ret = cubeb_stream_init(ctx, &stream, stream_name, nullptr, nullptr, nullptr, - &stream_params, latency_frames, &DataCallback, - &StateCallback, this); - ret != CUBEB_OK) { - LOG_ERROR(Lib_AudioOut, "Failed to create cubeb stream: {}", ret); - return; - } - if (const auto ret = cubeb_stream_start(stream); ret != CUBEB_OK) { - LOG_ERROR(Lib_AudioOut, "Failed to start cubeb stream: {}", ret); - cubeb_stream_destroy(stream); - stream = nullptr; - return; - } - } - - ~CubebPortBackend() override { - if (!stream) { - return; - } - if (const auto ret = cubeb_stream_stop(stream); ret != CUBEB_OK) { - LOG_WARNING(Lib_AudioOut, "Failed to stop cubeb stream: {}", ret); - } - cubeb_stream_destroy(stream); - stream = nullptr; - } - - void Output(void* ptr, size_t size) override { - if (!stream) { - return; - } - auto* data = static_cast(ptr); - - std::unique_lock lock{buffer_mutex}; - buffer_cv.wait(lock, [&] { return buffer.available_write() >= size; }); - buffer.enqueue(data, static_cast(size)); - } - - void SetVolume(const std::array& ch_volumes) override { - if (!stream) { - return; - } - // Cubeb does not have per-channel volumes, for now just take the maximum of the channels. 
- const auto vol = *std::ranges::max_element(ch_volumes); - if (const auto ret = - cubeb_stream_set_volume(stream, static_cast(vol) / SCE_AUDIO_OUT_VOLUME_0DB); - ret != CUBEB_OK) { - LOG_WARNING(Lib_AudioOut, "Failed to change cubeb stream volume: {}", ret); - } - } - -private: - static long DataCallback(cubeb_stream* stream, void* user_data, const void* in, void* out, - long num_frames) { - auto* stream_data = static_cast(user_data); - const auto out_data = static_cast(out); - const auto requested_size = static_cast(num_frames * stream_data->frame_size); - - std::unique_lock lock{stream_data->buffer_mutex}; - const auto dequeued_size = stream_data->buffer.dequeue(out_data, requested_size); - lock.unlock(); - stream_data->buffer_cv.notify_one(); - - if (dequeued_size < requested_size) { - // Need to fill remaining space with silence. - std::memset(out_data + dequeued_size, 0, requested_size - dequeued_size); - } - return num_frames; - } - - static void StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) { - switch (state) { - case CUBEB_STATE_STARTED: - LOG_INFO(Lib_AudioOut, "Cubeb stream started"); - break; - case CUBEB_STATE_STOPPED: - LOG_INFO(Lib_AudioOut, "Cubeb stream stopped"); - break; - case CUBEB_STATE_DRAINED: - LOG_INFO(Lib_AudioOut, "Cubeb stream drained"); - break; - case CUBEB_STATE_ERROR: - LOG_ERROR(Lib_AudioOut, "Cubeb stream encountered an error"); - break; - } - } - - size_t frame_size; - RingBuffer buffer; - std::mutex buffer_mutex; - std::condition_variable buffer_cv; - cubeb_stream* stream{}; -}; - -CubebAudioOut::CubebAudioOut() { -#ifdef _WIN32 - // Need to initialize COM for this thread on Windows, in case WASAPI backend is used. 
- owns_com = CoInitializeEx(nullptr, COINIT_MULTITHREADED) == S_OK; -#endif - if (const auto ret = cubeb_init(&ctx, "shadPS4", nullptr); ret != CUBEB_OK) { - LOG_CRITICAL(Lib_AudioOut, "Failed to create cubeb context: {}", ret); - } -} - -CubebAudioOut::~CubebAudioOut() { - if (ctx) { - cubeb_destroy(ctx); - ctx = nullptr; - } -#ifdef _WIN32 - if (owns_com) { - CoUninitialize(); - owns_com = false; - } -#endif -} - -std::unique_ptr CubebAudioOut::Open(PortOut& port) { - return std::make_unique(ctx, port); -} - -} // namespace Libraries::AudioOut diff --git a/src/core/libraries/audio/sdl_audio.cpp b/src/core/libraries/audio/sdl_audio.cpp index 598941ba7..59d2d5cfb 100644 --- a/src/core/libraries/audio/sdl_audio.cpp +++ b/src/core/libraries/audio/sdl_audio.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "common/logging/log.h" #include "core/libraries/audio/audioout.h" @@ -10,15 +11,21 @@ namespace Libraries::AudioOut { -constexpr int AUDIO_STREAM_BUFFER_THRESHOLD = 65536; // Define constant for buffer threshold - class SDLPortBackend : public PortBackend { public: - explicit SDLPortBackend(const PortOut& port) { + explicit SDLPortBackend(const PortOut& port) + : frame_size(port.format_info.FrameSize()), buffer_size(port.BufferSize()) { + // We want the latency for delivering frames out to be as small as possible, + // so set the sample frames hint to the number of frames per buffer. + const auto samples_num_str = std::to_string(port.buffer_frames); + if (!SDL_SetHint(SDL_HINT_AUDIO_DEVICE_SAMPLE_FRAMES, samples_num_str.c_str())) { + LOG_WARNING(Lib_AudioOut, "Failed to set SDL audio sample frames hint to {}: {}", + samples_num_str, SDL_GetError()); + } const SDL_AudioSpec fmt = { - .format = port.is_float ? SDL_AUDIO_F32 : SDL_AUDIO_S16, - .channels = port.channels_num, - .freq = static_cast(port.freq), + .format = port.format_info.is_float ? 
SDL_AUDIO_F32LE : SDL_AUDIO_S16LE, + .channels = port.format_info.num_channels, + .freq = static_cast(port.sample_rate), }; stream = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &fmt, nullptr, nullptr); @@ -26,6 +33,15 @@ public: LOG_ERROR(Lib_AudioOut, "Failed to create SDL audio stream: {}", SDL_GetError()); return; } + queue_threshold = CalculateQueueThreshold(); + if (!SDL_SetAudioStreamInputChannelMap(stream, port.format_info.channel_layout.data(), + port.format_info.num_channels)) { + LOG_ERROR(Lib_AudioOut, "Failed to configure SDL audio stream channel map: {}", + SDL_GetError()); + SDL_DestroyAudioStream(stream); + stream = nullptr; + return; + } if (!SDL_ResumeAudioStreamDevice(stream)) { LOG_ERROR(Lib_AudioOut, "Failed to resume SDL audio stream: {}", SDL_GetError()); SDL_DestroyAudioStream(stream); @@ -42,14 +58,23 @@ public: stream = nullptr; } - void Output(void* ptr, size_t size) override { + void Output(void* ptr) override { if (!stream) { return; } - SDL_PutAudioStreamData(stream, ptr, static_cast(size)); - while (SDL_GetAudioStreamAvailable(stream) > AUDIO_STREAM_BUFFER_THRESHOLD) { - // Yield to allow the stream to drain. - std::this_thread::yield(); + // AudioOut library manages timing, but we still need to guard against the SDL + // audio queue stalling, which may happen during device changes, for example. + // Otherwise, latency may grow over time unbounded. + if (const auto queued = SDL_GetAudioStreamQueued(stream); queued >= queue_threshold) { + LOG_WARNING(Lib_AudioOut, + "SDL audio queue backed up ({} queued, {} threshold), clearing.", queued, + queue_threshold); + SDL_ClearAudioStream(stream); + // Recalculate the threshold in case this happened because of a device change. 
+ queue_threshold = CalculateQueueThreshold(); + } + if (!SDL_PutAudioStreamData(stream, ptr, static_cast(buffer_size))) { + LOG_ERROR(Lib_AudioOut, "Failed to output to SDL audio stream: {}", SDL_GetError()); } } @@ -66,6 +91,21 @@ public: } private: + [[nodiscard]] u32 CalculateQueueThreshold() const { + SDL_AudioSpec discard; + int sdl_buffer_frames; + if (!SDL_GetAudioDeviceFormat(SDL_GetAudioStreamDevice(stream), &discard, + &sdl_buffer_frames)) { + LOG_WARNING(Lib_AudioOut, "Failed to get SDL audio stream buffer size: {}", + SDL_GetError()); + sdl_buffer_frames = 0; + } + return std::max(buffer_size, sdl_buffer_frames * frame_size) * 4; + } + + u32 frame_size; + u32 buffer_size; + u32 queue_threshold; SDL_AudioStream* stream; }; diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index 6d4de6603..0c1375c7f 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -211,7 +211,6 @@ SettingsDialog::SettingsDialog(std::span physical_devices, ui->enableCompatibilityCheckBox->installEventFilter(this); ui->checkCompatibilityOnStartupCheckBox->installEventFilter(this); ui->updateCompatibilityButton->installEventFilter(this); - ui->audioBackendComboBox->installEventFilter(this); // Input ui->hideCursorGroupBox->installEventFilter(this); @@ -305,8 +304,6 @@ void SettingsDialog::LoadValuesFromConfig() { toml::find_or(data, "General", "compatibilityEnabled", false)); ui->checkCompatibilityOnStartupCheckBox->setChecked( toml::find_or(data, "General", "checkCompatibilityOnStartup", false)); - ui->audioBackendComboBox->setCurrentText( - QString::fromStdString(toml::find_or(data, "Audio", "backend", "cubeb"))); #ifdef ENABLE_UPDATER ui->updateCheckBox->setChecked(toml::find_or(data, "General", "autoUpdate", false)); @@ -428,8 +425,6 @@ void SettingsDialog::updateNoteTextEdit(const QString& elementName) { text = tr("checkCompatibilityOnStartupCheckBox"); } else if (elementName == "updateCompatibilityButton") { text = 
tr("updateCompatibilityButton"); - } else if (elementName == "audioBackendGroupBox") { - text = tr("audioBackendGroupBox"); } // Input @@ -543,7 +538,6 @@ void SettingsDialog::UpdateSettings() { Config::setUpdateChannel(ui->updateComboBox->currentText().toStdString()); Config::setCompatibilityEnabled(ui->enableCompatibilityCheckBox->isChecked()); Config::setCheckCompatibilityOnStartup(ui->checkCompatibilityOnStartupCheckBox->isChecked()); - Config::setAudioBackend(ui->audioBackendComboBox->currentText().toStdString()); #ifdef ENABLE_DISCORD_RPC auto* rpc = Common::Singleton::Instance(); diff --git a/src/qt_gui/settings_dialog.ui b/src/qt_gui/settings_dialog.ui index f2d6b77d2..af1edb0dd 100644 --- a/src/qt_gui/settings_dialog.ui +++ b/src/qt_gui/settings_dialog.ui @@ -263,29 +263,6 @@ - - - - Audio Backend - - - - - - - cubeb - - - - - sdl - - - - - - - From 174b5c0f954c1348be29c0b9177a6b89145dd6c9 Mon Sep 17 00:00:00 2001 From: psucien Date: Tue, 31 Dec 2024 17:24:56 +0100 Subject: [PATCH 27/53] kernel: equeue: added missing `sceKernelDeleteHRTimerEvent` --- src/core/libraries/kernel/equeue.cpp | 14 ++++++++++++++ src/core/libraries/kernel/equeue.h | 7 +++++++ 2 files changed, 21 insertions(+) diff --git a/src/core/libraries/kernel/equeue.cpp b/src/core/libraries/kernel/equeue.cpp index 03259cd22..64d4966c0 100644 --- a/src/core/libraries/kernel/equeue.cpp +++ b/src/core/libraries/kernel/equeue.cpp @@ -283,6 +283,19 @@ s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* return ORBIS_OK; } +int PS4_SYSV_ABI sceKernelDeleteHRTimerEvent(SceKernelEqueue eq, int id) { + if (eq == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + if (eq->HasSmallTimer()) { + return eq->RemoveSmallTimer(id) ? ORBIS_OK : ORBIS_KERNEL_ERROR_ENOENT; + } else { + return eq->RemoveEvent(id, SceKernelEvent::Filter::HrTimer) ? 
ORBIS_OK + : ORBIS_KERNEL_ERROR_ENOENT; + } +} + int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id) { if (eq == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; @@ -362,6 +375,7 @@ void RegisterEventQueue(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("4R6-OvI2cEA", "libkernel", 1, "libkernel", 1, 1, sceKernelAddUserEvent); LIB_FUNCTION("WDszmSbWuDk", "libkernel", 1, "libkernel", 1, 1, sceKernelAddUserEventEdge); LIB_FUNCTION("R74tt43xP6k", "libkernel", 1, "libkernel", 1, 1, sceKernelAddHRTimerEvent); + LIB_FUNCTION("J+LF6LwObXU", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteHRTimerEvent); LIB_FUNCTION("F6e0kwo4cnk", "libkernel", 1, "libkernel", 1, 1, sceKernelTriggerUserEvent); LIB_FUNCTION("LJDwdSNTnDg", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteUserEvent); LIB_FUNCTION("mJ7aghmgvfc", "libkernel", 1, "libkernel", 1, 1, sceKernelGetEventId); diff --git a/src/core/libraries/kernel/equeue.h b/src/core/libraries/kernel/equeue.h index f8759137c..2db5e6ca7 100644 --- a/src/core/libraries/kernel/equeue.h +++ b/src/core/libraries/kernel/equeue.h @@ -111,6 +111,13 @@ public: bool HasSmallTimer() const { return small_timer_event.event.data != 0; } + bool RemoveSmallTimer(u64 id) { + if (HasSmallTimer() && small_timer_event.event.ident == id) { + small_timer_event = {}; + return true; + } + return false; + } int WaitForSmallTimer(SceKernelEvent* ev, int num, u32 micros); From 65cd3be4cae48c9479a83092b2d7b4a28ffaf0e4 Mon Sep 17 00:00:00 2001 From: oltolm Date: Tue, 31 Dec 2024 19:08:47 +0100 Subject: [PATCH 28/53] Qt: fix deprecation warnings (#1672) --- src/qt_gui/check_update.cpp | 7 +++++-- src/qt_gui/settings_dialog.cpp | 22 +++++++++++++++++----- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/qt_gui/check_update.cpp b/src/qt_gui/check_update.cpp index edd55b804..e3e019144 100644 --- a/src/qt_gui/check_update.cpp +++ b/src/qt_gui/check_update.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include 
#include @@ -24,11 +25,9 @@ #include #include #include -#include #include "check_update.h" using namespace Common::FS; -namespace fs = std::filesystem; CheckUpdate::CheckUpdate(const bool showMessage, QWidget* parent) : QDialog(parent), networkManager(new QNetworkAccessManager(this)) { @@ -254,7 +253,11 @@ void CheckUpdate::setupUI(const QString& downloadUrl, const QString& latestDate, connect(noButton, &QPushButton::clicked, this, [this]() { close(); }); autoUpdateCheckBox->setChecked(Config::autoUpdate()); +#if (QT_VERSION < QT_VERSION_CHECK(6, 7, 0)) connect(autoUpdateCheckBox, &QCheckBox::stateChanged, this, [](int state) { +#else + connect(autoUpdateCheckBox, &QCheckBox::checkStateChanged, this, [](Qt::CheckState state) { +#endif const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); Config::setAutoUpdate(state == Qt::Checked); Config::save(user_dir / "config.toml"); diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index 0c1375c7f..5cd0a4d65 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -3,22 +3,24 @@ #include #include +#include #include +#include -#include #include "common/config.h" +#include "common/version.h" #include "qt_gui/compatibility_info.h" #ifdef ENABLE_DISCORD_RPC #include "common/discord_rpc_handler.h" +#include "common/singleton.h" #endif #ifdef ENABLE_UPDATER #include "check_update.h" #endif #include +#include "background_music_player.h" #include "common/logging/backend.h" #include "common/logging/filter.h" -#include "common/logging/formatter.h" -#include "main_window.h" #include "settings_dialog.h" #include "ui_settings_dialog.h" QStringList languageNames = {"Arabic", @@ -130,8 +132,13 @@ SettingsDialog::SettingsDialog(std::span physical_devices, // GENERAL TAB { #ifdef ENABLE_UPDATER +#if (QT_VERSION < QT_VERSION_CHECK(6, 7, 0)) connect(ui->updateCheckBox, &QCheckBox::stateChanged, this, [](int state) { Config::setAutoUpdate(state == Qt::Checked); }); 
+#else + connect(ui->updateCheckBox, &QCheckBox::checkStateChanged, this, + [](Qt::CheckState state) { Config::setAutoUpdate(state == Qt::Checked); }); +#endif connect(ui->updateComboBox, &QComboBox::currentTextChanged, this, [](const QString& channel) { Config::setUpdateChannel(channel.toStdString()); }); @@ -150,7 +157,12 @@ SettingsDialog::SettingsDialog(std::span physical_devices, emit CompatibilityChanged(); }); +#if (QT_VERSION < QT_VERSION_CHECK(6, 7, 0)) connect(ui->enableCompatibilityCheckBox, &QCheckBox::stateChanged, this, [this](int state) { +#else + connect(ui->enableCompatibilityCheckBox, &QCheckBox::checkStateChanged, this, + [this](Qt::CheckState state) { +#endif Config::setCompatibilityEnabled(state); emit CompatibilityChanged(); }); @@ -358,7 +370,7 @@ void SettingsDialog::InitializeEmulatorLanguages() { idx++; } - connect(ui->emulatorLanguageComboBox, qOverload(&QComboBox::currentIndexChanged), this, + connect(ui->emulatorLanguageComboBox, &QComboBox::currentIndexChanged, this, &SettingsDialog::OnLanguageChanged); } @@ -578,4 +590,4 @@ void SettingsDialog::ResetInstallFolders() { } Config::setGameInstallDirs(settings_install_dirs_config); } -} \ No newline at end of file +} From d69341fd31a4209cf2b29b62cd3101301212dac4 Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 1 Jan 2025 03:40:28 +0100 Subject: [PATCH 29/53] hot-fix: detiler: forgotten lut optimizations --- src/video_core/amdgpu/liverpool.cpp | 2 +- src/video_core/host_shaders/detile_m32x1.comp | 2 +- src/video_core/host_shaders/detile_m32x2.comp | 2 +- src/video_core/host_shaders/detile_m32x4.comp | 2 +- src/video_core/host_shaders/detile_m8x1.comp | 2 +- src/video_core/host_shaders/detile_m8x2.comp | 2 +- .../host_shaders/detile_macro32x1.comp | 70 ++++++++++--------- .../host_shaders/detile_macro32x2.comp | 70 ++++++++++--------- 8 files changed, 78 insertions(+), 74 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 
2926bcc69..bdf4cc92a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -815,7 +815,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { } if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); - rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); + rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:DispatchIndirect", vqid, cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } diff --git a/src/video_core/host_shaders/detile_m32x1.comp b/src/video_core/host_shaders/detile_m32x1.comp index 802f5f531..cdc8d0018 100644 --- a/src/video_core/host_shaders/detile_m32x1.comp +++ b/src/video_core/host_shaders/detile_m32x1.comp @@ -20,7 +20,7 @@ layout(push_constant) uniform image_info { } info; // Inverse morton LUT, small enough to fit into K$ -uint rmort[16] = { +const uint rmort[16] = { 0x11011000, 0x31213020, 0x13031202, 0x33233222, 0x51415040, 0x71617060, diff --git a/src/video_core/host_shaders/detile_m32x2.comp b/src/video_core/host_shaders/detile_m32x2.comp index 90063a185..c128ba5a1 100644 --- a/src/video_core/host_shaders/detile_m32x2.comp +++ b/src/video_core/host_shaders/detile_m32x2.comp @@ -20,7 +20,7 @@ layout(push_constant) uniform image_info { } info; // Inverse morton LUT, small enough to fit into K$ -uint rmort[16] = { +const uint rmort[16] = { 0x11011000, 0x31213020, 0x13031202, 0x33233222, 0x51415040, 0x71617060, diff --git a/src/video_core/host_shaders/detile_m32x4.comp b/src/video_core/host_shaders/detile_m32x4.comp index e1b988172..a09a0b4c4 100644 --- a/src/video_core/host_shaders/detile_m32x4.comp +++ b/src/video_core/host_shaders/detile_m32x4.comp @@ -20,7 +20,7 @@ layout(push_constant) uniform image_info { } info; // Inverse morton LUT, small enough to fit into K$ -uint rmort[16] = { +const uint rmort[16] = { 0x11011000, 0x31213020, 0x13031202, 0x33233222, 0x51415040, 
0x71617060, diff --git a/src/video_core/host_shaders/detile_m8x1.comp b/src/video_core/host_shaders/detile_m8x1.comp index 39d0aaeb1..ecf706450 100644 --- a/src/video_core/host_shaders/detile_m8x1.comp +++ b/src/video_core/host_shaders/detile_m8x1.comp @@ -48,4 +48,4 @@ void main() { uint dw_ofs_x = target_tile_x * 2 + col; // 2 = uints uint dw_ofs_y = (target_tile_y * MICRO_TILE_DIM + row) * tiles_per_pitch * 2; // 2 = uints out_data[dw_ofs_x + dw_ofs_y] = dst_tx; -} \ No newline at end of file +} diff --git a/src/video_core/host_shaders/detile_m8x2.comp b/src/video_core/host_shaders/detile_m8x2.comp index 3f8e5ab33..909a14acc 100644 --- a/src/video_core/host_shaders/detile_m8x2.comp +++ b/src/video_core/host_shaders/detile_m8x2.comp @@ -25,7 +25,7 @@ layout(push_constant) uniform image_info { #define TEXELS_PER_ELEMENT 2 // Inverse morton LUT, small enough to fit into K$ -uint rmort[16] = { +const uint rmort[16] = { 0x11011000, 0x31213020, 0x13031202, 0x33233222, 0x51415040, 0x71617060, diff --git a/src/video_core/host_shaders/detile_macro32x1.comp b/src/video_core/host_shaders/detile_macro32x1.comp index 086fbcfb5..ecac47d1c 100644 --- a/src/video_core/host_shaders/detile_macro32x1.comp +++ b/src/video_core/host_shaders/detile_macro32x1.comp @@ -21,46 +21,46 @@ layout(push_constant) uniform image_info { } info; // Each LUT is 64 bytes, so should fit into K$ given tiled slices locality -const uint lut_32bpp[][64] = { +const uint lut_32bpp[][16] = { { - 0x00, 0x01, 0x04, 0x05, 0x40, 0x41, 0x44, 0x45, - 0x02, 0x03, 0x06, 0x07, 0x42, 0x43, 0x46, 0x47, - 0x10, 0x11, 0x14, 0x15, 0x50, 0x51, 0x54, 0x55, - 0x12, 0x13, 0x16, 0x17, 0x52, 0x53, 0x56, 0x57, - 0x80, 0x81, 0x84, 0x85, 0xc0, 0xc1, 0xc4, 0xc5, - 0x82, 0x83, 0x86, 0x87, 0xc2, 0xc3, 0xc6, 0xc7, - 0x90, 0x91, 0x94, 0x95, 0xd0, 0xd1, 0xd4, 0xd5, - 0x92, 0x93, 0x96, 0x97, 0xd2, 0xd3, 0xd6, 0xd7, + 0x05040100, 0x45444140, + 0x07060302, 0x47464342, + 0x15141110, 0x55545150, + 0x17161312, 0x57565352, + 0x85848180, 
0xc5c4c1c0, + 0x87868382, 0xc7c6c3c2, + 0x95949190, 0xd5d4d1d0, + 0x97969392, 0xd7d6d3d2, }, { - 0x08, 0x09, 0x0c, 0x0d, 0x48, 0x49, 0x4c, 0x4d, - 0x0a, 0x0b, 0x0e, 0x0f, 0x4a, 0x4b, 0x4e, 0x4f, - 0x18, 0x19, 0x1c, 0x1d, 0x58, 0x59, 0x5c, 0x5d, - 0x1a, 0x1b, 0x1e, 0x1f, 0x5a, 0x5b, 0x5e, 0x5f, - 0x88, 0x89, 0x8c, 0x8d, 0xc8, 0xc9, 0xcc, 0xcd, - 0x8a, 0x8b, 0x8e, 0x8f, 0xca, 0xcb, 0xce, 0xcf, - 0x98, 0x99, 0x9c, 0x9d, 0xd8, 0xd9, 0xdc, 0xdd, - 0x9a, 0x9b, 0x9e, 0x9f, 0xda, 0xdb, 0xde, 0xdf, + 0x0d0c0908, 0x4d4c4948, + 0x0f0e0b0a, 0x4f4e4b4a, + 0x1d1c1918, 0x5d5c5958, + 0x1f1e1b1a, 0x5f5e5b5a, + 0x8d8c8988, 0xcdccc9c8, + 0x8f8e8b8a, 0xcfcecbca, + 0x9d9c9998, 0xdddcd9d8, + 0x9f9e9b9a, 0xdfdedbda, }, { - 0x20, 0x21, 0x24, 0x25, 0x60, 0x61, 0x64, 0x65, - 0x22, 0x23, 0x26, 0x27, 0x62, 0x63, 0x66, 0x67, - 0x30, 0x31, 0x34, 0x35, 0x70, 0x71, 0x74, 0x75, - 0x32, 0x33, 0x36, 0x37, 0x72, 0x73, 0x76, 0x77, - 0xa0, 0xa1, 0xa4, 0xa5, 0xe0, 0xe1, 0xe4, 0xe5, - 0xa2, 0xa3, 0xa6, 0xa7, 0xe2, 0xe3, 0xe6, 0xe7, - 0xb0, 0xb1, 0xb4, 0xb5, 0xf0, 0xf1, 0xf4, 0xf5, - 0xb2, 0xb3, 0xb6, 0xb7, 0xf2, 0xf3, 0xf6, 0xf7, + 0x25242120, 0x65646160, + 0x27262322, 0x67666362, + 0x35343130, 0x75747170, + 0x37363332, 0x77767372, + 0xa5a4a1a0, 0xe5e4e1e0, + 0xa7a6a3a2, 0xe7e6e3e2, + 0xb5b4b1b0, 0xf5f4f1f0, + 0xb7b6b3b2, 0xf7f6f3f2, }, { - 0x28, 0x29, 0x2c, 0x2d, 0x68, 0x69, 0x6c, 0x6d, - 0x2a, 0x2b, 0x2e, 0x2f, 0x6a, 0x6b, 0x6e, 0x6f, - 0x38, 0x39, 0x3c, 0x3d, 0x78, 0x79, 0x7c, 0x7d, - 0x3a, 0x3b, 0x3e, 0x3f, 0x7a, 0x7b, 0x7e, 0x7f, - 0xa8, 0xa9, 0xac, 0xad, 0xe8, 0xe9, 0xec, 0xed, - 0xaa, 0xab, 0xae, 0xaf, 0xea, 0xeb, 0xee, 0xef, - 0xb8, 0xb9, 0xbc, 0xbd, 0xf8, 0xf9, 0xfc, 0xfd, - 0xba, 0xbb, 0xbe, 0xbf, 0xfa, 0xfb, 0xfe, 0xff, + 0x2d2c2928, 0x6d6c6968, + 0x2f2e2b2a, 0x6f6e6b6a, + 0x3d3c3938, 0x7d7c7978, + 0x3f3e3b3a, 0x7f7e7b7a, + 0xadaca9a8, 0xedece9e8, + 0xafaeabaa, 0xefeeebea, + 0xbdbcb9b8, 0xfdfcf9f8, + 0xbfbebbba, 0xfffefbfa, } }; @@ -77,7 +77,9 @@ void main() { uint col = bitfieldExtract(x, 0, 
3); uint row = bitfieldExtract(y, 0, 3); uint lut = bitfieldExtract(z, 0, 2); - uint idx = lut_32bpp[lut][col + row * MICRO_TILE_DIM]; + uint idx_dw = lut_32bpp[lut][(col + row * MICRO_TILE_DIM) >> 2u]; + uint byte_ofs = gl_LocalInvocationID.x & 3u; + uint idx = bitfieldExtract(idx_dw >> (8 * byte_ofs), 0, 8); uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; uint tile_row = y / MICRO_TILE_DIM; diff --git a/src/video_core/host_shaders/detile_macro32x2.comp b/src/video_core/host_shaders/detile_macro32x2.comp index 296311c7a..d161484c1 100644 --- a/src/video_core/host_shaders/detile_macro32x2.comp +++ b/src/video_core/host_shaders/detile_macro32x2.comp @@ -20,46 +20,46 @@ layout(push_constant) uniform image_info { uint c1; } info; -const uint lut_64bpp[][64] = { +const uint lut_64bpp[][16] = { { - 0x00, 0x01, 0x08, 0x09, 0x40, 0x41, 0x48, 0x49, - 0x02, 0x03, 0x0a, 0x0b, 0x42, 0x43, 0x4a, 0x4b, - 0x10, 0x11, 0x18, 0x19, 0x50, 0x51, 0x58, 0x59, - 0x12, 0x13, 0x1a, 0x1b, 0x52, 0x53, 0x5a, 0x5b, - 0x80, 0x81, 0x88, 0x89, 0xc0, 0xc1, 0xc8, 0xc9, - 0x82, 0x83, 0x8a, 0x8b, 0xc2, 0xc3, 0xca, 0xcb, - 0x90, 0x91, 0x98, 0x99, 0xd0, 0xd1, 0xd8, 0xd9, - 0x92, 0x93, 0x9a, 0x9b, 0xd2, 0xd3, 0xda, 0xdb, + 0x09080100, 0x49484140, + 0x0b0a0302, 0x4a4b4342, + 0x19181110, 0x59585150, + 0x1b1a1312, 0x5a5b5352, + 0x89888180, 0xc9c8c1c0, + 0x8b8a8382, 0xcacbc3c2, + 0x99989190, 0xd9d8d1d0, + 0x9b9a9392, 0xdbdad3d2, }, { - 0x04, 0x05, 0x0c, 0x0d, 0x44, 0x45, 0x4c, 0x4d, - 0x06, 0x07, 0x0e, 0x0f, 0x46, 0x47, 0x4e, 0x4f, - 0x14, 0x15, 0x1c, 0x1d, 0x54, 0x55, 0x5c, 0x5d, - 0x16, 0x17, 0x1e, 0x1f, 0x56, 0x57, 0x5e, 0x5f, - 0x84, 0x85, 0x8c, 0x8d, 0xc4, 0xc5, 0xcc, 0xcd, - 0x86, 0x87, 0x8e, 0x8f, 0xc6, 0xc7, 0xce, 0xcf, - 0x94, 0x95, 0x9c, 0x9d, 0xd4, 0xd5, 0xdc, 0xdd, - 0x96, 0x97, 0x9e, 0x9f, 0xd6, 0xd7, 0xde, 0xdf, + 0x0d0c0504, 0x4d4c4544, + 0x0f0e0706, 0x4f4e4746, + 0x1d1c1514, 0x5d5c5554, + 0x1f1e1716, 0x5f5e5756, + 0x8d8c8584, 0xcdccc5c4, + 0x8f8e8786, 0xcfcec7c6, + 0x9d9c9594, 
0xdddcd5d4, + 0x9f9e9796, 0xdfded7d6, }, { - 0x20, 0x21, 0x28, 0x29, 0x60, 0x61, 0x68, 0x69, - 0x22, 0x23, 0x2a, 0x2b, 0x62, 0x63, 0x6a, 0x6b, - 0x30, 0x31, 0x38, 0x39, 0x70, 0x71, 0x78, 0x79, - 0x32, 0x33, 0x3a, 0x3b, 0x72, 0x73, 0x7a, 0x7b, - 0xa0, 0xa1, 0xa8, 0xa9, 0xe0, 0xe1, 0xe8, 0xe9, - 0xa2, 0xa3, 0xaa, 0xab, 0xe2, 0xe3, 0xea, 0xeb, - 0xb0, 0xb1, 0xb8, 0xb9, 0xf0, 0xf1, 0xf8, 0xf9, - 0xb2, 0xb3, 0xba, 0xbb, 0xf2, 0xf3, 0xfa, 0xfb, + 0x29282120, 0x69686160, + 0x2b2a2322, 0x6b6a6362, + 0x39383130, 0x79787170, + 0x3b3a3332, 0x7b7a7372, + 0xa9a8a1a0, 0xe9e8e1e0, + 0xabaaa3a2, 0xebeae3e2, + 0xb9b8b1b0, 0xf9f8f1f0, + 0xbbbab3b2, 0xfbfaf3f2, }, { - 0x24, 0x25, 0x2c, 0x2d, 0x64, 0x65, 0x6c, 0x6d, - 0x26, 0x27, 0x2e, 0x2f, 0x66, 0x67, 0x6e, 0x6f, - 0x34, 0x35, 0x3c, 0x3d, 0x74, 0x75, 0x7c, 0x7d, - 0x36, 0x37, 0x3e, 0x3f, 0x76, 0x77, 0x7e, 0x7f, - 0xa4, 0xa5, 0xac, 0xad, 0xe4, 0xe5, 0xec, 0xed, - 0xa6, 0xa7, 0xae, 0xaf, 0xe6, 0xe7, 0xee, 0xef, - 0xb4, 0xb5, 0xbc, 0xbd, 0xf4, 0xf5, 0xfc, 0xfd, - 0xb6, 0xb7, 0xbe, 0xbf, 0xf6, 0xf7, 0xfe, 0xff, + 0x2d2c2524, 0x6d6c6564, + 0x2f2e2726, 0x6f6e6766, + 0x3d3c3534, 0x7d7c7574, + 0x3f3e3736, 0x7f7e7776, + 0xadaca5a4, 0xedece5e4, + 0xafaea7a6, 0xefeee7e6, + 0xbdbcb5b4, 0xfdfcf5f4, + 0xbfbeb7b6, 0xfffef7f6, }, }; @@ -76,7 +76,9 @@ void main() { uint col = bitfieldExtract(x, 0, 3); uint row = bitfieldExtract(y, 0, 3); uint lut = bitfieldExtract(z, 0, 2); - uint idx = lut_64bpp[lut][col + row * MICRO_TILE_DIM]; + uint idx_dw = lut_64bpp[lut][(col + row * MICRO_TILE_DIM) >> 2u]; + uint byte_ofs = gl_LocalInvocationID.x & 3u; + uint idx = bitfieldExtract(idx_dw >> (8 * byte_ofs), 0, 8); uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; uint tile_row = y / MICRO_TILE_DIM; From 283442b42f8593408d305be3b0cf2f83788b7f1e Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Wed, 1 Jan 2025 12:04:51 +0200 Subject: [PATCH 30/53] Storing encryption trophy key in config.toml (#1930) * get trophy key from toml file * clang format fix * get 
trophy key from toml file * clang format fix * merge fixes * Update config.cpp --- src/common/config.cpp | 16 ++++++++++++++++ src/common/config.h | 3 +++ src/core/crypto/crypto.cpp | 14 ++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/src/common/config.cpp b/src/common/config.cpp index deef0fa88..4fce7d97f 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -67,6 +67,7 @@ static int cursorHideTimeout = 5; // 5 seconds (default) static bool separateupdatefolder = false; static bool compatibilityData = false; static bool checkCompatibilityOnStartup = false; +static std::string trophyKey = ""; // Gui std::vector settings_install_dirs = {}; @@ -91,6 +92,14 @@ std::string emulator_language = "en"; // Language u32 m_language = 1; // english +std::string getTrophyKey() { + return trophyKey; +} + +void setTrophyKey(std::string key) { + trophyKey = key; +} + bool isNeoMode() { return isNeo; } @@ -652,6 +661,11 @@ void load(const std::filesystem::path& path) { m_language = toml::find_or(settings, "consoleLanguage", 1); } + + if (data.contains("Keys")) { + const toml::value& keys = data.at("Keys"); + trophyKey = toml::find_or(keys, "TrophyKey", ""); + } } void save(const std::filesystem::path& path) { @@ -712,6 +726,8 @@ void save(const std::filesystem::path& path) { data["Debug"]["DebugDump"] = isDebugDump; data["Debug"]["CollectShader"] = isShaderDebug; + data["Keys"]["TrophyKey"] = trophyKey; + std::vector install_dirs; for (const auto& dirString : settings_install_dirs) { install_dirs.emplace_back(std::string{fmt::UTF(dirString.u8string()).data}); diff --git a/src/common/config.h b/src/common/config.h index 701aadb12..9d943008b 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -15,6 +15,9 @@ void load(const std::filesystem::path& path); void save(const std::filesystem::path& path); void saveMainWindow(const std::filesystem::path& path); +std::string getTrophyKey(); +void setTrophyKey(std::string key); + bool isNeoMode(); bool 
isFullscreenMode(); bool getPlayBGM(); diff --git a/src/core/crypto/crypto.cpp b/src/core/crypto/crypto.cpp index 00f1dea46..472d284fc 100644 --- a/src/core/crypto/crypto.cpp +++ b/src/core/crypto/crypto.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "crypto.h" CryptoPP::RSA::PrivateKey Crypto::key_pkg_derived_key3_keyset_init() { @@ -137,6 +138,13 @@ void Crypto::aesCbcCfb128DecryptEntry(std::span ivkey, } } +static void hexToBytes(const char* hex, unsigned char* dst) { + for (size_t i = 0; hex[i] != 0; i++) { + const unsigned char value = (hex[i] < 0x3A) ? (hex[i] - 0x30) : (hex[i] - 0x37); + dst[i / 2] |= ((i % 2) == 0) ? (value << 4) : (value); + } +} + void Crypto::decryptEFSM(std::span NPcommID, std::span efsmIv, std::span ciphertext, std::span decrypted) { @@ -145,9 +153,15 @@ void Crypto::decryptEFSM(std::span NPcommID, // step 1: Encrypt NPcommID CryptoPP::CBC_Mode::Encryption encrypt; + const char* TrophyKeyget = Config::getTrophyKey().c_str(); + std::vector TrophyKey; + hexToBytes(TrophyKeyget, TrophyKey.data()); + std::vector trpKey(16); encrypt.ProcessData(trpKey.data(), NPcommID.data(), 16); + encrypt.SetKeyWithIV(TrophyKey.data(), TrophyKey.size(), TrophyIV.data()); + // step 2: decrypt efsm. 
CryptoPP::CBC_Mode::Decryption decrypt; decrypt.SetKeyWithIV(trpKey.data(), trpKey.size(), efsmIv.data()); From a76e8f0211d540c1bd91ee4d457eb2294daeeaa9 Mon Sep 17 00:00:00 2001 From: polybiusproxy <47796739+polybiusproxy@users.noreply.github.com> Date: Wed, 1 Jan 2025 13:21:00 +0100 Subject: [PATCH 31/53] clang-format --- src/video_core/amdgpu/liverpool.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index bdf4cc92a..985f3c652 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -815,7 +815,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { } if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); - rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:DispatchIndirect", vqid, cmd_address)); + rasterizer->ScopeMarkerBegin( + fmt::format("acb[{}]:{}:DispatchIndirect", vqid, cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } From 5631a31640c80311e1196b81f4743c92ee4c277c Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Wed, 1 Jan 2025 11:34:40 -0600 Subject: [PATCH 32/53] Fix flags (#1999) --- src/core/libraries/videoout/video_out.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index f36de6ade..78a2b11a4 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -52,8 +52,7 @@ s32 PS4_SYSV_ABI sceVideoOutAddFlipEvent(Kernel::SceKernelEqueue eq, s32 handle, Kernel::EqueueEvent event{}; event.event.ident = u64(OrbisVideoOutEventId::Flip); event.event.filter = Kernel::SceKernelEvent::Filter::VideoOut; - // The library only sets EV_ADD but kernel driver forces EV_CLEAR - event.event.flags = 
Kernel::SceKernelEvent::Flags::Clear; + event.event.flags = Kernel::SceKernelEvent::Flags::Add; event.event.udata = udata; event.event.fflags = 0; event.event.data = 0; @@ -79,8 +78,7 @@ s32 PS4_SYSV_ABI sceVideoOutAddVblankEvent(Kernel::SceKernelEqueue eq, s32 handl Kernel::EqueueEvent event{}; event.event.ident = u64(OrbisVideoOutEventId::Vblank); event.event.filter = Kernel::SceKernelEvent::Filter::VideoOut; - // The library only sets EV_ADD but kernel driver forces EV_CLEAR - event.event.flags = Kernel::SceKernelEvent::Flags::Clear; + event.event.flags = Kernel::SceKernelEvent::Flags::Add; event.event.udata = udata; event.event.fflags = 0; event.event.data = 0; From 15c9bb0e83dd809790b366dc125855c050ca8197 Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Wed, 1 Jan 2025 19:05:22 +0100 Subject: [PATCH 33/53] Motion controls (#1984) * Initial motion controls * Store sensor polling rate, and add more logging * Revert commented out logging for testing purposes * Code cleanup & clang * New orientation handling * clang --- src/core/libraries/pad/pad.cpp | 46 +++++++------- src/input/controller.cpp | 110 +++++++++++++++++++++++++++++++++ src/input/controller.h | 12 ++++ src/sdl_window.cpp | 14 +++++ 4 files changed, 160 insertions(+), 22 deletions(-) diff --git a/src/core/libraries/pad/pad.cpp b/src/core/libraries/pad/pad.cpp index 98f086dd9..27564294e 100644 --- a/src/core/libraries/pad/pad.cpp +++ b/src/core/libraries/pad/pad.cpp @@ -104,8 +104,8 @@ int PS4_SYSV_ABI scePadGetControllerInformation(s32 handle, OrbisPadControllerIn pInfo->touchPadInfo.pixelDensity = 1; pInfo->touchPadInfo.resolution.x = 1920; pInfo->touchPadInfo.resolution.y = 950; - pInfo->stickInfo.deadZoneLeft = 2; - pInfo->stickInfo.deadZoneRight = 2; + pInfo->stickInfo.deadZoneLeft = 20; + pInfo->stickInfo.deadZoneRight = 20; pInfo->connectionType = ORBIS_PAD_PORT_TYPE_STANDARD; pInfo->connectedCount = 1; pInfo->connected = true; @@ -286,6 +286,7 
@@ int PS4_SYSV_ABI scePadOutputReport() { } int PS4_SYSV_ABI scePadRead(s32 handle, OrbisPadData* pData, s32 num) { + LOG_TRACE(Lib_Pad, "called"); int connected_count = 0; bool connected = false; Input::State states[64]; @@ -304,16 +305,15 @@ int PS4_SYSV_ABI scePadRead(s32 handle, OrbisPadData* pData, s32 num) { pData[i].rightStick.y = states[i].axes[static_cast(Input::Axis::RightY)]; pData[i].analogButtons.l2 = states[i].axes[static_cast(Input::Axis::TriggerLeft)]; pData[i].analogButtons.r2 = states[i].axes[static_cast(Input::Axis::TriggerRight)]; - pData[i].orientation.x = 0.0f; - pData[i].orientation.y = 0.0f; - pData[i].orientation.z = 0.0f; - pData[i].orientation.w = 1.0f; - pData[i].acceleration.x = 0.0f; - pData[i].acceleration.y = 0.0f; - pData[i].acceleration.z = 0.0f; - pData[i].angularVelocity.x = 0.0f; - pData[i].angularVelocity.y = 0.0f; - pData[i].angularVelocity.z = 0.0f; + pData[i].acceleration.x = states[i].acceleration.x; + pData[i].acceleration.y = states[i].acceleration.y; + pData[i].acceleration.z = states[i].acceleration.z; + pData[i].angularVelocity.x = states[i].angularVelocity.x; + pData[i].angularVelocity.y = states[i].angularVelocity.y; + pData[i].angularVelocity.z = states[i].angularVelocity.z; + Input::GameController::CalculateOrientation(pData[i].acceleration, pData[i].angularVelocity, + 1.0f / controller->accel_poll_rate, + pData[i].orientation); pData[i].touchData.touchNum = (states[i].touchpad[0].state ? 1 : 0) + (states[i].touchpad[1].state ? 
1 : 0); pData[i].touchData.touch[0].x = states[i].touchpad[0].x; @@ -352,6 +352,7 @@ int PS4_SYSV_ABI scePadReadHistory() { } int PS4_SYSV_ABI scePadReadState(s32 handle, OrbisPadData* pData) { + LOG_TRACE(Lib_Pad, "called"); if (handle == ORBIS_PAD_ERROR_DEVICE_NO_HANDLE) { return ORBIS_PAD_ERROR_INVALID_HANDLE; } @@ -367,16 +368,15 @@ int PS4_SYSV_ABI scePadReadState(s32 handle, OrbisPadData* pData) { pData->rightStick.y = state.axes[static_cast(Input::Axis::RightY)]; pData->analogButtons.l2 = state.axes[static_cast(Input::Axis::TriggerLeft)]; pData->analogButtons.r2 = state.axes[static_cast(Input::Axis::TriggerRight)]; - pData->orientation.x = 0; - pData->orientation.y = 0; - pData->orientation.z = 0; - pData->orientation.w = 1; - pData->acceleration.x = 0.0f; - pData->acceleration.y = 0.0f; - pData->acceleration.z = 0.0f; - pData->angularVelocity.x = 0.0f; - pData->angularVelocity.y = 0.0f; - pData->angularVelocity.z = 0.0f; + pData->acceleration.x = state.acceleration.x; + pData->acceleration.y = state.acceleration.y; + pData->acceleration.z = state.acceleration.z; + pData->angularVelocity.x = state.angularVelocity.x; + pData->angularVelocity.y = state.angularVelocity.y; + pData->angularVelocity.z = state.angularVelocity.z; + Input::GameController::CalculateOrientation(pData->acceleration, pData->angularVelocity, + 1.0f / controller->accel_poll_rate, + pData->orientation); pData->touchData.touchNum = (state.touchpad[0].state ? 1 : 0) + (state.touchpad[1].state ? 
1 : 0); pData->touchData.touch[0].x = state.touchpad[0].x; @@ -498,6 +498,8 @@ int PS4_SYSV_ABI scePadSetLoginUserNumber() { int PS4_SYSV_ABI scePadSetMotionSensorState(s32 handle, bool bEnable) { LOG_ERROR(Lib_Pad, "(STUBBED) called"); return ORBIS_OK; + // it's already handled by the SDL backend and will be on no matter what + // (assuming the controller supports it) } int PS4_SYSV_ABI scePadSetProcessFocus() { diff --git a/src/input/controller.cpp b/src/input/controller.cpp index 3927b096f..daef9c940 100644 --- a/src/input/controller.cpp +++ b/src/input/controller.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include "common/logging/log.h" #include "core/libraries/kernel/time.h" #include "core/libraries/pad/pad.h" #include "input/controller.h" @@ -116,6 +117,103 @@ void GameController::Axis(int id, Input::Axis axis, int value) { AddState(state); } +void GameController::Gyro(int id, const float gyro[3]) { + std::scoped_lock lock{m_mutex}; + auto state = GetLastState(); + state.time = Libraries::Kernel::sceKernelGetProcessTime(); + + // Update the angular velocity (gyro data) + state.angularVelocity.x = gyro[0]; // X-axis + state.angularVelocity.y = gyro[1]; // Y-axis + state.angularVelocity.z = gyro[2]; // Z-axis + + AddState(state); +} +void GameController::Acceleration(int id, const float acceleration[3]) { + std::scoped_lock lock{m_mutex}; + auto state = GetLastState(); + state.time = Libraries::Kernel::sceKernelGetProcessTime(); + + // Update the acceleration values + state.acceleration.x = acceleration[0]; // X-axis + state.acceleration.y = acceleration[1]; // Y-axis + state.acceleration.z = acceleration[2]; // Z-axis + + AddState(state); +} + +// Stolen from +// https://github.com/xioTechnologies/Open-Source-AHRS-With-x-IMU/blob/master/x-IMU%20IMU%20and%20AHRS%20Algorithms/x-IMU%20IMU%20and%20AHRS%20Algorithms/AHRS/MahonyAHRS.cs +float eInt[3] = {0.0f, 0.0f, 0.0f}; // Integral error terms +const float Kp = 50.0f; // 
Proportional gain +const float Ki = 1.0f; // Integral gain +Libraries::Pad::OrbisFQuaternion o = {1, 0, 0, 0}; +void GameController::CalculateOrientation(Libraries::Pad::OrbisFVector3& acceleration, + Libraries::Pad::OrbisFVector3& angularVelocity, + float deltaTime, + Libraries::Pad::OrbisFQuaternion& orientation) { + float ax = acceleration.x, ay = acceleration.y, az = acceleration.z; + float gx = angularVelocity.x, gy = angularVelocity.y, gz = angularVelocity.z; + + float q1 = o.w, q2 = o.x, q3 = o.y, q4 = o.z; + + // Normalize accelerometer measurement + float norm = std::sqrt(ax * ax + ay * ay + az * az); + if (norm == 0.0f) + return; // Handle NaN + norm = 1.0f / norm; + ax *= norm; + ay *= norm; + az *= norm; + + // Estimated direction of gravity + float vx = 2.0f * (q2 * q4 - q1 * q3); + float vy = 2.0f * (q1 * q2 + q3 * q4); + float vz = q1 * q1 - q2 * q2 - q3 * q3 + q4 * q4; + + // Error is cross product between estimated direction and measured direction of gravity + float ex = (ay * vz - az * vy); + float ey = (az * vx - ax * vz); + float ez = (ax * vy - ay * vx); + if (Ki > 0.0f) { + eInt[0] += ex * deltaTime; // Accumulate integral error + eInt[1] += ey * deltaTime; + eInt[2] += ez * deltaTime; + } else { + eInt[0] = eInt[1] = eInt[2] = 0.0f; // Prevent integral wind-up + } + + // Apply feedback terms + gx += Kp * ex + Ki * eInt[0]; + gy += Kp * ey + Ki * eInt[1]; + gz += Kp * ez + Ki * eInt[2]; + + //// Integrate rate of change of quaternion + // float pa = q2, pb = q3, pc = q4; + // q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime); + // q2 += (pa * gx + pb * gz - pc * gy) * (0.5f * deltaTime); + // q3 += (pb * gy - pa * gz + pc * gx) * (0.5f * deltaTime); + // q4 += (pc * gz + pa * gy - pb * gx) * (0.5f * deltaTime); + q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime); + q2 += (q1 * gx + q3 * gz - q4 * gy) * (0.5f * deltaTime); + q3 += (q1 * gy - q2 * gz + q4 * gx) * (0.5f * deltaTime); + q4 += (q1 * gz + q2 * gy - q3 * gx) * (0.5f * 
deltaTime); + + // Normalize quaternion + norm = std::sqrt(q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4); + norm = 1.0f / norm; + orientation.w = q1 * norm; + orientation.x = q2 * norm; + orientation.y = q3 * norm; + orientation.z = q4 * norm; + o.w = q1 * norm; + o.x = q2 * norm; + o.y = q3 * norm; + o.z = q4 * norm; + LOG_DEBUG(Lib_Pad, "Calculated orientation: {:.2f} {:.2f} {:.2f} {:.2f}", orientation.x, + orientation.y, orientation.z, orientation.w); +} + void GameController::SetLightBarRGB(u8 r, u8 g, u8 b) { if (m_sdl_gamepad != nullptr) { SDL_SetGamepadLED(m_sdl_gamepad, r, g, b); @@ -149,6 +247,18 @@ void GameController::TryOpenSDLController() { int gamepad_count; SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count); m_sdl_gamepad = gamepad_count > 0 ? SDL_OpenGamepad(gamepads[0]) : nullptr; + if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_GYRO, true)) { + gyro_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_GYRO); + LOG_INFO(Input, "Gyro initialized, poll rate: {}", gyro_poll_rate); + } else { + LOG_ERROR(Input, "Failed to initialize gyro controls for gamepad"); + } + if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_ACCEL, true)) { + accel_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_ACCEL); + LOG_INFO(Input, "Accel initialized, poll rate: {}", accel_poll_rate); + } else { + LOG_ERROR(Input, "Failed to initialize accel controls for gamepad"); + } SDL_free(gamepads); SetLightBarRGB(0, 0, 255); diff --git a/src/input/controller.h b/src/input/controller.h index d425fb46c..c6fc02c24 100644 --- a/src/input/controller.h +++ b/src/input/controller.h @@ -33,6 +33,9 @@ struct State { u64 time = 0; int axes[static_cast(Axis::AxisMax)] = {128, 128, 128, 128, 0, 0}; TouchpadEntry touchpad[2] = {{false, 0, 0}, {false, 0, 0}}; + Libraries::Pad::OrbisFVector3 acceleration = {0.0f, 0.0f, 0.0f}; + Libraries::Pad::OrbisFVector3 angularVelocity = {0.0f, 0.0f, 0.0f}; + Libraries::Pad::OrbisFQuaternion orientation = 
{0.0f, 0.0f, 0.0f, 1.0f}; }; inline int GetAxis(int min, int max, int value) { @@ -53,12 +56,21 @@ public: void CheckButton(int id, Libraries::Pad::OrbisPadButtonDataOffset button, bool isPressed); void AddState(const State& state); void Axis(int id, Input::Axis axis, int value); + void Gyro(int id, const float gyro[3]); + void Acceleration(int id, const float acceleration[3]); void SetLightBarRGB(u8 r, u8 g, u8 b); bool SetVibration(u8 smallMotor, u8 largeMotor); void SetTouchpadState(int touchIndex, bool touchDown, float x, float y); void TryOpenSDLController(); u32 Poll(); + float gyro_poll_rate; + float accel_poll_rate; + static void CalculateOrientation(Libraries::Pad::OrbisFVector3& acceleration, + Libraries::Pad::OrbisFVector3& angularVelocity, + float deltaTime, + Libraries::Pad::OrbisFQuaternion& orientation); + private: struct StateInternal { bool obtained = false; diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index 50c3e93ee..d694b0939 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -161,6 +161,20 @@ void WindowSDL::WaitEvent() { case SDL_EVENT_GAMEPAD_TOUCHPAD_MOTION: OnGamepadEvent(&event); break; + // i really would have appreciated ANY KIND OF DOCUMENTATION ON THIS + // AND IT DOESN'T EVEN USE PROPER ENUMS + case SDL_EVENT_GAMEPAD_SENSOR_UPDATE: + switch ((SDL_SensorType)event.gsensor.sensor) { + case SDL_SENSOR_GYRO: + controller->Gyro(0, event.gsensor.data); + break; + case SDL_SENSOR_ACCEL: + controller->Acceleration(0, event.gsensor.data); + break; + default: + break; + } + break; case SDL_EVENT_QUIT: is_open = false; break; From 444016df7e9127a530ff969b674ee8be4aea238d Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Wed, 1 Jan 2025 19:11:30 +0100 Subject: [PATCH 34/53] Fix for trophy decryption (#2005) * fix for trophy decryption * more sanity checks * switch back to strings for simplicity * get rid of unnecessary span --- src/common/config.cpp | 2 +- src/core/crypto/crypto.cpp | 20 
+++++--------------- src/core/crypto/crypto.h | 3 ++- src/core/file_format/trp.cpp | 20 +++++++++++++++++++- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index 4fce7d97f..246644e2d 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -67,7 +67,7 @@ static int cursorHideTimeout = 5; // 5 seconds (default) static bool separateupdatefolder = false; static bool compatibilityData = false; static bool checkCompatibilityOnStartup = false; -static std::string trophyKey = ""; +static std::string trophyKey; // Gui std::vector settings_install_dirs = {}; diff --git a/src/core/crypto/crypto.cpp b/src/core/crypto/crypto.cpp index 472d284fc..4020edfd8 100644 --- a/src/core/crypto/crypto.cpp +++ b/src/core/crypto/crypto.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include + #include "crypto.h" CryptoPP::RSA::PrivateKey Crypto::key_pkg_derived_key3_keyset_init() { @@ -138,29 +138,19 @@ void Crypto::aesCbcCfb128DecryptEntry(std::span ivkey, } } -static void hexToBytes(const char* hex, unsigned char* dst) { - for (size_t i = 0; hex[i] != 0; i++) { - const unsigned char value = (hex[i] < 0x3A) ? (hex[i] - 0x30) : (hex[i] - 0x37); - dst[i / 2] |= ((i % 2) == 0) ? 
(value << 4) : (value); - } -} - -void Crypto::decryptEFSM(std::span NPcommID, +void Crypto::decryptEFSM(std::span trophyKey, + std::span NPcommID, std::span efsmIv, std::span ciphertext, std::span decrypted) { - std::vector TrophyIV = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // step 1: Encrypt NPcommID CryptoPP::CBC_Mode::Encryption encrypt; - const char* TrophyKeyget = Config::getTrophyKey().c_str(); - std::vector TrophyKey; - hexToBytes(TrophyKeyget, TrophyKey.data()); - + std::vector trophyIv(16, 0); std::vector trpKey(16); + encrypt.SetKeyWithIV(trophyKey.data(), trophyKey.size(), trophyIv.data()); encrypt.ProcessData(trpKey.data(), NPcommID.data(), 16); - encrypt.SetKeyWithIV(TrophyKey.data(), TrophyKey.size(), TrophyIV.data()); // step 2: decrypt efsm. CryptoPP::CBC_Mode::Decryption decrypt; diff --git a/src/core/crypto/crypto.h b/src/core/crypto/crypto.h index 83249bd7d..b5d8104b5 100644 --- a/src/core/crypto/crypto.h +++ b/src/core/crypto/crypto.h @@ -32,7 +32,8 @@ public: void aesCbcCfb128DecryptEntry(std::span ivkey, std::span ciphertext, std::span decrypted); - void decryptEFSM(std::span, std::span efsmIv, + void decryptEFSM(std::span trophyKey, + std::span NPcommID, std::span efsmIv, std::span ciphertext, std::span decrypted); void PfsGenCryptoKey(std::span ekpfs, std::span seed, diff --git a/src/core/file_format/trp.cpp b/src/core/file_format/trp.cpp index 2ca88c778..d25c93c3f 100644 --- a/src/core/file_format/trp.cpp +++ b/src/core/file_format/trp.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/config.h" #include "common/logging/log.h" #include "common/path_util.h" #include "trp.h" @@ -33,12 +34,29 @@ static void removePadding(std::vector& vec) { } } +static void hexToBytes(const char* hex, unsigned char* dst) { + for (size_t i = 0; hex[i] != 0; i++) { + const unsigned char value = (hex[i] < 0x3A) ? 
(hex[i] - 0x30) : (hex[i] - 0x37); + dst[i / 2] |= ((i % 2) == 0) ? (value << 4) : (value); + } +} + bool TRP::Extract(const std::filesystem::path& trophyPath, const std::string titleId) { std::filesystem::path gameSysDir = trophyPath / "sce_sys/trophy/"; if (!std::filesystem::exists(gameSysDir)) { LOG_CRITICAL(Common_Filesystem, "Game sce_sys directory doesn't exist"); return false; } + + const auto user_key_str = Config::getTrophyKey(); + if (user_key_str.size() != 32) { + LOG_CRITICAL(Common_Filesystem, "Trophy decryption key is not specified"); + return false; + } + + std::array user_key{}; + hexToBytes(user_key_str.c_str(), user_key.data()); + for (int index = 0; const auto& it : std::filesystem::directory_iterator(gameSysDir)) { if (it.is_regular_file()) { GetNPcommID(trophyPath, index); @@ -97,7 +115,7 @@ bool TRP::Extract(const std::filesystem::path& trophyPath, const std::string tit return false; } file.Read(ESFM); - crypto.decryptEFSM(np_comm_id, esfmIv, ESFM, XML); // decrypt + crypto.decryptEFSM(user_key, np_comm_id, esfmIv, ESFM, XML); // decrypt removePadding(XML); std::string xml_name = entry.entry_name; size_t pos = xml_name.find("ESFM"); From 40211642caf32922a13b57703b8a6488a107f716 Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 1 Jan 2025 21:04:59 +0100 Subject: [PATCH 35/53] kernel: memory: PRT mapped area setter/getter --- src/core/libraries/kernel/memory.cpp | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 7d326cbbf..b18d5f570 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -505,6 +505,41 @@ int PS4_SYSV_ABI posix_munmap(void* addr, size_t len) { return result; } +static constexpr int MAX_PTR_APERTURES = 3; +static constexpr VAddr PRT_AREA_START_ADDR = 0x1000000000; +static constexpr size_t PRT_AREA_SIZE = 0xec00000000; +static std::array, MAX_PTR_APERTURES> PrtApertures{}; + +int 
PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) { + if (id < 0 || id >= MAX_PTR_APERTURES) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + if (address < PRT_AREA_START_ADDR || address + size > PRT_AREA_START_ADDR + PRT_AREA_SIZE) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + if (address % 4096 != 0) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + LOG_WARNING(Kernel_Vmm, + "PRT aperture id = {}, address = {:#x}, size = {:#x} is set but not used", id, + address, size); + + PrtApertures[id] = {address, size}; + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceKernelGetPrtAperture(int id, VAddr* addres, size_t* size) { + if (id < 0 || id >= MAX_PTR_APERTURES) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + std::tie(*addres, *size) = PrtApertures[id]; + return ORBIS_OK; +} + void RegisterMemory(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("rTXw65xmLIA", "libkernel", 1, "libkernel", 1, 1, sceKernelAllocateDirectMemory); LIB_FUNCTION("B+vc2AO2Zrc", "libkernel", 1, "libkernel", 1, 1, @@ -551,6 +586,10 @@ void RegisterMemory(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("BPE9s9vQQXo", "libScePosix", 1, "libkernel", 1, 1, posix_mmap); LIB_FUNCTION("UqDGjXA5yUM", "libkernel", 1, "libkernel", 1, 1, posix_munmap); LIB_FUNCTION("UqDGjXA5yUM", "libScePosix", 1, "libkernel", 1, 1, posix_munmap); + + // PRT memory management + LIB_FUNCTION("BohYr-F7-is", "libkernel", 1, "libkernel", 1, 1, sceKernelSetPrtAperture); + LIB_FUNCTION("L0v2Go5jOuM", "libkernel", 1, "libkernel", 1, 1, sceKernelGetPrtAperture); } } // namespace Libraries::Kernel From 46720e756b82ccb9e2ae86a7100dd2e91587c108 Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Wed, 1 Jan 2025 23:08:04 +0300 Subject: [PATCH 36/53] infra: more clarifications in issue templates (#2010) --- .github/ISSUE_TEMPLATE/game-bug-report.yaml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/game-bug-report.yaml b/.github/ISSUE_TEMPLATE/game-bug-report.yaml 
index 407ee2fe3..2d984b697 100644 --- a/.github/ISSUE_TEMPLATE/game-bug-report.yaml +++ b/.github/ISSUE_TEMPLATE/game-bug-report.yaml @@ -13,7 +13,11 @@ body: **Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only. If you have a support request or are unsure about the nature of your issue please contact us on [discord](https://discord.gg/bFJxfftGW6).** - You can also check the [Game Compatibility Repository](https://github.com/shadps4-emu/shadps4-game-compatibility) for the information about the status of the game. + This repository does not provide support for modded games. You should perform and test a clean game installation before submitting an issue. + + This repository does not provide support for game patches. If you are having issues with patches please refer to [Cheats and Patches Repository](https://github.com/shadps4-emu/ps4_cheats). + + Before submitting an issue please check [Game Compatibility Repository](https://github.com/shadps4-emu/shadps4-game-compatibility) for the information about the status of the game. Please make an effort to make sure your issue isn't already reported. @@ -21,15 +25,15 @@ body: - type: checkboxes id: checklist attributes: - label: Checklist + label: Checklist (we expect you to perform these steps before opening the issue) options: - label: I have searched for a similar issue in this repository and did not find one. required: true - label: I am using an official build obtained from [releases](https://github.com/shadps4-emu/shadPS4/releases) or updated one of those builds using its in-app updater. required: true - - label: I have re-dumped the game and performed a clean install without mods. + - label: I have re-dumped the game and performed a clean install without mods and the issue is still present. required: true - - label: I have disabled all patches and cheats. + - label: I have disabled all patches and cheats and the issue is still present. 
required: true - label: I have all the required [system modules](https://github.com/shadps4-emu/shadps4-game-compatibility?tab=readme-ov-file#informations) installed. required: true From 4e0757ed550d5dda41c8389e1d9f6eb27a2b7035 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Wed, 1 Jan 2025 23:08:56 +0200 Subject: [PATCH 37/53] Removed LLE libs (#2012) * Removed LLE fiber, JpegEnc modules . HLE replacements are good enough * fixup --- src/core/libraries/libs.cpp | 4 ++++ src/emulator.cpp | 4 +--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp index c30c2d7c3..49cd54a5b 100644 --- a/src/core/libraries/libs.cpp +++ b/src/core/libraries/libs.cpp @@ -47,6 +47,8 @@ #include "core/libraries/videodec/videodec.h" #include "core/libraries/videodec/videodec2.h" #include "core/libraries/videoout/video_out.h" +#include "fiber/fiber.h" +#include "jpeg/jpegenc.h" namespace Libraries { @@ -93,6 +95,8 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) { Libraries::Videodec::RegisterlibSceVideodec(sym); Libraries::RazorCpu::RegisterlibSceRazorCpu(sym); Libraries::Move::RegisterlibSceMove(sym); + Libraries::Fiber::RegisterlibSceFiber(sym); + Libraries::JpegEnc::RegisterlibSceJpegEnc(sym); } } // namespace Libraries diff --git a/src/emulator.cpp b/src/emulator.cpp index dbe21a141..4f0c61236 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -282,16 +282,14 @@ void Emulator::Run(const std::filesystem::path& file) { } void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) { - constexpr std::array ModulesToLoad{ + constexpr std::array ModulesToLoad{ {{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2}, - {"libSceFiber.sprx", &Libraries::Fiber::RegisterlibSceFiber}, {"libSceUlt.sprx", nullptr}, {"libSceJson.sprx", nullptr}, {"libSceJson2.sprx", nullptr}, {"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal}, {"libSceDiscMap.sprx", 
&Libraries::DiscMap::RegisterlibSceDiscMap}, {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc}, - {"libSceJpegEnc.sprx", &Libraries::JpegEnc::RegisterlibSceJpegEnc}, {"libSceCesCs.sprx", nullptr}, {"libSceFont.sprx", nullptr}, {"libSceFontFt.sprx", nullptr}, From 8b66e9f78d9c416b6bd487dc4aeda86c2ec2f37f Mon Sep 17 00:00:00 2001 From: DanielSvoboda Date: Thu, 2 Jan 2025 04:15:08 -0300 Subject: [PATCH 38/53] GUI: Settings - Trophy Key (#2013) * GUI: Settings-Trophy Key * - * QLineEdit::Password * clang * size --- src/qt_gui/settings_dialog.cpp | 10 + src/qt_gui/settings_dialog.ui | 617 ++++++++++++++++--------------- src/qt_gui/translations/ar.ts | 15 + src/qt_gui/translations/da_DK.ts | 15 + src/qt_gui/translations/de.ts | 15 + src/qt_gui/translations/el.ts | 15 + src/qt_gui/translations/en.ts | 15 + src/qt_gui/translations/es_ES.ts | 15 + src/qt_gui/translations/fa_IR.ts | 15 + src/qt_gui/translations/fi.ts | 15 + src/qt_gui/translations/fr.ts | 15 + src/qt_gui/translations/hu_HU.ts | 15 + src/qt_gui/translations/id.ts | 15 + src/qt_gui/translations/it.ts | 15 + src/qt_gui/translations/ja_JP.ts | 15 + src/qt_gui/translations/ko_KR.ts | 15 + src/qt_gui/translations/lt_LT.ts | 15 + src/qt_gui/translations/nb.ts | 15 + src/qt_gui/translations/nl.ts | 15 + src/qt_gui/translations/pl_PL.ts | 15 + src/qt_gui/translations/pt_BR.ts | 15 + src/qt_gui/translations/ro_RO.ts | 15 + src/qt_gui/translations/ru_RU.ts | 15 + src/qt_gui/translations/sq.ts | 15 + src/qt_gui/translations/tr_TR.ts | 15 + src/qt_gui/translations/uk_UA.ts | 15 + src/qt_gui/translations/vi_VN.ts | 15 + src/qt_gui/translations/zh_CN.ts | 15 + src/qt_gui/translations/zh_TW.ts | 15 + 29 files changed, 736 insertions(+), 296 deletions(-) diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index 5cd0a4d65..6d76a5318 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -213,6 +213,8 @@ SettingsDialog::SettingsDialog(std::span physical_devices, 
ui->showSplashCheckBox->installEventFilter(this); ui->discordRPCCheckbox->installEventFilter(this); ui->userName->installEventFilter(this); + ui->label_Trophy->installEventFilter(this); + ui->trophyKeyLineEdit->installEventFilter(this); ui->logTypeGroupBox->installEventFilter(this); ui->logFilter->installEventFilter(this); #ifdef ENABLE_UPDATER @@ -307,6 +309,9 @@ void SettingsDialog::LoadValuesFromConfig() { QString::fromStdString(toml::find_or(data, "General", "logFilter", ""))); ui->userNameLineEdit->setText( QString::fromStdString(toml::find_or(data, "General", "userName", "shadPS4"))); + ui->trophyKeyLineEdit->setText( + QString::fromStdString(toml::find_or(data, "Keys", "TrophyKey", ""))); + ui->trophyKeyLineEdit->setEchoMode(QLineEdit::Password); ui->debugDump->setChecked(toml::find_or(data, "Debug", "DebugDump", false)); ui->vkValidationCheckBox->setChecked(toml::find_or(data, "Vulkan", "validation", false)); ui->vkSyncValidationCheckBox->setChecked( @@ -419,6 +424,10 @@ void SettingsDialog::updateNoteTextEdit(const QString& elementName) { text = tr("discordRPCCheckbox"); } else if (elementName == "userName") { text = tr("userName"); + } else if (elementName == "label_Trophy") { + text = tr("TrophyKey"); + } else if (elementName == "trophyKeyLineEdit") { + text = tr("TrophyKey"); } else if (elementName == "logTypeGroupBox") { text = tr("logTypeGroupBox"); } else if (elementName == "logFilter") { @@ -529,6 +538,7 @@ void SettingsDialog::UpdateSettings() { Config::setLogType(ui->logTypeComboBox->currentText().toStdString()); Config::setLogFilter(ui->logFilterLineEdit->text().toStdString()); Config::setUserName(ui->userNameLineEdit->text().toStdString()); + Config::setTrophyKey(ui->trophyKeyLineEdit->text().toStdString()); Config::setCursorState(ui->hideCursorComboBox->currentIndex()); Config::setCursorHideTimeout(ui->idleTimeoutSpinBox->value()); Config::setGpuId(ui->graphicsAdapterBox->currentIndex() - 1); diff --git a/src/qt_gui/settings_dialog.ui 
b/src/qt_gui/settings_dialog.ui index af1edb0dd..2e7e3db37 100644 --- a/src/qt_gui/settings_dialog.ui +++ b/src/qt_gui/settings_dialog.ui @@ -11,8 +11,8 @@ 0 0 - 950 - 780 + 970 + 670 @@ -67,8 +67,8 @@ 0 0 - 822 - 487 + 946 + 536 @@ -77,87 +77,7 @@ 0 - - - - - - Logger - - - - - - - 0 - - - 0 - - - 0 - - - 0 - - - - - Log Type - - - - - - - async - - - - - sync - - - - - - - - - - - - - - 6 - - - 0 - - - - - - - Log Filter - - - - - - - - - - - - - - - - - - + @@ -194,7 +114,7 @@ - + @@ -268,10 +188,10 @@ - + - -1 + 6 QLayout::SizeConstraint::SetDefaultConstraint @@ -436,138 +356,8 @@ - - - - - - 0 - 0 - - - - - 0 - 0 - - - - GUI Settings - - - - 1 - - - 11 - - - - - Disable Trophy Pop-ups - - - - - - - - 0 - 0 - - - - Play title music - - - - - - - 1 - - - 0 - - - - - Qt::Orientation::Vertical - - - QSizePolicy::Policy::Fixed - - - - 20 - 13 - - - - - - - - - 0 - 0 - - - - - 16777215 - 16777215 - - - - Volume - - - - - - - Set the volume of the background music. - - - 100 - - - 10 - - - 20 - - - 50 - - - Qt::Orientation::Horizontal - - - false - - - false - - - QSlider::TickPosition::NoTicks - - - 10 - - - - - - - - - - - - + @@ -638,6 +428,160 @@ + + + + + + + 0 + 0 + + + + + 0 + 0 + + + + GUI Settings + + + + 1 + + + 11 + + + + + + 0 + 0 + + + + Play title music + + + + + + + 1 + + + 0 + + + + + + 0 + 0 + + + + + 16777215 + 16777215 + + + + Volume + + + + + + + Set the volume of the background music. + + + 100 + + + 10 + + + 20 + + + 50 + + + Qt::Orientation::Horizontal + + + false + + + false + + + QSlider::TickPosition::NoTicks + + + 10 + + + + + + + 6 + + + 0 + + + + + + + Trophy + + + + + + Disable Trophy Pop-ups + + + + + + + Trophy Key + + + + + + + + 0 + 0 + + + + + + + + + + + + + + + + + + @@ -655,8 +599,8 @@ 0 0 - 396 - 222 + 926 + 536 @@ -946,8 +890,8 @@ 0 0 - 536 - 192 + 926 + 536 @@ -1197,8 +1141,8 @@ 0 0 - 146 - 215 + 926 + 536 @@ -1211,18 +1155,25 @@ - - - Remove + + + 0 - - - - - - Add... - - + + + + Add... 
+ + + + + + + Remove + + + + @@ -1263,71 +1214,145 @@ 0 0 - 288 - 163 + 926 + 536 - + + + 0 + + + 0 + - - - true - - - General - - - Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignTop - - - - - - Enable Debug Dumping - - - - - - - Qt::Orientation::Vertical - - - QSizePolicy::Policy::MinimumExpanding - - - - 0 - 0 - - - - - - - - Enable Vulkan Validation Layers - - - - - - - Enable Vulkan Synchronization Validation - - - - - - - Enable RenderDoc Debugging - - - - - + + + + + true + + + General + + + Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignTop + + + + + + Enable Debug Dumping + + + + + + + Enable Vulkan Validation Layers + + + + + + + Enable Vulkan Synchronization Validation + + + + + + + Enable RenderDoc Debugging + + + + + + + + + + + + + + Logger + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Log Type + + + + + + + async + + + + + sync + + + + + + + + + + + + + + 6 + + + 0 + + + + + + + Log Filter + + + + + + + + + + + + + + + + diff --git a/src/qt_gui/translations/ar.ts b/src/qt_gui/translations/ar.ts index 1f65db04a..9296ef9a4 100644 --- a/src/qt_gui/translations/ar.ts +++ b/src/qt_gui/translations/ar.ts @@ -537,6 +537,16 @@ Username اسم المستخدم + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName اسم المستخدم:\nيضبط اسم حساب PS4، الذي قد يتم عرضه في بعض الألعاب. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/da_DK.ts b/src/qt_gui/translations/da_DK.ts index 943e2d092..ef14d1496 100644 --- a/src/qt_gui/translations/da_DK.ts +++ b/src/qt_gui/translations/da_DK.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Brugernavn:\nIndstiller PS4-kontoens navn, som kan blive vist i nogle spil. 
+ + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/de.ts b/src/qt_gui/translations/de.ts index cbbef8215..d4587fc87 100644 --- a/src/qt_gui/translations/de.ts +++ b/src/qt_gui/translations/de.ts @@ -537,6 +537,16 @@ Username Benutzername + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Benutzername:\nLegt den Namen des PS4-Kontos fest, der in einigen Spielen angezeigt werden kann. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/el.ts b/src/qt_gui/translations/el.ts index 8737f5216..671676d62 100644 --- a/src/qt_gui/translations/el.ts +++ b/src/qt_gui/translations/el.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Όνομα Χρήστη:\nΟρίζει το όνομα του λογαριασμού PS4, το οποίο μπορεί να εμφανιστεί σε ορισμένα παιχνίδια. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/en.ts b/src/qt_gui/translations/en.ts index 692aa527e..326cf27b3 100644 --- a/src/qt_gui/translations/en.ts +++ b/src/qt_gui/translations/en.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Username:\nSets the PS4's account username, which may be displayed by some games. 
+ + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/es_ES.ts b/src/qt_gui/translations/es_ES.ts index 70be2253d..775f78958 100644 --- a/src/qt_gui/translations/es_ES.ts +++ b/src/qt_gui/translations/es_ES.ts @@ -537,6 +537,16 @@ Username Nombre de usuario + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Nombre de Usuario:\nEstablece el nombre de usuario de la cuenta de PS4, que puede ser mostrado por algunos juegos. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/fa_IR.ts b/src/qt_gui/translations/fa_IR.ts index 54187cf9b..eb60613d2 100644 --- a/src/qt_gui/translations/fa_IR.ts +++ b/src/qt_gui/translations/fa_IR.ts @@ -537,6 +537,16 @@ Username نام کاربری + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName نام کاربری:\nنام کاربری حساب PS4 را تنظیم می‌کند که ممکن است توسط برخی بازی‌ها نمایش داده شود. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/fi.ts b/src/qt_gui/translations/fi.ts index bdc1eb703..e7af0f986 100644 --- a/src/qt_gui/translations/fi.ts +++ b/src/qt_gui/translations/fi.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Käyttäjänimi:\nAsettaa PS4-tilin käyttäjänimen, joka voi näkyä joissakin peleissä. 
+ + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/fr.ts b/src/qt_gui/translations/fr.ts index 19b0f9358..aa2abed78 100644 --- a/src/qt_gui/translations/fr.ts +++ b/src/qt_gui/translations/fr.ts @@ -537,6 +537,16 @@ Username Nom d'utilisateur + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Nom d'utilisateur:\nDéfinit le nom d'utilisateur du compte PS4, qui peut être affiché par certains jeux. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/hu_HU.ts b/src/qt_gui/translations/hu_HU.ts index bc337f2cd..51f149422 100644 --- a/src/qt_gui/translations/hu_HU.ts +++ b/src/qt_gui/translations/hu_HU.ts @@ -537,6 +537,16 @@ Username Felhasználónév + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Felhasználónév:\nBeállítja a PS4 fiók felhasználónevét, amelyet egyes játékok megjeleníthetnek. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/id.ts b/src/qt_gui/translations/id.ts index 7a0bf5d05..836bcf2a2 100644 --- a/src/qt_gui/translations/id.ts +++ b/src/qt_gui/translations/id.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Nama Pengguna:\nMenetapkan nama pengguna akun PS4, yang mungkin ditampilkan oleh beberapa permainan. 
+ + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/it.ts b/src/qt_gui/translations/it.ts index 1391fbc55..6e6022c17 100644 --- a/src/qt_gui/translations/it.ts +++ b/src/qt_gui/translations/it.ts @@ -537,6 +537,16 @@ Username Nome Utente + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Nome Utente:\nImposta il nome utente dell'account PS4, che potrebbe essere visualizzato da alcuni giochi. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/ja_JP.ts b/src/qt_gui/translations/ja_JP.ts index 58f213e03..573a05e45 100644 --- a/src/qt_gui/translations/ja_JP.ts +++ b/src/qt_gui/translations/ja_JP.ts @@ -537,6 +537,16 @@ Username ユーザー名 + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName ユーザー名:\nPS4のアカウントユーザー名を設定します。これは、一部のゲームで表示される場合があります。 + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/ko_KR.ts b/src/qt_gui/translations/ko_KR.ts index 75a1b53cf..8b4ac76f3 100644 --- a/src/qt_gui/translations/ko_KR.ts +++ b/src/qt_gui/translations/ko_KR.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Username:\nSets the PS4's account username, which may be displayed by some games. 
+ + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/lt_LT.ts b/src/qt_gui/translations/lt_LT.ts index 092521fdf..bbb563908 100644 --- a/src/qt_gui/translations/lt_LT.ts +++ b/src/qt_gui/translations/lt_LT.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Vartotojo vardas:\nNustato PS4 paskyros vartotojo vardą, kuris gali būti rodomas kai kuriuose žaidimuose. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/nb.ts b/src/qt_gui/translations/nb.ts index cc41573db..6c8e4c38a 100644 --- a/src/qt_gui/translations/nb.ts +++ b/src/qt_gui/translations/nb.ts @@ -537,6 +537,16 @@ Username Brukernavn + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Brukernavn:\nAngir brukernavnet for PS4-kontoen, som kan vises av enkelte spill. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/nl.ts b/src/qt_gui/translations/nl.ts index 5cd4a4224..d9da2253e 100644 --- a/src/qt_gui/translations/nl.ts +++ b/src/qt_gui/translations/nl.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Gebruikersnaam:\nStelt de gebruikersnaam van het PS4-account in, die door sommige games kan worden weergegeven. 
+ + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/pl_PL.ts b/src/qt_gui/translations/pl_PL.ts index b85393bb0..8de1c903c 100644 --- a/src/qt_gui/translations/pl_PL.ts +++ b/src/qt_gui/translations/pl_PL.ts @@ -537,6 +537,16 @@ Username Nazwa użytkownika + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Nazwa użytkownika:\nUstala nazwę użytkownika konta PS4, która może być wyświetlana w niektórych grach. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/pt_BR.ts b/src/qt_gui/translations/pt_BR.ts index 8ab8db093..bf806fe97 100644 --- a/src/qt_gui/translations/pt_BR.ts +++ b/src/qt_gui/translations/pt_BR.ts @@ -537,6 +537,16 @@ Username Nome de usuário + + + Trophy Key + Trophy Key + + + + Trophy + Troféus + Logger @@ -1236,6 +1246,11 @@ userName Nome de usuário:\nDefine o nome de usuário da conta PS4 que pode ser exibido por alguns jogos. + + + TrophyKey + Trophy Key:\nChave usada para descriptografar troféus.\nDeve conter apenas os caracteres hexadecimais de 'Trophy Key, type Release (CEX)', sem vírgulas ou 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/ro_RO.ts b/src/qt_gui/translations/ro_RO.ts index 00547d6ba..5cbced635 100644 --- a/src/qt_gui/translations/ro_RO.ts +++ b/src/qt_gui/translations/ro_RO.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Nume utilizator:\nSetează numele de utilizator al contului PS4, care poate fi afișat de unele jocuri. 
+ + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/ru_RU.ts b/src/qt_gui/translations/ru_RU.ts index 505a05a3e..f534acc0d 100644 --- a/src/qt_gui/translations/ru_RU.ts +++ b/src/qt_gui/translations/ru_RU.ts @@ -537,6 +537,16 @@ Username Имя пользователя + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Имя пользователя:\nУстановите имя пользователя аккаунта PS4. Это может отображаться в некоторых играх. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/sq.ts b/src/qt_gui/translations/sq.ts index 0c318f4f7..0031ab305 100644 --- a/src/qt_gui/translations/sq.ts +++ b/src/qt_gui/translations/sq.ts @@ -537,6 +537,16 @@ Username Përdoruesi + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Përdoruesi:\nPërcakton emrin e përdoruesit të llogarisë PS4, i cili mund të shfaqet nga disa lojra. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/tr_TR.ts b/src/qt_gui/translations/tr_TR.ts index 2845af462..f671dab2a 100644 --- a/src/qt_gui/translations/tr_TR.ts +++ b/src/qt_gui/translations/tr_TR.ts @@ -537,6 +537,16 @@ Username Kullanıcı Adı + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Kullanıcı Adı:\nBazı oyunlar tarafından gösterilebilen PS4 hesabının kullanıcı adını ayarlar. 
+ + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/uk_UA.ts b/src/qt_gui/translations/uk_UA.ts index 8abfca435..f6834e818 100644 --- a/src/qt_gui/translations/uk_UA.ts +++ b/src/qt_gui/translations/uk_UA.ts @@ -537,6 +537,16 @@ Username Ім'я користувача + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Ім'я користувача:\nВстановіть ім'я користувача акаунта PS4. Це може відображатися в деяких іграх. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/vi_VN.ts b/src/qt_gui/translations/vi_VN.ts index 7d0e9a2cd..315fcac7e 100644 --- a/src/qt_gui/translations/vi_VN.ts +++ b/src/qt_gui/translations/vi_VN.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Tên người dùng:\nChọn tên người dùng của tài khoản PS4, có thể được một số trò chơi hiển thị. 
+ + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/zh_CN.ts b/src/qt_gui/translations/zh_CN.ts index 32b838fac..5ccd680b5 100644 --- a/src/qt_gui/translations/zh_CN.ts +++ b/src/qt_gui/translations/zh_CN.ts @@ -537,6 +537,16 @@ Username 用户名 + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName 用户名:\n设置 PS4 帐户的用户名,某些游戏中可能会显示此名称。 + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox diff --git a/src/qt_gui/translations/zh_TW.ts b/src/qt_gui/translations/zh_TW.ts index 3d27267b6..fc1bad1a3 100644 --- a/src/qt_gui/translations/zh_TW.ts +++ b/src/qt_gui/translations/zh_TW.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName 用戶名:\n設定PS4帳號的用戶名,某些遊戲中可能會顯示。 + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + logTypeGroupBox From 3109bd245ff74b512ba96d70221eb9b501ebb780 Mon Sep 17 00:00:00 2001 From: Vinicius Rangel Date: Thu, 2 Jan 2025 04:42:53 -0300 Subject: [PATCH 39/53] savedata: Avoid Save memory concurrency --- src/core/libraries/save_data/savedata.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp index 66899fb34..b573ded1e 100644 --- a/src/core/libraries/save_data/savedata.cpp +++ b/src/core/libraries/save_data/savedata.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include @@ -1139,10 +1140,6 @@ Error PS4_SYSV_ABI sceSaveDataGetSaveDataMemory2(OrbisSaveDataMemoryGet2* getPar 
LOG_INFO(Lib_SaveData, "called without save memory initialized"); return Error::MEMORY_NOT_READY; } - if (SaveMemory::IsSaving()) { - LOG_TRACE(Lib_SaveData, "called while saving"); - return Error::BUSY_FOR_SAVING; - } LOG_DEBUG(Lib_SaveData, "called"); auto data = getParam->data; if (data != nullptr) { @@ -1502,8 +1499,14 @@ Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2* return Error::MEMORY_NOT_READY; } if (SaveMemory::IsSaving()) { - LOG_TRACE(Lib_SaveData, "called while saving"); - return Error::BUSY_FOR_SAVING; + int count = 0; + while (++count < 100 && SaveMemory::IsSaving()) { // try for more 10 seconds + std::this_thread::sleep_for(chrono::milliseconds(100)); + } + if (SaveMemory::IsSaving()) { + LOG_TRACE(Lib_SaveData, "called while saving"); + return Error::BUSY_FOR_SAVING; + } } LOG_DEBUG(Lib_SaveData, "called"); auto data = setParam->data; @@ -1584,8 +1587,8 @@ Error PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory2(const OrbisSaveDataMemorySetu } else { SaveMemory::SetIcon(nullptr, 0); } + SaveMemory::TriggerSaveWithoutEvent(); } - SaveMemory::TriggerSaveWithoutEvent(); if (g_fw_ver >= ElfInfo::FW_45 && result != nullptr) { result->existedMemorySize = existed_size; } From 33afc00c3a39266aa0151bb6bb5da2acf52a7349 Mon Sep 17 00:00:00 2001 From: polyproxy <47796739+polybiusproxy@users.noreply.github.com> Date: Thu, 2 Jan 2025 10:07:48 +0100 Subject: [PATCH 40/53] Update improper wording in translations --- src/qt_gui/translations/ar.ts | 2 +- src/qt_gui/translations/da_DK.ts | 2 +- src/qt_gui/translations/de.ts | 2 +- src/qt_gui/translations/el.ts | 2 +- src/qt_gui/translations/en.ts | 2 +- src/qt_gui/translations/es_ES.ts | 2 +- src/qt_gui/translations/fa_IR.ts | 2 +- src/qt_gui/translations/fi.ts | 2 +- src/qt_gui/translations/fr.ts | 2 +- src/qt_gui/translations/hu_HU.ts | 2 +- src/qt_gui/translations/id.ts | 2 +- src/qt_gui/translations/it.ts | 2 +- src/qt_gui/translations/ja_JP.ts | 2 +- src/qt_gui/translations/ko_KR.ts | 
2 +- src/qt_gui/translations/lt_LT.ts | 2 +- src/qt_gui/translations/nb.ts | 2 +- src/qt_gui/translations/nl.ts | 2 +- src/qt_gui/translations/pl_PL.ts | 2 +- src/qt_gui/translations/pt_BR.ts | 2 +- src/qt_gui/translations/ro_RO.ts | 2 +- src/qt_gui/translations/ru_RU.ts | 2 +- src/qt_gui/translations/sq.ts | 2 +- src/qt_gui/translations/tr_TR.ts | 2 +- src/qt_gui/translations/uk_UA.ts | 2 +- src/qt_gui/translations/vi_VN.ts | 2 +- src/qt_gui/translations/zh_CN.ts | 2 +- src/qt_gui/translations/zh_TW.ts | 2 +- 27 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/qt_gui/translations/ar.ts b/src/qt_gui/translations/ar.ts index 9296ef9a4..e851f59a7 100644 --- a/src/qt_gui/translations/ar.ts +++ b/src/qt_gui/translations/ar.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/da_DK.ts b/src/qt_gui/translations/da_DK.ts index ef14d1496..41319c7ff 100644 --- a/src/qt_gui/translations/da_DK.ts +++ b/src/qt_gui/translations/da_DK.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/de.ts b/src/qt_gui/translations/de.ts index d4587fc87..62897fe24 100644 --- a/src/qt_gui/translations/de.ts +++ b/src/qt_gui/translations/de.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. 
Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/el.ts b/src/qt_gui/translations/el.ts index 671676d62..43ed81c33 100644 --- a/src/qt_gui/translations/el.ts +++ b/src/qt_gui/translations/el.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/en.ts b/src/qt_gui/translations/en.ts index 326cf27b3..293b5fae7 100644 --- a/src/qt_gui/translations/en.ts +++ b/src/qt_gui/translations/en.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/es_ES.ts b/src/qt_gui/translations/es_ES.ts index 775f78958..096e104e3 100644 --- a/src/qt_gui/translations/es_ES.ts +++ b/src/qt_gui/translations/es_ES.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/fa_IR.ts b/src/qt_gui/translations/fa_IR.ts index eb60613d2..7b93c6769 100644 --- a/src/qt_gui/translations/fa_IR.ts +++ b/src/qt_gui/translations/fa_IR.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. 
Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/fi.ts b/src/qt_gui/translations/fi.ts index e7af0f986..cdf331796 100644 --- a/src/qt_gui/translations/fi.ts +++ b/src/qt_gui/translations/fi.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/fr.ts b/src/qt_gui/translations/fr.ts index aa2abed78..441eaddb1 100644 --- a/src/qt_gui/translations/fr.ts +++ b/src/qt_gui/translations/fr.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/hu_HU.ts b/src/qt_gui/translations/hu_HU.ts index 51f149422..f6b853e4b 100644 --- a/src/qt_gui/translations/hu_HU.ts +++ b/src/qt_gui/translations/hu_HU.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/id.ts b/src/qt_gui/translations/id.ts index 836bcf2a2..bee61083c 100644 --- a/src/qt_gui/translations/id.ts +++ b/src/qt_gui/translations/id.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. 
Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/it.ts b/src/qt_gui/translations/it.ts index 6e6022c17..9e375a45e 100644 --- a/src/qt_gui/translations/it.ts +++ b/src/qt_gui/translations/it.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/ja_JP.ts b/src/qt_gui/translations/ja_JP.ts index 573a05e45..409900ade 100644 --- a/src/qt_gui/translations/ja_JP.ts +++ b/src/qt_gui/translations/ja_JP.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/ko_KR.ts b/src/qt_gui/translations/ko_KR.ts index 8b4ac76f3..ab6404a7e 100644 --- a/src/qt_gui/translations/ko_KR.ts +++ b/src/qt_gui/translations/ko_KR.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/lt_LT.ts b/src/qt_gui/translations/lt_LT.ts index bbb563908..0b9c5b542 100644 --- a/src/qt_gui/translations/lt_LT.ts +++ b/src/qt_gui/translations/lt_LT.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. 
Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/nb.ts b/src/qt_gui/translations/nb.ts index 6c8e4c38a..4d3c4f5af 100644 --- a/src/qt_gui/translations/nb.ts +++ b/src/qt_gui/translations/nb.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/nl.ts b/src/qt_gui/translations/nl.ts index d9da2253e..0cb890186 100644 --- a/src/qt_gui/translations/nl.ts +++ b/src/qt_gui/translations/nl.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/pl_PL.ts b/src/qt_gui/translations/pl_PL.ts index 8de1c903c..1aed08394 100644 --- a/src/qt_gui/translations/pl_PL.ts +++ b/src/qt_gui/translations/pl_PL.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/pt_BR.ts b/src/qt_gui/translations/pt_BR.ts index bf806fe97..cce66c105 100644 --- a/src/qt_gui/translations/pt_BR.ts +++ b/src/qt_gui/translations/pt_BR.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nChave usada para descriptografar troféus.\nDeve conter apenas os caracteres hexadecimais de 'Trophy Key, type Release (CEX)', sem vírgulas ou 0x + Trophy Key:\nKey used to decrypt trophies. 
Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/ro_RO.ts b/src/qt_gui/translations/ro_RO.ts index 5cbced635..63df2ff80 100644 --- a/src/qt_gui/translations/ro_RO.ts +++ b/src/qt_gui/translations/ro_RO.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/ru_RU.ts b/src/qt_gui/translations/ru_RU.ts index f534acc0d..88eff1aeb 100644 --- a/src/qt_gui/translations/ru_RU.ts +++ b/src/qt_gui/translations/ru_RU.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/sq.ts b/src/qt_gui/translations/sq.ts index 0031ab305..1df2a40e2 100644 --- a/src/qt_gui/translations/sq.ts +++ b/src/qt_gui/translations/sq.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/tr_TR.ts b/src/qt_gui/translations/tr_TR.ts index f671dab2a..a03a48660 100644 --- a/src/qt_gui/translations/tr_TR.ts +++ b/src/qt_gui/translations/tr_TR.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. 
Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/uk_UA.ts b/src/qt_gui/translations/uk_UA.ts index f6834e818..7e0a58ffb 100644 --- a/src/qt_gui/translations/uk_UA.ts +++ b/src/qt_gui/translations/uk_UA.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/vi_VN.ts b/src/qt_gui/translations/vi_VN.ts index 315fcac7e..997c3d3f9 100644 --- a/src/qt_gui/translations/vi_VN.ts +++ b/src/qt_gui/translations/vi_VN.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/zh_CN.ts b/src/qt_gui/translations/zh_CN.ts index 5ccd680b5..fecb8857f 100644 --- a/src/qt_gui/translations/zh_CN.ts +++ b/src/qt_gui/translations/zh_CN.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. diff --git a/src/qt_gui/translations/zh_TW.ts b/src/qt_gui/translations/zh_TW.ts index fc1bad1a3..293ed81a6 100644 --- a/src/qt_gui/translations/zh_TW.ts +++ b/src/qt_gui/translations/zh_TW.ts @@ -1249,7 +1249,7 @@ TrophyKey - Trophy Key:\nKey used to decrypt trophies.\nMust contain only the hex characters of 'Trophy Key, type Release (CEX)', without commas or 0x + Trophy Key:\nKey used to decrypt trophies. 
Must be obtained from your jailbroken console.\nMust contain only hex characters. From 19269009451665b5126399b8fefa67dbf2706e75 Mon Sep 17 00:00:00 2001 From: polyproxy <47796739+polybiusproxy@users.noreply.github.com> Date: Thu, 2 Jan 2025 12:30:05 +0100 Subject: [PATCH 41/53] hotfix: reset stop source on thread stop --- src/core/libraries/kernel/threads.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/libraries/kernel/threads.h b/src/core/libraries/kernel/threads.h index ad1393599..409136968 100644 --- a/src/core/libraries/kernel/threads.h +++ b/src/core/libraries/kernel/threads.h @@ -55,6 +55,9 @@ public: stop.request_stop(); Join(); } + thread = nullptr; + func = nullptr; + stop = std::stop_source{}; } static void* PS4_SYSV_ABI RunWrapper(void* arg) { From 099e685bfff6bed3ede25af96a83fb02b996e917 Mon Sep 17 00:00:00 2001 From: Mahmoud Adel <94652220+AboMedoz@users.noreply.github.com> Date: Thu, 2 Jan 2025 14:29:57 +0200 Subject: [PATCH 42/53] add R16Uint to Format Detiler (#1995) helps with Matterfall --- src/video_core/texture_cache/tile_manager.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index de108843b..fda7e511a 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -32,6 +32,7 @@ static vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eR5G5B5A1UnormPack16: case vk::Format::eR8G8Unorm: case vk::Format::eR16Sfloat: + case vk::Format::eR16Uint: case vk::Format::eR16Unorm: case vk::Format::eD16Unorm: return vk::Format::eR8G8Uint; From 6862c9aad77c2df28486d1ec86204a044a96f481 Mon Sep 17 00:00:00 2001 From: hspir404 Date: Thu, 2 Jan 2025 13:38:51 +0000 Subject: [PATCH 43/53] Speed up LiverpoolToVK::SurfaceFormat (#1982) * Speed up LiverpoolToVK::SurfaceFormat In Bloodborne this shows up as the function with the very highest cumulative "exclusive time". 
This is true both in scenes that perform poorly, and scenes that perform well. I took (approximately) 10s samples using an 8 kHz sampling profiler. In the Nightmare Grand Cathedral (looking towards the stairs, at the rest of the level): - Reduced total time from 757.34ms to 82.61ms (out of ~10000ms). - Reduced average frame times by 2ms (though according to the graph, the gap may be as big as 9ms every N frames). In the Hunter's Dream (in the spawn position): - Reduced the total time from 486.50ms to 53.83ms (out of ~10000ms). - Average frame times appear to be roughly the same. These are profiles of the change vs the version currently in the main branch. These changes also improve things in the `threading` branch. They might improve them even more in that branch, but I didn't keep track of my measurements as carefully in that branch. I believe this change will still be useful even when that branch is stabilized and merged. It could be that there are other bottlenecks in rendering on this branch that are preventing this code from being the critical path in places like the Hunter's Dream, where performance isn't currently as constrained. That might explain why the reduction in call times isn't resulting in a higher frame rate. 
* Implement SurfaceFormat with derived lookup table instead of switch * Clang format fixes --- .../renderer_vulkan/liverpool_to_vk.cpp | 42 +++++++++++++++---- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index c41b760ba..eba2050e0 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -691,16 +691,40 @@ std::span SurfaceFormats() { return formats; } +// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture] +static const size_t amd_gpu_data_format_bit_size = 6; // All values are under 64 +static const size_t amd_gpu_number_format_bit_size = 4; // All values are under 16 + +static size_t GetSurfaceFormatTableIndex(AmdGpu::DataFormat data_format, + AmdGpu::NumberFormat num_format) { + DEBUG_ASSERT(data_format < 1 << amd_gpu_data_format_bit_size); + DEBUG_ASSERT(num_format < 1 << amd_gpu_number_format_bit_size); + size_t result = static_cast(num_format) | + (static_cast(data_format) << amd_gpu_number_format_bit_size); + return result; +} + +static auto surface_format_table = []() constexpr { + std::array + result; + for (auto& entry : result) { + entry = vk::Format::eUndefined; + } + for (const auto& supported_format : SurfaceFormats()) { + result[GetSurfaceFormatTableIndex(supported_format.data_format, + supported_format.number_format)] = + supported_format.vk_format; + } + return result; +}(); + vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { - const auto& formats = SurfaceFormats(); - const auto format = - std::find_if(formats.begin(), formats.end(), [&](const SurfaceFormatInfo& format_info) { - return format_info.data_format == data_format && - format_info.number_format == num_format; - }); - ASSERT_MSG(format != formats.end(), "Unknown data_format={} and num_format={}", - static_cast(data_format), 
static_cast(num_format)); - return format->vk_format; + vk::Format result = surface_format_table[GetSurfaceFormatTableIndex(data_format, num_format)]; + bool found = + result != vk::Format::eUndefined || data_format == AmdGpu::DataFormat::FormatInvalid; + ASSERT_MSG(found, "Unknown data_format={} and num_format={}", static_cast(data_format), + static_cast(num_format)); + return result; } static constexpr DepthFormatInfo CreateDepthFormatInfo( From c25447097e51684ff4152e19226f572849c5a9cf Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 2 Jan 2025 15:39:02 +0200 Subject: [PATCH 44/53] buffer_cache: Improve buffer cache locking contention (#1973) * Improve buffer cache locking contention * buffer_cache: Revert some changes * clang fmt 1 * clang fmt 2 * clang fmt 3 * buffer_cache: Fix build --- src/video_core/buffer_cache/buffer_cache.cpp | 23 +- src/video_core/buffer_cache/buffer_cache.h | 4 +- .../buffer_cache/memory_tracker_base.h | 61 ++--- src/video_core/buffer_cache/word_manager.h | 241 +++++------------- src/video_core/multi_level_page_table.h | 9 + src/video_core/page_manager.cpp | 2 +- src/video_core/page_manager.h | 4 +- 7 files changed, 104 insertions(+), 240 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 0088ea4fa..3ac6a3598 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -54,18 +54,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s BufferCache::~BufferCache() = default; void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { - std::scoped_lock lk{mutex}; const bool is_tracked = IsRegionRegistered(device_addr, size); - if (!is_tracked) { - return; - } - // Mark the page as CPU modified to stop tracking writes. - SCOPE_EXIT { + if (is_tracked) { + // Mark the page as CPU modified to stop tracking writes. 
memory_tracker.MarkRegionAsCpuModified(device_addr, size); - }; - if (!memory_tracker.IsRegionGpuModified(device_addr, size)) { - // Page has not been modified by the GPU, nothing to do. - return; } } @@ -346,6 +338,7 @@ bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) { ++page; continue; } + std::shared_lock lk{mutex}; Buffer& buffer = slot_buffers[buffer_id]; const VAddr buf_start_addr = buffer.CpuAddr(); const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); @@ -496,8 +489,11 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { wanted_size = static_cast(device_addr_end - device_addr); const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size); const u32 size = static_cast(overlap.end - overlap.begin); - const BufferId new_buffer_id = slot_buffers.insert( - instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size); + const BufferId new_buffer_id = [&] { + std::scoped_lock lk{mutex}; + return slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, + AllFlags, size); + }(); auto& new_buffer = slot_buffers[new_buffer_id]; const size_t size_bytes = new_buffer.SizeBytes(); const auto cmdbuf = scheduler.CommandBuffer(); @@ -537,10 +533,8 @@ void BufferCache::ChangeRegister(BufferId buffer_id) { void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer) { - std::scoped_lock lk{mutex}; boost::container::small_vector copies; u64 total_size_bytes = 0; - u64 largest_copy = 0; VAddr buffer_start = buffer.CpuAddr(); memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { copies.push_back(vk::BufferCopy{ @@ -549,7 +543,6 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, .size = range_size, }); total_size_bytes += range_size; - largest_copy = std::max(largest_copy, range_size); }); SCOPE_EXIT { if (is_texel_buffer) { diff --git 
a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 0c70fa10b..c367795f1 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -3,7 +3,7 @@ #pragma once -#include +#include #include #include #include @@ -157,7 +157,7 @@ private: StreamBuffer staging_buffer; StreamBuffer stream_buffer; Buffer gds_buffer; - std::mutex mutex; + std::shared_mutex mutex; Common::SlotVector slot_buffers; RangeSet gpu_modified_ranges; vk::BufferView null_buffer_view; diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h index ae61b55f2..d9166b11c 100644 --- a/src/video_core/buffer_cache/memory_tracker_base.h +++ b/src/video_core/buffer_cache/memory_tracker_base.h @@ -15,13 +15,8 @@ namespace VideoCore { class MemoryTracker { public: static constexpr size_t MAX_CPU_PAGE_BITS = 40; - static constexpr size_t HIGHER_PAGE_BITS = 22; - static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; - static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); static constexpr size_t MANAGER_POOL_SIZE = 32; - static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; - using Manager = WordManager; public: explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {} @@ -30,7 +25,7 @@ public: /// Returns true if a region has been modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { return IteratePages( - query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { + query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { return manager->template IsRegionModified(offset, size); }); } @@ -38,52 +33,34 @@ public: /// Returns true if a region has been modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr 
query_cpu_addr, u64 query_size) noexcept { return IteratePages( - query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { + query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { return manager->template IsRegionModified(offset, size); }); } /// Mark region as CPU modified, notifying the device_tracker about this change void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { - IteratePages(dirty_cpu_addr, query_size, - [](Manager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); - } - - /// Unmark region as CPU modified, notifying the device_tracker about this change - void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { - IteratePages(dirty_cpu_addr, query_size, - [](Manager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); + IteratePages(dirty_cpu_addr, query_size, + [](RegionManager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); } /// Mark region as modified from the host GPU void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { - IteratePages(dirty_cpu_addr, query_size, - [](Manager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); - } - - /// Unmark region as modified from the host GPU - void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { - IteratePages(dirty_cpu_addr, query_size, - [](Manager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); + IteratePages(dirty_cpu_addr, query_size, + [](RegionManager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); } /// Call 'func' for each CPU modified range 
and unmark those pages as CPU modified template void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { IteratePages(query_cpu_range, query_size, - [&func](Manager* manager, u64 offset, size_t size) { + [&func](RegionManager* manager, u64 offset, size_t size) { manager->template ForEachModifiedRange( manager->GetCpuAddr() + offset, size, func); }); @@ -93,7 +70,7 @@ public: template void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { IteratePages(query_cpu_range, query_size, - [&func](Manager* manager, u64 offset, size_t size) { + [&func](RegionManager* manager, u64 offset, size_t size) { if constexpr (clear) { manager->template ForEachModifiedRange( manager->GetCpuAddr() + offset, size, func); @@ -114,7 +91,7 @@ private: */ template bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; + using FuncReturn = typename std::invoke_result::type; static constexpr bool BOOL_BREAK = std::is_same_v; std::size_t remaining_size{size}; std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; @@ -155,7 +132,7 @@ private: manager_pool.emplace_back(); auto& last_pool = manager_pool.back(); for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { - std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE); + std::construct_at(&last_pool[i], tracker, 0); free_managers.push_back(&last_pool[i]); } } @@ -167,9 +144,9 @@ private: } PageManager* tracker; - std::deque> manager_pool; - std::vector free_managers; - std::array top_tier{}; + std::deque> manager_pool; + std::vector free_managers; + std::array top_tier{}; }; } // namespace VideoCore diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index ae85d1eb1..7ad33d7a6 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h @@ -3,10 +3,12 @@ #pragma once -#include +#include +#include #include #include -#include 
"common/div_ceil.h" + +#include "common/spin_lock.h" #include "common/types.h" #include "video_core/page_manager.h" @@ -16,135 +18,32 @@ constexpr u64 PAGES_PER_WORD = 64; constexpr u64 BYTES_PER_PAGE = 4_KB; constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; +constexpr u64 HIGHER_PAGE_BITS = 22; +constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; +constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; +constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD; + enum class Type { CPU, GPU, Untracked, }; -/// Vector tracking modified pages tightly packed with small vector optimization -template -struct WordsArray { - /// Returns the pointer to the words state - [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { - return is_short ? stack.data() : heap; - } +using WordsArray = std::array; - /// Returns the pointer to the words state - [[nodiscard]] u64* Pointer(bool is_short) noexcept { - return is_short ? stack.data() : heap; - } - - std::array stack{}; ///< Small buffers storage - u64* heap; ///< Not-small buffers pointer to the storage -}; - -template -struct Words { - explicit Words() = default; - explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { - num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD); - if (IsShort()) { - cpu.stack.fill(~u64{0}); - gpu.stack.fill(0); - untracked.stack.fill(~u64{0}); - } else { - // Share allocation between CPU and GPU pages and set their default values - u64* const alloc = new u64[num_words * 3]; - cpu.heap = alloc; - gpu.heap = alloc + num_words; - untracked.heap = alloc + num_words * 2; - std::fill_n(cpu.heap, num_words, ~u64{0}); - std::fill_n(gpu.heap, num_words, 0); - std::fill_n(untracked.heap, num_words, ~u64{0}); - } - // Clean up tailing bits - const u64 last_word_size = size_bytes % BYTES_PER_WORD; - const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); - const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; - const u64 
last_word = (~u64{0} << shift) >> shift; - cpu.Pointer(IsShort())[NumWords() - 1] = last_word; - untracked.Pointer(IsShort())[NumWords() - 1] = last_word; - } - - ~Words() { - Release(); - } - - Words& operator=(Words&& rhs) noexcept { - Release(); - size_bytes = rhs.size_bytes; - num_words = rhs.num_words; - cpu = rhs.cpu; - gpu = rhs.gpu; - untracked = rhs.untracked; - rhs.cpu.heap = nullptr; - return *this; - } - - Words(Words&& rhs) noexcept - : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu}, - untracked{rhs.untracked} { - rhs.cpu.heap = nullptr; - } - - Words& operator=(const Words&) = delete; - Words(const Words&) = delete; - - /// Returns true when the buffer fits in the small vector optimization - [[nodiscard]] bool IsShort() const noexcept { - return num_words <= stack_words; - } - - /// Returns the number of words of the buffer - [[nodiscard]] size_t NumWords() const noexcept { - return num_words; - } - - /// Release buffer resources - void Release() { - if (!IsShort()) { - // CPU written words is the base for the heap allocation - delete[] cpu.heap; - } - } - - template - std::span Span() noexcept { - if constexpr (type == Type::CPU) { - return std::span(cpu.Pointer(IsShort()), num_words); - } else if constexpr (type == Type::GPU) { - return std::span(gpu.Pointer(IsShort()), num_words); - } else if constexpr (type == Type::Untracked) { - return std::span(untracked.Pointer(IsShort()), num_words); - } - } - - template - std::span Span() const noexcept { - if constexpr (type == Type::CPU) { - return std::span(cpu.Pointer(IsShort()), num_words); - } else if constexpr (type == Type::GPU) { - return std::span(gpu.Pointer(IsShort()), num_words); - } else if constexpr (type == Type::Untracked) { - return std::span(untracked.Pointer(IsShort()), num_words); - } - } - - u64 size_bytes = 0; - size_t num_words = 0; - WordsArray cpu; - WordsArray gpu; - WordsArray untracked; -}; - -template -class WordManager { +/** + * Allows 
tracking CPU and GPU modification of pages in a contigious 4MB virtual address region. + * Information is stored in bitsets for spacial locality and fast update of single pages. + */ +class RegionManager { public: - explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes) - : tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {} - - explicit WordManager() = default; + explicit RegionManager(PageManager* tracker_, VAddr cpu_addr_) + : tracker{tracker_}, cpu_addr{cpu_addr_} { + cpu.fill(~u64{0}); + gpu.fill(0); + untracked.fill(~u64{0}); + } + explicit RegionManager() = default; void SetCpuAddress(VAddr new_cpu_addr) { cpu_addr = new_cpu_addr; @@ -175,12 +74,12 @@ public: static constexpr bool BOOL_BREAK = std::is_same_v; const size_t start = static_cast(std::max(static_cast(offset), 0LL)); const size_t end = static_cast(std::max(static_cast(offset + size), 0LL)); - if (start >= SizeBytes() || end <= start) { + if (start >= HIGHER_PAGE_SIZE || end <= start) { return; } auto [start_word, start_page] = GetWordPage(start); auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL); - const size_t num_words = NumWords(); + constexpr size_t num_words = NUM_REGION_WORDS; start_word = std::min(start_word, num_words); end_word = std::min(end_word, num_words); const size_t diff = end_word - start_word; @@ -225,21 +124,21 @@ public: */ template void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { - std::span state_words = words.template Span(); - [[maybe_unused]] std::span untracked_words = words.template Span(); + std::scoped_lock lk{lock}; + std::span state_words = Span(); IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) { if constexpr (type == Type::CPU) { - NotifyPageTracker(index, untracked_words[index], mask); + UpdateProtection(index, untracked[index], mask); } if constexpr (enable) { state_words[index] |= mask; if constexpr (type == Type::CPU) { - untracked_words[index] |= mask; + 
untracked[index] |= mask; } } else { state_words[index] &= ~mask; if constexpr (type == Type::CPU) { - untracked_words[index] &= ~mask; + untracked[index] &= ~mask; } } }); @@ -255,10 +154,10 @@ public: */ template void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { + std::scoped_lock lk{lock}; static_assert(type != Type::Untracked); - std::span state_words = words.template Span(); - [[maybe_unused]] std::span untracked_words = words.template Span(); + std::span state_words = Span(); const size_t offset = query_cpu_range - cpu_addr; bool pending = false; size_t pending_offset{}; @@ -269,16 +168,16 @@ public: }; IterateWords(offset, size, [&](size_t index, u64 mask) { if constexpr (type == Type::GPU) { - mask &= ~untracked_words[index]; + mask &= ~untracked[index]; } const u64 word = state_words[index] & mask; if constexpr (clear) { if constexpr (type == Type::CPU) { - NotifyPageTracker(index, untracked_words[index], mask); + UpdateProtection(index, untracked[index], mask); } state_words[index] &= ~mask; if constexpr (type == Type::CPU) { - untracked_words[index] &= ~mask; + untracked[index] &= ~mask; } } const size_t base_offset = index * PAGES_PER_WORD; @@ -315,13 +214,11 @@ public: [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { static_assert(type != Type::Untracked); - const std::span state_words = words.template Span(); - [[maybe_unused]] const std::span untracked_words = - words.template Span(); + const std::span state_words = Span(); bool result = false; IterateWords(offset, size, [&](size_t index, u64 mask) { if constexpr (type == Type::GPU) { - mask &= ~untracked_words[index]; + mask &= ~untracked[index]; } const u64 word = state_words[index] & mask; if (word != 0) { @@ -333,44 +230,7 @@ public: return result; } - /// Returns the number of words of the manager - [[nodiscard]] size_t NumWords() const noexcept { - return words.NumWords(); - } - - /// Returns the size in bytes of the manager - [[nodiscard]] u64 
SizeBytes() const noexcept { - return words.size_bytes; - } - - /// Returns true when the buffer fits in the small vector optimization - [[nodiscard]] bool IsShort() const noexcept { - return words.IsShort(); - } - private: - template - u64* Array() noexcept { - if constexpr (type == Type::CPU) { - return words.cpu.Pointer(IsShort()); - } else if constexpr (type == Type::GPU) { - return words.gpu.Pointer(IsShort()); - } else if constexpr (type == Type::Untracked) { - return words.untracked.Pointer(IsShort()); - } - } - - template - const u64* Array() const noexcept { - if constexpr (type == Type::CPU) { - return words.cpu.Pointer(IsShort()); - } else if constexpr (type == Type::GPU) { - return words.gpu.Pointer(IsShort()); - } else if constexpr (type == Type::Untracked) { - return words.untracked.Pointer(IsShort()); - } - } - /** * Notify tracker about changes in the CPU tracking state of a word in the buffer * @@ -381,7 +241,7 @@ private: * @tparam add_to_tracker True when the tracker should start tracking the new pages */ template - void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const { + void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const { u64 changed_bits = (add_to_tracker ? 
current_bits : ~current_bits) & new_bits; VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; IteratePages(changed_bits, [&](size_t offset, size_t size) { @@ -390,9 +250,34 @@ private: }); } + template + std::span Span() noexcept { + if constexpr (type == Type::CPU) { + return cpu; + } else if constexpr (type == Type::GPU) { + return gpu; + } else if constexpr (type == Type::Untracked) { + return untracked; + } + } + + template + std::span Span() const noexcept { + if constexpr (type == Type::CPU) { + return cpu; + } else if constexpr (type == Type::GPU) { + return gpu; + } else if constexpr (type == Type::Untracked) { + return untracked; + } + } + + Common::SpinLock lock; PageManager* tracker; VAddr cpu_addr = 0; - Words words; + WordsArray cpu; + WordsArray gpu; + WordsArray untracked; }; } // namespace VideoCore diff --git a/src/video_core/multi_level_page_table.h b/src/video_core/multi_level_page_table.h index 527476f3b..7f3205e1a 100644 --- a/src/video_core/multi_level_page_table.h +++ b/src/video_core/multi_level_page_table.h @@ -39,6 +39,15 @@ public: return &(*first_level_map[l1_page])[l2_page]; } + [[nodiscard]] const Entry* find(size_t page) const { + const size_t l1_page = page >> SecondLevelBits; + const size_t l2_page = page & (NumEntriesPerL1Page - 1); + if (!first_level_map[l1_page]) { + return nullptr; + } + return &(*first_level_map[l1_page])[l2_page]; + } + [[nodiscard]] const Entry& operator[](size_t page) const { const size_t l1_page = page >> SecondLevelBits; const size_t l2_page = page & (NumEntriesPerL1Page - 1); diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 556555c25..47ed9e543 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -185,7 +185,7 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) { void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) { static constexpr u64 PageShift = 12; - std::scoped_lock lk{mutex}; + std::scoped_lock 
lk{lock}; const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1; const u64 page_start = addr >> PageShift; const u64 page_end = page_start + num_pages; diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h index 29a946a8f..f44307f92 100644 --- a/src/video_core/page_manager.h +++ b/src/video_core/page_manager.h @@ -4,8 +4,8 @@ #pragma once #include -#include #include +#include "common/spin_lock.h" #include "common/types.h" namespace Vulkan { @@ -35,8 +35,8 @@ private: struct Impl; std::unique_ptr impl; Vulkan::Rasterizer* rasterizer; - std::mutex mutex; boost::icl::interval_map cached_pages; + Common::SpinLock lock; }; } // namespace VideoCore From 596f4cdf0e66a97c9d2d4272091d8c0167a5b8e1 Mon Sep 17 00:00:00 2001 From: liberodark Date: Thu, 2 Jan 2025 14:39:39 +0100 Subject: [PATCH 45/53] Fix amdgpu & other issues (#2000) --- src/core/devtools/widget/reg_popup.cpp | 2 +- src/video_core/amdgpu/liverpool.h | 4 +-- src/video_core/amdgpu/resource.h | 30 ++++++++++--------- .../renderer_vulkan/liverpool_to_vk.cpp | 4 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 4 +-- src/video_core/texture_cache/image_info.cpp | 4 +-- src/video_core/texture_cache/image_view.cpp | 3 +- 7 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/core/devtools/widget/reg_popup.cpp b/src/core/devtools/widget/reg_popup.cpp index 2727e1745..fae620901 100644 --- a/src/core/devtools/widget/reg_popup.cpp +++ b/src/core/devtools/widget/reg_popup.cpp @@ -66,7 +66,7 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) { "GetColorSliceSize()", buffer.GetColorSliceSize(), "GetTilingMode()", buffer.GetTilingMode(), "IsTiled()", buffer.IsTiled(), - "NumFormat()", buffer.NumFormat() + "NumFormat()", buffer.GetNumberFmt() ); // clang-format on diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index f1607f03e..d2d1aab3c 100644 --- a/src/video_core/amdgpu/liverpool.h +++ 
b/src/video_core/amdgpu/liverpool.h @@ -889,11 +889,11 @@ struct Liverpool { return !info.linear_general; } - [[nodiscard]] DataFormat DataFormat() const { + [[nodiscard]] DataFormat GetDataFmt() const { return RemapDataFormat(info.format); } - [[nodiscard]] NumberFormat NumFormat() const { + [[nodiscard]] NumberFormat GetNumberFmt() const { // There is a small difference between T# and CB number types, account for it. return RemapNumberFormat(info.number_type == NumberFormat::SnormNz ? NumberFormat::Srgb diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 4de25adbf..208f7f380 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -79,21 +79,23 @@ inline NumberFormat RemapNumberFormat(const NumberFormat format) { inline CompMapping RemapComponents(const DataFormat format, const CompMapping components) { switch (format) { - case DataFormat::Format11_11_10: - return { - .r = components.b, - .g = components.g, - .b = components.r, - .a = components.a, - }; + case DataFormat::Format11_11_10: { + CompMapping result; + result.r = components.b; + result.g = components.g; + result.b = components.r; + result.a = components.a; + return result; + } case DataFormat::Format10_10_10_2: - case DataFormat::Format5_5_5_1: - return { - .r = components.a, - .g = components.b, - .b = components.g, - .a = components.r, - }; + case DataFormat::Format5_5_5_1: { + CompMapping result; + result.r = components.a; + result.g = components.b; + result.b = components.g; + result.a = components.r; + return result; + } default: return components; } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index eba2050e0..97825b1e1 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -770,8 +770,8 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat vk::ClearValue 
ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { const auto comp_swizzle = color_buffer.Swizzle(); - const auto format = color_buffer.DataFormat(); - const auto number_type = color_buffer.NumFormat(); + const auto format = color_buffer.GetDataFmt(); + const auto number_type = color_buffer.GetNumberFmt(); const auto& c0 = color_buffer.clear_word0; const auto& c1 = color_buffer.clear_word1; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index cd1b42b05..ba069dae1 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -328,8 +328,8 @@ bool PipelineCache::RefreshGraphicsKey() { } key.color_formats[remapped_cb] = - LiverpoolToVK::SurfaceFormat(col_buf.DataFormat(), col_buf.NumFormat()); - key.color_num_formats[remapped_cb] = col_buf.NumFormat(); + LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt()); + key.color_num_formats[remapped_cb] = col_buf.GetNumberFmt(); key.color_swizzles[remapped_cb] = col_buf.Swizzle(); } diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 0559f1be3..adc72c21f 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -265,9 +265,9 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept { props.is_tiled = buffer.IsTiled(); tiling_mode = buffer.GetTilingMode(); - pixel_format = LiverpoolToVK::SurfaceFormat(buffer.DataFormat(), buffer.NumFormat()); + pixel_format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()); num_samples = buffer.NumSamples(); - num_bits = NumBits(buffer.DataFormat()); + num_bits = NumBits(buffer.GetDataFmt()); type = vk::ImageType::e2D; size.width = hint.Valid() ? hint.width : buffer.Pitch(); size.height = hint.Valid() ? 
hint.height : buffer.Height(); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index a9ae41dd1..68b116558 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -76,7 +76,8 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) n range.base.layer = col_buffer.view.slice_start; range.extent.layers = col_buffer.NumSlices() - range.base.layer; type = range.extent.layers > 1 ? vk::ImageViewType::e2DArray : vk::ImageViewType::e2D; - format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.DataFormat(), col_buffer.NumFormat()); + format = + Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.GetDataFmt(), col_buffer.GetNumberFmt()); } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, From 55b50171f83ca247c19d5142d57c0583a4b07c1d Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Thu, 2 Jan 2025 07:33:53 -0800 Subject: [PATCH 46/53] audio: Improve port state guards. 
(#1998) --- src/core/libraries/audio/audioout.cpp | 151 +++++++++++++------------ src/core/libraries/audio/audioout.h | 8 +- src/core/libraries/audio/sdl_audio.cpp | 27 +++-- 3 files changed, 103 insertions(+), 83 deletions(-) diff --git a/src/core/libraries/audio/audioout.cpp b/src/core/libraries/audio/audioout.cpp index d69454c39..f0ad59c3b 100644 --- a/src/core/libraries/audio/audioout.cpp +++ b/src/core/libraries/audio/audioout.cpp @@ -3,13 +3,13 @@ #include #include -#include +#include +#include #include #include "common/assert.h" #include "common/config.h" #include "common/logging/log.h" -#include "common/polyfill_thread.h" #include "common/thread.h" #include "core/libraries/audio/audioout.h" #include "core/libraries/audio/audioout_backend.h" @@ -18,7 +18,7 @@ namespace Libraries::AudioOut { -std::shared_mutex ports_mutex; +std::mutex port_open_mutex{}; std::array ports_out{}; static std::unique_ptr audio; @@ -93,17 +93,20 @@ int PS4_SYSV_ABI sceAudioOutClose(s32 handle) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - std::scoped_lock lock(ports_mutex); + std::unique_lock open_lock{port_open_mutex}; auto& port = ports_out.at(handle - 1); - if (!port.impl) { - return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + { + std::unique_lock lock{port.mutex}; + if (!port.IsOpen()) { + return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + } + std::free(port.output_buffer); + port.output_buffer = nullptr; + port.output_ready = false; + port.impl = nullptr; } - + // Stop outside of port lock scope to prevent deadlocks. 
port.output_thread.Stop(); - std::free(port.output_buffer); - port.output_buffer = nullptr; - port.output_ready = false; - port.impl = nullptr; return ORBIS_OK; } @@ -172,35 +175,34 @@ int PS4_SYSV_ABI sceAudioOutGetPortState(s32 handle, OrbisAudioOutPortState* sta return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - std::scoped_lock lock(ports_mutex); - const auto& port = ports_out.at(handle - 1); - if (!port.impl) { - return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + auto& port = ports_out.at(handle - 1); + { + std::unique_lock lock{port.mutex}; + if (!port.IsOpen()) { + return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + } + switch (port.type) { + case OrbisAudioOutPort::Main: + case OrbisAudioOutPort::Bgm: + case OrbisAudioOutPort::Voice: + state->output = 1; + state->channel = port.format_info.num_channels > 2 ? 2 : port.format_info.num_channels; + break; + case OrbisAudioOutPort::Personal: + case OrbisAudioOutPort::Padspk: + state->output = 4; + state->channel = 1; + break; + case OrbisAudioOutPort::Aux: + state->output = 0; + state->channel = 0; + break; + default: + UNREACHABLE(); + } + state->rerouteCounter = 0; + state->volume = 127; } - - state->rerouteCounter = 0; - state->volume = 127; - - switch (port.type) { - case OrbisAudioOutPort::Main: - case OrbisAudioOutPort::Bgm: - case OrbisAudioOutPort::Voice: - state->output = 1; - state->channel = port.format_info.num_channels > 2 ? 
2 : port.format_info.num_channels; - break; - case OrbisAudioOutPort::Personal: - case OrbisAudioOutPort::Padspk: - state->output = 4; - state->channel = 1; - break; - case OrbisAudioOutPort::Aux: - state->output = 0; - state->channel = 0; - break; - default: - UNREACHABLE(); - } - return ORBIS_OK; } @@ -279,15 +281,16 @@ static void AudioOutputThread(PortOut* port, const std::stop_token& stop) { while (true) { timer.Start(); { - std::unique_lock lock{port->output_mutex}; - Common::CondvarWait(port->output_cv, lock, stop, [&] { return port->output_ready; }); - if (stop.stop_requested()) { - break; + std::unique_lock lock{port->mutex}; + if (port->output_cv.wait(lock, stop, [&] { return port->output_ready; })) { + port->impl->Output(port->output_buffer); + port->output_ready = false; } - port->impl->Output(port->output_buffer); - port->output_ready = false; } port->output_cv.notify_one(); + if (stop.stop_requested()) { + break; + } timer.End(); } } @@ -332,27 +335,30 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT; } - std::scoped_lock lock{ports_mutex}; + std::unique_lock open_lock{port_open_mutex}; const auto port = - std::ranges::find_if(ports_out, [&](const PortOut& p) { return p.impl == nullptr; }); + std::ranges::find_if(ports_out, [&](const PortOut& p) { return !p.IsOpen(); }); if (port == ports_out.end()) { LOG_ERROR(Lib_AudioOut, "Audio ports are full"); return ORBIS_AUDIO_OUT_ERROR_PORT_FULL; } - port->type = port_type; - port->format_info = GetFormatInfo(format); - port->sample_rate = sample_rate; - port->buffer_frames = length; - port->volume.fill(SCE_AUDIO_OUT_VOLUME_0DB); + { + std::unique_lock port_lock(port->mutex); - port->impl = audio->Open(*port); + port->type = port_type; + port->format_info = GetFormatInfo(format); + port->sample_rate = sample_rate; + port->buffer_frames = length; + port->volume.fill(SCE_AUDIO_OUT_VOLUME_0DB); - port->output_buffer = 
std::malloc(port->BufferSize()); - port->output_ready = false; - port->output_thread.Run( - [port](const std::stop_token& stop) { AudioOutputThread(&*port, stop); }); + port->impl = audio->Open(*port); + port->output_buffer = std::malloc(port->BufferSize()); + port->output_ready = false; + port->output_thread.Run( + [port](const std::stop_token& stop) { AudioOutputThread(&*port, stop); }); + } return std::distance(ports_out.begin(), port) + 1; } @@ -367,14 +373,13 @@ s32 PS4_SYSV_ABI sceAudioOutOutput(s32 handle, void* ptr) { } auto& port = ports_out.at(handle - 1); - if (!port.impl) { - return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; - } - { - std::unique_lock lock{port.output_mutex}; + std::unique_lock lock{port.mutex}; + if (!port.IsOpen()) { + return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + } port.output_cv.wait(lock, [&] { return !port.output_ready; }); - if (ptr != nullptr) { + if (ptr != nullptr && port.IsOpen()) { std::memcpy(port.output_buffer, ptr, port.BufferSize()); port.output_ready = true; } @@ -488,19 +493,19 @@ s32 PS4_SYSV_ABI sceAudioOutSetVolume(s32 handle, s32 flag, s32* vol) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - std::scoped_lock lock(ports_mutex); auto& port = ports_out.at(handle - 1); - if (!port.impl) { - return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; - } - - for (int i = 0; i < port.format_info.num_channels; i++, flag >>= 1u) { - if (flag & 0x1u) { - port.volume[i] = vol[i]; + { + std::unique_lock lock{port.mutex}; + if (!port.IsOpen()) { + return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } + for (int i = 0; i < port.format_info.num_channels; i++, flag >>= 1u) { + if (flag & 0x1u) { + port.volume[i] = vol[i]; + } + } + port.impl->SetVolume(port.volume); } - - port.impl->SetVolume(port.volume); return ORBIS_OK; } diff --git a/src/core/libraries/audio/audioout.h b/src/core/libraries/audio/audioout.h index 4f7378dcd..5eafb43a1 100644 --- a/src/core/libraries/audio/audioout.h +++ b/src/core/libraries/audio/audioout.h @@ -3,7 +3,9 @@ #pragma once 
+#include #include +#include #include "common/bit_field.h" #include "core/libraries/kernel/threads.h" @@ -74,10 +76,10 @@ struct AudioFormatInfo { }; struct PortOut { + std::mutex mutex; std::unique_ptr impl{}; void* output_buffer; - std::mutex output_mutex; std::condition_variable_any output_cv; bool output_ready; Kernel::Thread output_thread{}; @@ -88,6 +90,10 @@ struct PortOut { u32 buffer_frames; std::array volume; + [[nodiscard]] bool IsOpen() const { + return impl != nullptr; + } + [[nodiscard]] u32 BufferSize() const { return buffer_frames * format_info.FrameSize(); } diff --git a/src/core/libraries/audio/sdl_audio.cpp b/src/core/libraries/audio/sdl_audio.cpp index 59d2d5cfb..762a9f682 100644 --- a/src/core/libraries/audio/sdl_audio.cpp +++ b/src/core/libraries/audio/sdl_audio.cpp @@ -14,7 +14,7 @@ namespace Libraries::AudioOut { class SDLPortBackend : public PortBackend { public: explicit SDLPortBackend(const PortOut& port) - : frame_size(port.format_info.FrameSize()), buffer_size(port.BufferSize()) { + : frame_size(port.format_info.FrameSize()), guest_buffer_size(port.BufferSize()) { // We want the latency for delivering frames out to be as small as possible, // so set the sample frames hint to the number of frames per buffer. const auto samples_num_str = std::to_string(port.buffer_frames); @@ -33,7 +33,7 @@ public: LOG_ERROR(Lib_AudioOut, "Failed to create SDL audio stream: {}", SDL_GetError()); return; } - queue_threshold = CalculateQueueThreshold(); + CalculateQueueThreshold(); if (!SDL_SetAudioStreamInputChannelMap(stream, port.format_info.channel_layout.data(), port.format_info.num_channels)) { LOG_ERROR(Lib_AudioOut, "Failed to configure SDL audio stream channel map: {}", @@ -71,9 +71,9 @@ public: queue_threshold); SDL_ClearAudioStream(stream); // Recalculate the threshold in case this happened because of a device change. 
- queue_threshold = CalculateQueueThreshold(); + CalculateQueueThreshold(); } - if (!SDL_PutAudioStreamData(stream, ptr, static_cast(buffer_size))) { + if (!SDL_PutAudioStreamData(stream, ptr, static_cast(guest_buffer_size))) { LOG_ERROR(Lib_AudioOut, "Failed to output to SDL audio stream: {}", SDL_GetError()); } } @@ -91,7 +91,7 @@ public: } private: - [[nodiscard]] u32 CalculateQueueThreshold() const { + void CalculateQueueThreshold() { SDL_AudioSpec discard; int sdl_buffer_frames; if (!SDL_GetAudioDeviceFormat(SDL_GetAudioStreamDevice(stream), &discard, @@ -100,13 +100,22 @@ private: SDL_GetError()); sdl_buffer_frames = 0; } - return std::max(buffer_size, sdl_buffer_frames * frame_size) * 4; + const auto sdl_buffer_size = sdl_buffer_frames * frame_size; + const auto new_threshold = std::max(guest_buffer_size, sdl_buffer_size) * 4; + if (host_buffer_size != sdl_buffer_size || queue_threshold != new_threshold) { + host_buffer_size = sdl_buffer_size; + queue_threshold = new_threshold; + LOG_INFO(Lib_AudioOut, + "SDL audio buffers: guest = {} bytes, host = {} bytes, threshold = {} bytes", + guest_buffer_size, host_buffer_size, queue_threshold); + } } u32 frame_size; - u32 buffer_size; - u32 queue_threshold; - SDL_AudioStream* stream; + u32 guest_buffer_size; + u32 host_buffer_size{}; + u32 queue_threshold{}; + SDL_AudioStream* stream{}; }; std::unique_ptr SDLAudioOut::Open(PortOut& port) { From b87bca2e46584b5fc1025569b5caf28f81eafd56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=A5IGA?= <164882787+Xphalnos@users.noreply.github.com> Date: Thu, 2 Jan 2025 16:37:19 +0100 Subject: [PATCH 47/53] Reduce USBD Log Spamming (#2019) --- src/core/libraries/usbd/usbd.cpp | 130 +++++++++++++++---------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/src/core/libraries/usbd/usbd.cpp b/src/core/libraries/usbd/usbd.cpp index c0e1b7ea8..fdfa50b23 100644 --- a/src/core/libraries/usbd/usbd.cpp +++ b/src/core/libraries/usbd/usbd.cpp @@ -10,327 +10,327 @@ namespace 
Libraries::Usbd { int PS4_SYSV_ABI sceUsbdAllocTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdAttachKernelDriver() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdBulkTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdCancelTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdCheckConnected() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdClaimInterface() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdClearHalt() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdClose() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdControlTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdControlTransferGetData() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdControlTransferGetSetup() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdDetachKernelDriver() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdEventHandlerActive() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdEventHandlingOk() { - 
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdExit() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFillBulkTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFillControlSetup() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFillControlTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFillInterruptTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFillIsoTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFreeConfigDescriptor() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFreeDeviceList() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFreeTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetActiveConfigDescriptor() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetBusNumber() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetConfigDescriptor() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetConfigDescriptorByValue() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + 
LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetConfiguration() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetDescriptor() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetDevice() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetDeviceAddress() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetDeviceDescriptor() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetDeviceList() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetDeviceSpeed() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetIsoPacketBuffer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetMaxIsoPacketSize() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetMaxPacketSize() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetStringDescriptor() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetStringDescriptorAscii() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdHandleEvents() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } 
int PS4_SYSV_ABI sceUsbdHandleEventsLocked() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdHandleEventsTimeout() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_DEBUG(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdInit() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return 0x80240005; // Skip } int PS4_SYSV_ABI sceUsbdInterruptTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdKernelDriverActive() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdLockEvents() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdLockEventWaiters() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdOpen() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdOpenDeviceWithVidPid() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdRefDevice() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdReleaseInterface() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdResetDevice() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdSetConfiguration() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdSetInterfaceAltSetting() { - LOG_ERROR(Lib_Usbd, 
"(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdSetIsoPacketLengths() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdSubmitTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdTryLockEvents() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdUnlockEvents() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdUnlockEventWaiters() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdUnrefDevice() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdWaitForEvent() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI Func_65F6EF33E38FFF50() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI Func_97F056BAD90AADE7() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI Func_C55104A33B35B264() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI Func_D56B43060720B1E0() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } From 55ccec4a387dcf1bf3dcf11f55f2ce546e88a173 Mon Sep 17 00:00:00 2001 From: polyproxy <47796739+polybiusproxy@users.noreply.github.com> Date: Thu, 2 Jan 2025 18:40:10 +0100 Subject: [PATCH 48/53] fix typos --- src/core/libraries/kernel/memory.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 
6 deletions(-) diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index b18d5f570..8deefb496 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -505,13 +505,13 @@ int PS4_SYSV_ABI posix_munmap(void* addr, size_t len) { return result; } -static constexpr int MAX_PTR_APERTURES = 3; +static constexpr int MAX_PRT_APERTURES = 3; static constexpr VAddr PRT_AREA_START_ADDR = 0x1000000000; static constexpr size_t PRT_AREA_SIZE = 0xec00000000; -static std::array, MAX_PTR_APERTURES> PrtApertures{}; +static std::array, MAX_PRT_APERTURES> PrtApertures{}; int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) { - if (id < 0 || id >= MAX_PTR_APERTURES) { + if (id < 0 || id >= MAX_PRT_APERTURES) { return ORBIS_KERNEL_ERROR_EINVAL; } @@ -531,12 +531,12 @@ int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) { return ORBIS_OK; } -int PS4_SYSV_ABI sceKernelGetPrtAperture(int id, VAddr* addres, size_t* size) { - if (id < 0 || id >= MAX_PTR_APERTURES) { +int PS4_SYSV_ABI sceKernelGetPrtAperture(int id, VAddr* address, size_t* size) { + if (id < 0 || id >= MAX_PRT_APERTURES) { return ORBIS_KERNEL_ERROR_EINVAL; } - std::tie(*addres, *size) = PrtApertures[id]; + std::tie(*address, *size) = PrtApertures[id]; return ORBIS_OK; } From f7a8e2409c38a5f49fcf3cd0db1bdbf26ff69d7f Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 2 Jan 2025 19:41:15 +0100 Subject: [PATCH 49/53] hot-fix: debug build --- src/video_core/renderer_vulkan/liverpool_to_vk.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 97825b1e1..690d26cfc 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -697,8 +697,8 @@ static const size_t amd_gpu_number_format_bit_size = 4; // All values are under 
static size_t GetSurfaceFormatTableIndex(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { - DEBUG_ASSERT(data_format < 1 << amd_gpu_data_format_bit_size); - DEBUG_ASSERT(num_format < 1 << amd_gpu_number_format_bit_size); + DEBUG_ASSERT(u32(data_format) < 1 << amd_gpu_data_format_bit_size); + DEBUG_ASSERT(u32(num_format) < 1 << amd_gpu_number_format_bit_size); size_t result = static_cast(num_format) | (static_cast(data_format) << amd_gpu_number_format_bit_size); return result; From 77d217244142c11e01a960da4558ae122e20b3a8 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 2 Jan 2025 20:43:56 +0200 Subject: [PATCH 50/53] renderer_vulkan: Cleanup and improve barriers in caches (#1865) * texture_cache: Stricter barriers on image upload * buffer_cache: Stricter barrier for vkCmdUpdateBuffer * vk_rasterizer: Barrier also normal buffers and make it apply to all stages * texture_cache: Minor barrier cleanup * Batch image and buffer barriers in a single command * clang format --- src/video_core/buffer_cache/buffer_cache.cpp | 147 ++++++++++++++---- .../renderer_vulkan/vk_rasterizer.cpp | 8 +- .../texture_cache/texture_cache.cpp | 45 +++++- src/video_core/texture_cache/tile_manager.cpp | 59 +++---- src/video_core/texture_cache/tile_manager.h | 7 +- 5 files changed, 190 insertions(+), 76 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 3ac6a3598..3e43b4fbc 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -259,7 +259,16 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo const BufferId buffer_id = FindBuffer(address, num_bytes); return &slot_buffers[buffer_id]; }(); - const vk::BufferMemoryBarrier2 buf_barrier = { + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = 
vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer->Handle(), + .offset = buffer->Offset(address), + .size = num_bytes, + }; + const vk::BufferMemoryBarrier2 post_barrier = { .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, @@ -271,9 +280,14 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier, + .pBufferMemoryBarriers = &pre_barrier, + }); + cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, }); - cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); } std::pair BufferCache::ObtainHostUBO(std::span data) { @@ -465,21 +479,48 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, }; scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + const std::array pre_barriers = { + vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferRead, + .buffer = overlap.Handle(), + .offset = 0, + .size = overlap.SizeBytes(), + }, }; - static constexpr vk::MemoryBarrier 
WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + const std::array post_barriers = { + vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferRead, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .buffer = overlap.Handle(), + .offset = 0, + .size = overlap.SizeBytes(), + }, + vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = new_buffer.Handle(), + .offset = dst_base_offset, + .size = overlap.SizeBytes(), + }, }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - READ_BARRIER, {}, {}); - cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = pre_barriers.data(), + }); + cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = static_cast(post_barriers.size()), + .pBufferMemoryBarriers = post_barriers.data(), + }); DeleteBuffer(overlap_id); } @@ -583,21 +624,35 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, } scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - 
static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer.Handle(), + .offset = 0, + .size = buffer.SizeBytes(), }; - static constexpr vk::MemoryBarrier WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + const vk::BufferMemoryBarrier2 post_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = buffer.Handle(), + .offset = 0, + .size = buffer.SizeBytes(), }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - READ_BARRIER, {}, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + }); cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); } bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { @@ -647,10 +702,42 @@ 
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, } if (!copies.empty()) { scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer.Handle(), + .offset = max_offset - size, + .size = size, + }; + const vk::BufferMemoryBarrier2 post_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, + .buffer = buffer.Handle(), + .offset = max_offset - size, + .size = size, + }; + auto barriers = image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal, + vk::AccessFlagBits2::eTransferRead, + vk::PipelineStageFlagBits2::eTransfer, {}); const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + .imageMemoryBarrierCount = static_cast(barriers.size()), + .pImageMemoryBarriers = barriers.data(), + }); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.Handle(), copies); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); } return true; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 4384cdbea..6e628239b 100644 --- 
a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -562,6 +562,12 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding push_data.AddOffset(binding.buffer, adjust); buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, vsharp.GetSize() + adjust); + if (auto barrier = + vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite + : vk::AccessFlagBits2::eShaderRead, + vk::PipelineStageFlagBits2::eAllCommands)) { + buffer_barriers.emplace_back(*barrier); + } } set_writes.push_back({ @@ -600,7 +606,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding if (auto barrier = vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite : vk::AccessFlagBits2::eShaderRead, - vk::PipelineStageFlagBits2::eComputeShader)) { + vk::PipelineStageFlagBits2::eAllCommands)) { buffer_barriers.emplace_back(*barrier); } if (desc.is_written) { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 897d6f67e..291e1da7c 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -542,31 +542,62 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule sched_ptr->EndRendering(); const auto cmdbuf = sched_ptr->CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}, - cmdbuf); - const VAddr image_addr = image.info.guest_address; const size_t image_size = image.info.guest_size_bytes; const auto [vk_buffer, buf_offset] = buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty); + // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW // hazard if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead, vk::PipelineStageFlagBits2::eTransfer)) { - const auto dependencies = 
vk::DependencyInfo{ + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &barrier.value(), - }; - cmdbuf.pipelineBarrier2(dependencies); + }); } - const auto [buffer, offset] = tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image); + const auto [buffer, offset] = + tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image.info); for (auto& copy : image_copy) { copy.bufferOffset += offset; } + const vk::BufferMemoryBarrier2 pre_barrier{ + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferRead, + .buffer = buffer, + .offset = offset, + .size = image_size, + }; + const vk::BufferMemoryBarrier2 post_barrier{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = buffer, + .offset = offset, + .size = image_size, + }; + const auto image_barriers = + image.GetBarriers(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, + vk::PipelineStageFlagBits2::eTransfer, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + .imageMemoryBarrierCount = static_cast(image_barriers.size()), + .pImageMemoryBarriers = image_barriers.data(), + }); cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); image.flags &= 
~ImageFlagBits::Dirty; } diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index fda7e511a..c1243dafb 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -4,6 +4,7 @@ #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/tile_manager.h" @@ -86,10 +87,10 @@ static vk::Format DemoteImageFormatForDetiling(vk::Format format) { return format; } -const DetilerContext* TileManager::GetDetiler(const Image& image) const { - const auto format = DemoteImageFormatForDetiling(image.info.pixel_format); +const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const { + const auto format = DemoteImageFormatForDetiling(info.pixel_format); - switch (image.info.tiling_mode) { + switch (info.tiling_mode) { case AmdGpu::TilingMode::Texture_MicroTiled: switch (format) { case vk::Format::eR8Uint: @@ -258,23 +259,23 @@ void TileManager::FreeBuffer(ScratchBuffer buffer) { } std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_offset, - Image& image) { - if (!image.info.props.is_tiled) { + const ImageInfo& info) { + if (!info.props.is_tiled) { return {in_buffer, in_offset}; } - const auto* detiler = GetDetiler(image); + const auto* detiler = GetDetiler(info); if (!detiler) { - if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled && - image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled && - image.info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) { + if (info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled && + info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled && + info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) { LOG_ERROR(Render_Vulkan, "Unsupported tiled 
image: {} ({})", - vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode)); + vk::to_string(info.pixel_format), NameOf(info.tiling_mode)); } return {in_buffer, in_offset}; } - const u32 image_size = image.info.guest_size_bytes; + const u32 image_size = info.guest_size_bytes; // Prepare output buffer auto out_buffer = AllocBuffer(image_size, true); @@ -317,22 +318,21 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o set_writes); DetilerParams params; - params.num_levels = image.info.resources.levels; - params.pitch0 = image.info.pitch >> (image.info.props.is_block ? 2u : 0u); - params.height = image.info.size.height; - if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) { - ASSERT(image.info.resources.levels == 1); - ASSERT(image.info.num_bits >= 32); - const auto tiles_per_row = image.info.pitch / 8u; - const auto tiles_per_slice = tiles_per_row * ((image.info.size.height + 7u) / 8u); + params.num_levels = info.resources.levels; + params.pitch0 = info.pitch >> (info.props.is_block ? 2u : 0u); + params.height = info.size.height; + if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) { + ASSERT(info.resources.levels == 1); + ASSERT(info.num_bits >= 32); + const auto tiles_per_row = info.pitch / 8u; + const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u); params.sizes[0] = tiles_per_row; params.sizes[1] = tiles_per_slice; } else { - - ASSERT(image.info.resources.levels <= 14); + ASSERT(info.resources.levels <= 14); std::memset(&params.sizes, 0, sizeof(params.sizes)); - for (int m = 0; m < image.info.resources.levels; ++m) { - params.sizes[m] = image.info.mips_layout[m].size * image.info.resources.layers + + for (int m = 0; m < info.resources.levels; ++m) { + params.sizes[m] = info.mips_layout[m].size * info.resources.layers + (m > 0 ? 
params.sizes[m - 1] : 0); } } @@ -341,20 +341,9 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o &params); ASSERT((image_size % 64) == 0); - const auto bpp = image.info.num_bits * (image.info.props.is_block ? 16u : 1u); + const auto bpp = info.num_bits * (info.props.is_block ? 16u : 1u); const auto num_tiles = image_size / (64 * (bpp / 8)); cmdbuf.dispatch(num_tiles, 1, 1); - - const vk::BufferMemoryBarrier post_barrier{ - .srcAccessMask = vk::AccessFlagBits::eShaderWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead, - .buffer = out_buffer.first, - .size = image_size, - }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - {}, post_barrier, {}); - return {out_buffer.first, 0}; } diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 72860bca0..1d731d2f2 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -5,11 +5,11 @@ #include "common/types.h" #include "video_core/buffer_cache/buffer.h" -#include "video_core/texture_cache/image.h" namespace VideoCore { class TextureCache; +struct ImageInfo; enum DetilerType : u32 { Micro8x1, @@ -36,14 +36,15 @@ public: TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler); ~TileManager(); - std::pair TryDetile(vk::Buffer in_buffer, u32 in_offset, Image& image); + std::pair TryDetile(vk::Buffer in_buffer, u32 in_offset, + const ImageInfo& info); ScratchBuffer AllocBuffer(u32 size, bool is_storage = false); void Upload(ScratchBuffer buffer, const void* data, size_t size); void FreeBuffer(ScratchBuffer buffer); private: - const DetilerContext* GetDetiler(const Image& image) const; + const DetilerContext* GetDetiler(const ImageInfo& info) const; private: const Vulkan::Instance& instance; From 67c531298a6c6c56a2ba82251fd4f04487601bc8 Mon Sep 17 00:00:00 2001 From: Stephen Miller 
<56742918+StevenMiller123@users.noreply.github.com> Date: Thu, 2 Jan 2025 14:51:47 -0600 Subject: [PATCH 51/53] Fixup returns (#2023) On a signed out console, these two functions return ERROR_SIGNED_OUT. --- src/core/libraries/np_manager/np_manager.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/core/libraries/np_manager/np_manager.cpp b/src/core/libraries/np_manager/np_manager.cpp index ec9cc6bf5..87d752c69 100644 --- a/src/core/libraries/np_manager/np_manager.cpp +++ b/src/core/libraries/np_manager/np_manager.cpp @@ -972,11 +972,8 @@ int PS4_SYSV_ABI sceNpGetGamePresenceStatusA() { } int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId user_id, OrbisNpId* np_id) { - LOG_INFO(Lib_NpManager, "user_id {}", user_id); - const auto name = Config::getUserName(); - std::memset(np_id, 0, sizeof(OrbisNpId)); - name.copy(np_id->handle.data, sizeof(np_id->handle.data)); - return ORBIS_OK; + LOG_DEBUG(Lib_NpManager, "user_id {}", user_id); + return ORBIS_NP_ERROR_SIGNED_OUT; } int PS4_SYSV_ABI sceNpGetNpReachabilityState() { @@ -986,10 +983,7 @@ int PS4_SYSV_ABI sceNpGetNpReachabilityState() { int PS4_SYSV_ABI sceNpGetOnlineId(s32 user_id, OrbisNpOnlineId* online_id) { LOG_DEBUG(Lib_NpManager, "user_id {}", user_id); - const auto name = Config::getUserName(); - std::memset(online_id, 0, sizeof(OrbisNpOnlineId)); - name.copy(online_id->data, sizeof(online_id->data)); - return ORBIS_OK; + return ORBIS_NP_ERROR_SIGNED_OUT; } int PS4_SYSV_ABI sceNpGetParentalControlInfo() { From dcc662ff1afa4a7aad8f50260b64c2e03a0dd2e6 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 2 Jan 2025 22:52:10 +0200 Subject: [PATCH 52/53] ir_passes: Integrate DS barriers in block (#2020) --- .../frontend/translate/data_share.cpp | 6 -- .../frontend/translate/translate.h | 1 - .../ir/passes/shared_memory_barrier_pass.cpp | 71 ++++++++++++++----- 3 files changed, 52 insertions(+), 26 deletions(-) diff --git 
a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 4408cae28..62c0423dd 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -205,7 +205,6 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0))); ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); } - emit_ds_read_barrier = true; } void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { @@ -222,11 +221,6 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst) { - if (emit_ds_read_barrier && profile.needs_lds_barriers) { - ir.Barrier(); - emit_ds_read_barrier = false; - } - const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; IR::VectorReg dst_reg{inst.dst[0].code}; if (is_pair) { diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index e8584ec2f..9da0844e4 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -308,7 +308,6 @@ private: const RuntimeInfo& runtime_info; const Profile& profile; bool opcode_missing = false; - bool emit_ds_read_barrier = false; }; void Translate(IR::Block* block, u32 block_base, std::span inst_list, Info& info, diff --git a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp index aad8fb148..ec7d7e986 100644 --- a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp +++ b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp @@ -8,6 +8,54 @@ namespace Shader::Optimization { +static void EmitBarrierInBlock(IR::Block* block) { + // This is intended to insert a barrier 
when shared memory write and read + // occur in the same basic block. Also checks if branch depth is zero as + // we don't want to insert barrier in potentially divergent code. + bool emit_barrier_on_write = false; + bool emit_barrier_on_read = false; + const auto emit_barrier = [block](bool& emit_cond, IR::Inst& inst) { + if (emit_cond) { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + ir.Barrier(); + emit_cond = false; + } + }; + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::LoadSharedU32 || + inst.GetOpcode() == IR::Opcode::LoadSharedU64) { + emit_barrier(emit_barrier_on_read, inst); + emit_barrier_on_write = true; + } + if (inst.GetOpcode() == IR::Opcode::WriteSharedU32 || + inst.GetOpcode() == IR::Opcode::WriteSharedU64) { + emit_barrier(emit_barrier_on_write, inst); + emit_barrier_on_read = true; + } + } +} + +static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) { + // Insert a barrier after divergent conditional blocks. + // This avoids potential softlocks and crashes when some threads + // initialize shared memory and others read from it. 
+ const IR::U1 cond = data.if_node.cond; + const auto insert_barrier = + IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 && + inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) { + return true; + } + return std::nullopt; + }); + if (insert_barrier) { + IR::Block* const merge = data.if_node.merge; + auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi); + IR::IREmitter ir{*merge, insert_point}; + ir.Barrier(); + } +} + void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) { if (!program.info.uses_shared || !profile.needs_lds_barriers) { return; @@ -19,27 +67,12 @@ void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) { --branch_depth; continue; } - if (node.type != Type::If) { + if (node.type == Type::If && branch_depth++ == 0) { + EmitBarrierInMergeBlock(node.data); continue; } - u32 curr_depth = branch_depth++; - if (curr_depth != 0) { - continue; - } - const IR::U1 cond = node.data.if_node.cond; - const auto insert_barrier = - IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional { - if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 && - inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) { - return true; - } - return std::nullopt; - }); - if (insert_barrier) { - IR::Block* const merge = node.data.if_node.merge; - auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi); - IR::IREmitter ir{*merge, insert_point}; - ir.Barrier(); + if (node.type == Type::Block && branch_depth == 0) { + EmitBarrierInBlock(node.data.block); } } } From 4e1733222f4b42d4292d6c7417f215c109cd51d3 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Thu, 2 Jan 2025 23:21:43 +0200 Subject: [PATCH 53/53] fixed deadzones (#2025) --- src/core/libraries/pad/pad.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/libraries/pad/pad.cpp b/src/core/libraries/pad/pad.cpp index 
27564294e..7eb628a90 100644 --- a/src/core/libraries/pad/pad.cpp +++ b/src/core/libraries/pad/pad.cpp @@ -104,8 +104,8 @@ int PS4_SYSV_ABI scePadGetControllerInformation(s32 handle, OrbisPadControllerIn pInfo->touchPadInfo.pixelDensity = 1; pInfo->touchPadInfo.resolution.x = 1920; pInfo->touchPadInfo.resolution.y = 950; - pInfo->stickInfo.deadZoneLeft = 20; - pInfo->stickInfo.deadZoneRight = 20; + pInfo->stickInfo.deadZoneLeft = 2; + pInfo->stickInfo.deadZoneRight = 2; pInfo->connectionType = ORBIS_PAD_PORT_TYPE_STANDARD; pInfo->connectedCount = 1; pInfo->connected = true;