From 3218c36b22335e69d558a724627f9876ac70f7b7 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Fri, 27 Dec 2024 23:03:03 +0200 Subject: [PATCH 01/45] knack fixes by niko (#1933) --- src/core/libraries/ajm/ajm_batch.h | 1 + src/shader_recompiler/backend/spirv/emit_spirv.cpp | 1 + src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/src/core/libraries/ajm/ajm_batch.h b/src/core/libraries/ajm/ajm_batch.h index 65110ee73..3c586b773 100644 --- a/src/core/libraries/ajm/ajm_batch.h +++ b/src/core/libraries/ajm/ajm_batch.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 0ce9eea7c..900d40472 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -24,6 +24,7 @@ static constexpr spv::ExecutionMode GetInputPrimitiveType(AmdGpu::PrimitiveType case AmdGpu::PrimitiveType::PointList: return spv::ExecutionMode::InputPoints; case AmdGpu::PrimitiveType::LineList: + case AmdGpu::PrimitiveType::LineStrip: return spv::ExecutionMode::InputLines; case AmdGpu::PrimitiveType::TriangleList: case AmdGpu::PrimitiveType::TriangleStrip: diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index d8bafccd9..281c487af 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -43,6 +43,7 @@ static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) { case AmdGpu::PrimitiveType::PointList: return 1u; case AmdGpu::PrimitiveType::LineList: + case AmdGpu::PrimitiveType::LineStrip: return 2u; case AmdGpu::PrimitiveType::TriangleList: case AmdGpu::PrimitiveType::TriangleStrip: From 49ffb7b120d02286db02a9608b5f6e3f0b149f15 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Fri, 27 Dec 2024 14:07:16 -0800 Subject: [PATCH 02/45] sdl_audio: Implement SetVolume and add more error checking. (#1935) --- src/core/libraries/audio/cubeb_audio.cpp | 7 +++++- src/core/libraries/audio/sdl_audio.cpp | 30 ++++++++++++++++++------ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/core/libraries/audio/cubeb_audio.cpp b/src/core/libraries/audio/cubeb_audio.cpp index ca0a4c3b6..e1195558a 100644 --- a/src/core/libraries/audio/cubeb_audio.cpp +++ b/src/core/libraries/audio/cubeb_audio.cpp @@ -5,7 +5,7 @@ #include #include -#include "common/assert.h" +#include "common/logging/log.h" #include "common/ringbuffer.h" #include "core/libraries/audio/audioout.h" #include "core/libraries/audio/audioout_backend.h" @@ -58,6 +58,8 @@ public: } if (const auto ret = cubeb_stream_start(stream); ret != CUBEB_OK) { LOG_ERROR(Lib_AudioOut, "Failed to start cubeb stream: {}", ret); + cubeb_stream_destroy(stream); + stream = nullptr; return; } } @@ -74,6 +76,9 @@ public: } void Output(void* ptr, size_t size) override { + if (!stream) { + return; + } auto* data = static_cast(ptr); std::unique_lock lock{buffer_mutex}; diff --git a/src/core/libraries/audio/sdl_audio.cpp b/src/core/libraries/audio/sdl_audio.cpp index 7d7a7cee5..598941ba7 100644 --- a/src/core/libraries/audio/sdl_audio.cpp +++ b/src/core/libraries/audio/sdl_audio.cpp @@ -2,9 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include - #include -#include #include "common/logging/log.h" #include "core/libraries/audio/audioout.h" @@ -26,18 +24,28 @@ public: SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &fmt, nullptr, nullptr); if (stream == nullptr) { LOG_ERROR(Lib_AudioOut, "Failed to create SDL audio stream: {}", SDL_GetError()); + return; + } + if (!SDL_ResumeAudioStreamDevice(stream)) { + LOG_ERROR(Lib_AudioOut, "Failed to resume SDL audio stream: {}", SDL_GetError()); + SDL_DestroyAudioStream(stream); + stream = nullptr; + return; } - SDL_ResumeAudioStreamDevice(stream); } ~SDLPortBackend() override { - if (stream) { - SDL_DestroyAudioStream(stream); - stream = nullptr; + if (!stream) { + return; } + SDL_DestroyAudioStream(stream); + stream = nullptr; } void Output(void* ptr, size_t size) override { + if (!stream) { + return; + } SDL_PutAudioStreamData(stream, ptr, static_cast(size)); while (SDL_GetAudioStreamAvailable(stream) > AUDIO_STREAM_BUFFER_THRESHOLD) { // Yield to allow the stream to drain. @@ -46,7 +54,15 @@ public: } void SetVolume(const std::array& ch_volumes) override { - // TODO: Not yet implemented + if (!stream) { + return; + } + // SDL does not have per-channel volumes, for now just take the maximum of the channels. + const auto vol = *std::ranges::max_element(ch_volumes); + if (!SDL_SetAudioStreamGain(stream, static_cast(vol) / SCE_AUDIO_OUT_VOLUME_0DB)) { + LOG_WARNING(Lib_AudioOut, "Failed to change SDL audio stream volume: {}", + SDL_GetError()); + } } private: From 122fe22a320c4a17529d709d863f842a5c9ab107 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Fri, 27 Dec 2024 18:42:41 -0600 Subject: [PATCH 03/45] Implement IMAGE_GATHER4 and IMAGE_GATHER4_O (#1939) * Implement IMAGE_GATHER4_O Used by The Last of Us Remastered. * Fix type on IMAGE_GATHER4_C_LZ Had a different set of types compared to the other IMAGE_GATHER4 ops. * IMAGE_GATHER4 --- src/shader_recompiler/frontend/format.cpp | 10 +++++----- .../frontend/translate/vector_memory.cpp | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/frontend/format.cpp b/src/shader_recompiler/frontend/format.cpp index 4f0922e2e..9677be3e5 100644 --- a/src/shader_recompiler/frontend/format.cpp +++ b/src/shader_recompiler/frontend/format.cpp @@ -3565,8 +3565,8 @@ constexpr std::array InstructionFormatMIMG = {{ {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Float32, ScalarType::Float32}, // 64 = IMAGE_GATHER4 - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 65 = IMAGE_GATHER4_CL {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, ScalarType::Undefined}, @@ -3603,10 +3603,10 @@ constexpr std::array InstructionFormatMIMG = {{ ScalarType::Undefined}, // 79 = IMAGE_GATHER4_C_LZ {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, - ScalarType::Uint32}, + ScalarType::Float32}, // 80 = IMAGE_GATHER4_O - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 81 = IMAGE_GATHER4_CL_O {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, ScalarType::Undefined}, diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 48cb79610..7c3db9551 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -144,8 +144,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return IMAGE_SAMPLE(inst); // Image gather operations + case Opcode::IMAGE_GATHER4: case Opcode::IMAGE_GATHER4_LZ: case Opcode::IMAGE_GATHER4_C: + case Opcode::IMAGE_GATHER4_O: case Opcode::IMAGE_GATHER4_C_O: case Opcode::IMAGE_GATHER4_C_LZ: case Opcode::IMAGE_GATHER4_LZ_O: From 04933ac863d4777fdbe21810e9cf5e050381aa37 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 28 Dec 2024 11:44:11 +0100 Subject: [PATCH 04/45] hot-fix: handle ASC ring wrap --- src/core/libraries/gnmdriver/gnmdriver.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 566f8ce1f..91a1329e5 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -513,10 +513,14 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { auto vqid = gnm_vqid - 1; auto& asc_queue = liverpool->asc_queues[{vqid}]; - const auto& offs_dw = asc_next_offs_dw[vqid]; + auto& offs_dw = asc_next_offs_dw[vqid]; - if (next_offs_dw < offs_dw) { - ASSERT_MSG(next_offs_dw == 0, "ACB submission is split at the end of ring buffer"); + if (next_offs_dw < offs_dw && next_offs_dw != 0) { + // For cases if a submission is split at the end of the ring buffer, we need to submit it in + // two parts to handle the wrap + liverpool->SubmitAsc(gnm_vqid, {reinterpret_cast(asc_queue.map_addr) + offs_dw, + asc_queue.ring_size_dw - offs_dw}); + offs_dw = 0; } const auto* acb_ptr = reinterpret_cast(asc_queue.map_addr) + offs_dw; From 9aabe98e4eb548b12ddbaeb6d60c81e14caf7dae Mon Sep 17 00:00:00 2001 From: DemoJameson Date: Sat, 28 Dec 2024 18:49:44 +0800 Subject: [PATCH 05/45] Fix a translation not working (#1947) --- src/qt_gui/gui_context_menus.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/qt_gui/gui_context_menus.h b/src/qt_gui/gui_context_menus.h index 6c96ab37f..bbc84c4fc 100644 --- a/src/qt_gui/gui_context_menus.h +++ b/src/qt_gui/gui_context_menus.h @@ -347,9 +347,8 @@ public: if (selected == deleteUpdate) { if (!std::filesystem::exists(Common::FS::PathFromQString(game_update_path))) { - QMessageBox::critical( - nullptr, tr("Error"), - QString(tr("This game has no separate update to delete!"))); + QMessageBox::critical(nullptr, tr("Error"), + QString(tr("This game has no update to delete!"))); error = true; } else { folder_path = game_update_path; From 55f78a0b94b508cc51ae23bd49079b680ac1a574 Mon Sep 17 00:00:00 2001 From: mailwl Date: Sat, 28 Dec 2024 13:58:37 +0300 Subject: [PATCH 06/45] libraries: Add libSceMove HLE skeleton (#1945) * libraries: Add libSceMove HLE skeleton * Fix clang-format issue --- CMakeLists.txt | 2 ++ src/common/logging/filter.cpp | 1 + src/common/logging/types.h | 3 ++- src/core/libraries/libs.cpp | 2 ++ src/core/libraries/move/move.cpp | 44 ++++++++++++++++++++++++++++++++ src/core/libraries/move/move.h | 21 +++++++++++++++ 6 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 src/core/libraries/move/move.cpp create mode 100644 src/core/libraries/move/move.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 43e8d7cab..cd3894719 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -425,6 +425,8 @@ set(NP_LIBS src/core/libraries/np_manager/np_manager.cpp set(MISC_LIBS src/core/libraries/screenshot/screenshot.cpp src/core/libraries/screenshot/screenshot.h + src/core/libraries/move/move.cpp + src/core/libraries/move/move.h ) set(DEV_TOOLS src/core/devtools/layer.cpp diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 75c61a188..a2fd2c0a4 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -97,6 +97,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Lib, Http) \ SUB(Lib, Ssl) \ SUB(Lib, SysModule) \ + SUB(Lib, Move) \ SUB(Lib, NpManager) \ SUB(Lib, NpScore) \ SUB(Lib, NpTrophy) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index a0e7d021f..5b496d175 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -57,8 +57,9 @@ enum class Class : u8 { Lib_MsgDlg, ///< The LibSceMsgDialog implementation. Lib_AudioOut, ///< The LibSceAudioOut implementation. Lib_AudioIn, ///< The LibSceAudioIn implementation. + Lib_Move, ///< The LibSceMove implementation. Lib_Net, ///< The LibSceNet implementation. - Lib_NetCtl, ///< The LibSecNetCtl implementation. + Lib_NetCtl, ///< The LibSceNetCtl implementation. Lib_SaveData, ///< The LibSceSaveData implementation. Lib_SaveDataDialog, ///< The LibSceSaveDataDialog implementation. Lib_Ssl, ///< The LibSceSsl implementation. diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp index 66cdd5b87..c30c2d7c3 100644 --- a/src/core/libraries/libs.cpp +++ b/src/core/libraries/libs.cpp @@ -18,6 +18,7 @@ #include "core/libraries/libc_internal/libc_internal.h" #include "core/libraries/libpng/pngdec.h" #include "core/libraries/libs.h" +#include "core/libraries/move/move.h" #include "core/libraries/network/http.h" #include "core/libraries/network/net.h" #include "core/libraries/network/netctl.h" @@ -91,6 +92,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) { Libraries::Remoteplay::RegisterlibSceRemoteplay(sym); Libraries::Videodec::RegisterlibSceVideodec(sym); Libraries::RazorCpu::RegisterlibSceRazorCpu(sym); + Libraries::Move::RegisterlibSceMove(sym); } } // namespace Libraries diff --git a/src/core/libraries/move/move.cpp b/src/core/libraries/move/move.cpp new file mode 100644 index 000000000..626fed9b4 --- /dev/null +++ b/src/core/libraries/move/move.cpp @@ -0,0 +1,44 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/logging/log.h" +#include "core/libraries/error_codes.h" +#include "core/libraries/libs.h" +#include "move.h" + +namespace Libraries::Move { + +int PS4_SYSV_ABI sceMoveOpen() { + LOG_ERROR(Lib_Move, "(STUBBED) called"); + return ORBIS_FAIL; +} + +int PS4_SYSV_ABI sceMoveGetDeviceInfo() { + LOG_ERROR(Lib_Move, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceMoveReadStateRecent() { + LOG_TRACE(Lib_Move, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceMoveTerm() { + LOG_ERROR(Lib_Move, "(STUBBED) called"); + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceMoveInit() { + LOG_ERROR(Lib_Move, "(STUBBED) called"); + return ORBIS_OK; +} + +void RegisterlibSceMove(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("HzC60MfjJxU", "libSceMove", 1, "libSceMove", 1, 1, sceMoveOpen); + LIB_FUNCTION("GWXTyxs4QbE", "libSceMove", 1, "libSceMove", 1, 1, sceMoveGetDeviceInfo); + LIB_FUNCTION("f2bcpK6kJfg", "libSceMove", 1, "libSceMove", 1, 1, sceMoveReadStateRecent); + LIB_FUNCTION("tsZi60H4ypY", "libSceMove", 1, "libSceMove", 1, 1, sceMoveTerm); + LIB_FUNCTION("j1ITE-EoJmE", "libSceMove", 1, "libSceMove", 1, 1, sceMoveInit); +}; + +} // namespace Libraries::Move \ No newline at end of file diff --git a/src/core/libraries/move/move.h b/src/core/libraries/move/move.h new file mode 100644 index 000000000..2d7adaba7 --- /dev/null +++ b/src/core/libraries/move/move.h @@ -0,0 +1,21 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Core::Loader { +class SymbolsResolver; +} + +namespace Libraries::Move { + +int PS4_SYSV_ABI sceMoveOpen(); +int PS4_SYSV_ABI sceMoveGetDeviceInfo(); +int PS4_SYSV_ABI sceMoveReadStateRecent(); +int PS4_SYSV_ABI sceMoveTerm(); +int PS4_SYSV_ABI sceMoveInit(); + +void RegisterlibSceMove(Core::Loader::SymbolsResolver* sym); +} // namespace Libraries::Move \ No newline at end of file From 817a62468e9ecf041ba106cab5b1943127057fff Mon Sep 17 00:00:00 2001 From: polybiusproxy <47796739+polybiusproxy@users.noreply.github.com> Date: Sat, 28 Dec 2024 12:03:00 +0100 Subject: [PATCH 07/45] core: better memory configuration (#1896) --- src/core/address_space.cpp | 2 +- src/core/libraries/kernel/memory.h | 12 +++---- src/core/libraries/np_trophy/np_trophy.cpp | 7 ++-- src/core/linker.cpp | 41 ++++++++++++++++------ src/core/linker.h | 3 +- src/core/memory.cpp | 20 ++++++----- src/core/memory.h | 2 +- 7 files changed, 55 insertions(+), 32 deletions(-) diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index 24f5e9f87..2b7331cbd 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -26,7 +26,7 @@ asm(".zerofill GUEST_SYSTEM,GUEST_SYSTEM,__guest_system,0xFBFC00000"); namespace Core { -static constexpr size_t BackingSize = SCE_KERNEL_MAIN_DMEM_SIZE_PRO; +static constexpr size_t BackingSize = SCE_KERNEL_TOTAL_MEM_PRO; #ifdef _WIN32 diff --git a/src/core/libraries/kernel/memory.h b/src/core/libraries/kernel/memory.h index 2d19ceb49..400b6c3fc 100644 --- a/src/core/libraries/kernel/memory.h +++ b/src/core/libraries/kernel/memory.h @@ -6,9 +6,11 @@ #include "common/bit_field.h" #include "common/types.h" -constexpr u64 SCE_KERNEL_MAIN_DMEM_SIZE = 5056_MB; // ~ 5GB -// TODO: Confirm this value on hardware. -constexpr u64 SCE_KERNEL_MAIN_DMEM_SIZE_PRO = 5568_MB; // ~ 5.5GB +constexpr u64 SCE_KERNEL_TOTAL_MEM = 5248_MB; +constexpr u64 SCE_KERNEL_TOTAL_MEM_PRO = 5888_MB; + +constexpr u64 SCE_FLEXIBLE_MEMORY_BASE = 64_MB; +constexpr u64 SCE_FLEXIBLE_MEMORY_SIZE = 512_MB; namespace Core::Loader { class SymbolsResolver; @@ -129,10 +131,6 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, size_t len, int flags); int PS4_SYSV_ABI sceKernelMunmap(void* addr, size_t len); -void* Malloc(size_t size); - -void Free(void* ptr); - void RegisterMemory(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::Kernel diff --git a/src/core/libraries/np_trophy/np_trophy.cpp b/src/core/libraries/np_trophy/np_trophy.cpp index 9324ed6bb..ccd8ab710 100644 --- a/src/core/libraries/np_trophy/np_trophy.cpp +++ b/src/core/libraries/np_trophy/np_trophy.cpp @@ -498,7 +498,7 @@ int PS4_SYSV_ABI sceNpTrophyGetTrophyInfo(OrbisNpTrophyContext context, OrbisNpT s32 PS4_SYSV_ABI sceNpTrophyGetTrophyUnlockState(OrbisNpTrophyContext context, OrbisNpTrophyHandle handle, OrbisNpTrophyFlagArray* flags, u32* count) { - LOG_INFO(Lib_NpTrophy, "GetTrophyUnlockState called"); + LOG_INFO(Lib_NpTrophy, "called"); if (context == ORBIS_NP_TROPHY_INVALID_CONTEXT) return ORBIS_NP_TROPHY_ERROR_INVALID_CONTEXT; @@ -519,8 +519,9 @@ s32 PS4_SYSV_ABI sceNpTrophyGetTrophyUnlockState(OrbisNpTrophyContext context, pugi::xml_parse_result result = doc.load_file(trophy_file.native().c_str()); if (!result) { - LOG_ERROR(Lib_NpTrophy, "Failed to open trophy xml : {}", result.description()); - return -1; + LOG_ERROR(Lib_NpTrophy, "Failed to open trophy XML: {}", result.description()); + *count = 0; + return ORBIS_OK; } int num_trophies = 0; diff --git a/src/core/linker.cpp b/src/core/linker.cpp index 9cf4198ae..28d2eea7b 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -5,6 +5,7 @@ #include "common/arch.h" #include "common/assert.h" #include "common/config.h" +#include "common/elf_info.h" #include "common/logging/log.h" #include "common/path_util.h" #include "common/string_util.h" @@ -65,21 +66,41 @@ void Linker::Execute() { Relocate(m.get()); } - // Configure used flexible memory size. - if (const auto* proc_param = GetProcParam()) { - if (proc_param->size >= - offsetof(OrbisProcParam, mem_param) + sizeof(OrbisKernelMemParam*)) { - if (const auto* mem_param = proc_param->mem_param) { - if (mem_param->size >= - offsetof(OrbisKernelMemParam, flexible_memory_size) + sizeof(u64*)) { - if (const auto* flexible_size = mem_param->flexible_memory_size) { - memory->SetupMemoryRegions(*flexible_size); - } + // Configure the direct and flexible memory regions. + u64 fmem_size = SCE_FLEXIBLE_MEMORY_SIZE; + bool use_extended_mem1 = true, use_extended_mem2 = true; + + const auto* proc_param = GetProcParam(); + ASSERT(proc_param); + + Core::OrbisKernelMemParam mem_param{}; + if (proc_param->size >= offsetof(OrbisProcParam, mem_param) + sizeof(OrbisKernelMemParam*)) { + if (proc_param->mem_param) { + mem_param = *proc_param->mem_param; + if (mem_param.size >= + offsetof(OrbisKernelMemParam, flexible_memory_size) + sizeof(u64*)) { + if (const auto* flexible_size = mem_param.flexible_memory_size) { + fmem_size = *flexible_size + SCE_FLEXIBLE_MEMORY_BASE; } } } } + if (mem_param.size < offsetof(OrbisKernelMemParam, extended_memory_1) + sizeof(u64*)) { + mem_param.extended_memory_1 = nullptr; + } + if (mem_param.size < offsetof(OrbisKernelMemParam, extended_memory_2) + sizeof(u64*)) { + mem_param.extended_memory_2 = nullptr; + } + + const u64 sdk_ver = proc_param->sdk_version; + if (sdk_ver < Common::ElfInfo::FW_50) { + use_extended_mem1 = mem_param.extended_memory_1 ? *mem_param.extended_memory_1 : false; + use_extended_mem2 = mem_param.extended_memory_2 ? *mem_param.extended_memory_2 : false; + } + + memory->SetupMemoryRegions(fmem_size, use_extended_mem1, use_extended_mem2); + main_thread.Run([this, module](std::stop_token) { Common::SetCurrentThreadName("GAME_MainThread"); LoadSharedLibraries(); diff --git a/src/core/linker.h b/src/core/linker.h index d6b5d648a..7ef13ae56 100644 --- a/src/core/linker.h +++ b/src/core/linker.h @@ -22,8 +22,9 @@ struct OrbisKernelMemParam { u8* extended_memory_1; u64* extended_gpu_page_table; u8* extended_memory_2; - u64* exnteded_cpu_page_table; + u64* extended_cpu_page_table; }; +static_assert(sizeof(OrbisKernelMemParam) == 0x38); struct OrbisProcParam { u64 size; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index aa116fa3d..0a69ad773 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -12,12 +12,7 @@ namespace Core { -constexpr u64 SCE_DEFAULT_FLEXIBLE_MEMORY_SIZE = 448_MB; - MemoryManager::MemoryManager() { - // Set up the direct and flexible memory regions. - SetupMemoryRegions(SCE_DEFAULT_FLEXIBLE_MEMORY_SIZE); - // Insert a virtual memory area that covers the entire area we manage. const VAddr system_managed_base = impl.SystemManagedVirtualBase(); const size_t system_managed_size = impl.SystemManagedVirtualSize(); @@ -38,10 +33,17 @@ MemoryManager::MemoryManager() { MemoryManager::~MemoryManager() = default; -void MemoryManager::SetupMemoryRegions(u64 flexible_size) { - const auto total_size = - Config::isNeoMode() ? SCE_KERNEL_MAIN_DMEM_SIZE_PRO : SCE_KERNEL_MAIN_DMEM_SIZE; - total_flexible_size = flexible_size; +void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1, + bool use_extended_mem2) { + const bool is_neo = Config::isNeoMode(); + auto total_size = is_neo ? SCE_KERNEL_TOTAL_MEM_PRO : SCE_KERNEL_TOTAL_MEM; + if (!use_extended_mem1 && is_neo) { + total_size -= 256_MB; + } + if (!use_extended_mem2 && !is_neo) { + total_size -= 128_MB; + } + total_flexible_size = flexible_size - SCE_FLEXIBLE_MEMORY_BASE; total_direct_size = total_size - flexible_size; // Insert an area that covers direct memory physical block. diff --git a/src/core/memory.h b/src/core/memory.h index a9f2df322..615ecc3eb 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -166,7 +166,7 @@ public: bool TryWriteBacking(void* address, const void* data, u32 num_bytes); - void SetupMemoryRegions(u64 flexible_size); + void SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1, bool use_extended_mem2); PAddr PoolExpand(PAddr search_start, PAddr search_end, size_t size, u64 alignment); From fc2fd9fd3cd4ce584235590772d15904e0c1b9a4 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Sat, 28 Dec 2024 13:05:53 +0200 Subject: [PATCH 08/45] Update README.md with latest (not quite) addition to dev team --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 7ba13ad47..e68fb86f7 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,10 @@ R3 | M | | - [**skmp**](https://github.com/skmp) - [**wheremyfoodat**](https://github.com/wheremyfoodat) - [**raziel1000**](https://github.com/raziel1000) +- [**viniciuslrangel**](https://github.com/viniciuslrangel) +- [**Roamic**](https://github.com/vladmikhalin) +- [**Poly**](https://github.com/polybiusproxy) +- [**squidbus**](https://github.com/squidbus) Logo is done by [**Xphalnos**](https://github.com/Xphalnos) From c8b704e4a364ce64d2f50cda2c7a240fec2f9aa1 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Sat, 28 Dec 2024 13:06:51 +0200 Subject: [PATCH 09/45] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e68fb86f7..4fad47c3d 100644 --- a/README.md +++ b/README.md @@ -122,8 +122,8 @@ R3 | M | | - [**wheremyfoodat**](https://github.com/wheremyfoodat) - [**raziel1000**](https://github.com/raziel1000) - [**viniciuslrangel**](https://github.com/viniciuslrangel) -- [**Roamic**](https://github.com/vladmikhalin) -- [**Poly**](https://github.com/polybiusproxy) +- [**roamic**](https://github.com/vladmikhalin) +- [**poly**](https://github.com/polybiusproxy) - [**squidbus**](https://github.com/squidbus) Logo is done by [**Xphalnos**](https://github.com/Xphalnos) From f0352d2f7db0d64fec53fcb7214de9438285c6cb Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Sat, 28 Dec 2024 13:16:25 +0200 Subject: [PATCH 10/45] Updated dev team (sorry frodo , the ring is yours) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4fad47c3d..fd40d2d63 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,7 @@ R3 | M | | - [**roamic**](https://github.com/vladmikhalin) - [**poly**](https://github.com/polybiusproxy) - [**squidbus**](https://github.com/squidbus) +- [**frodo**](https://github.com/baggins183) Logo is done by [**Xphalnos**](https://github.com/Xphalnos) From 99e1e028c056de16966913d64efac7e7975df40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Sat, 28 Dec 2024 18:18:56 +0700 Subject: [PATCH 11/45] texture_cache: Don't read max ansio value if not aniso filter (#1942) Fix Sonic Forces. --- src/video_core/amdgpu/resource.h | 2 +- src/video_core/texture_cache/sampler.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 58b286e9c..6bbe1fb7e 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -447,7 +447,7 @@ struct Sampler { } float MaxAniso() const { - switch (max_aniso) { + switch (max_aniso.Value()) { case AnisoRatio::One: return 1.0f; case AnisoRatio::Two: diff --git a/src/video_core/texture_cache/sampler.cpp b/src/video_core/texture_cache/sampler.cpp index 5ce10ff0e..8dbdd2912 100644 --- a/src/video_core/texture_cache/sampler.cpp +++ b/src/video_core/texture_cache/sampler.cpp @@ -18,7 +18,8 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample (AmdGpu::IsAnisoFilter(sampler.xy_mag_filter) || AmdGpu::IsAnisoFilter(sampler.xy_min_filter)); const float maxAnisotropy = - std::clamp(sampler.MaxAniso(), 1.0f, instance.MaxSamplerAnisotropy()); + anisotropyEnable ? std::clamp(sampler.MaxAniso(), 1.0f, instance.MaxSamplerAnisotropy()) + : 1.0f; const vk::SamplerCreateInfo sampler_ci = { .magFilter = LiverpoolToVK::Filter(sampler.xy_mag_filter), .minFilter = LiverpoolToVK::Filter(sampler.xy_min_filter), From 668d5f65dcb1442ff14a5498a242231f36637b69 Mon Sep 17 00:00:00 2001 From: DemoJameson Date: Sat, 28 Dec 2024 19:19:16 +0800 Subject: [PATCH 12/45] Update zh_CN translation (#1946) --- src/qt_gui/translations/zh_CN.ts | 232 +++++++++++++++---------------- 1 file changed, 116 insertions(+), 116 deletions(-) diff --git a/src/qt_gui/translations/zh_CN.ts b/src/qt_gui/translations/zh_CN.ts index 4ceb91315..32b838fac 100644 --- a/src/qt_gui/translations/zh_CN.ts +++ b/src/qt_gui/translations/zh_CN.ts @@ -2,7 +2,7 @@ + SPDX-License-Identifier: GPL-2.0-or-later --> AboutDialog @@ -18,7 +18,7 @@ shadPS4 is an experimental open-source emulator for the PlayStation 4. - shadPS4 是一款实验性质的开源 PlayStation 4模拟器软件。 + shadPS4 是一款实验性质的开源 PlayStation 4 模拟器软件。 @@ -103,7 +103,7 @@ Cheats / Patches - 作弊码 / 补丁 + 作弊码/补丁 @@ -113,7 +113,7 @@ Trophy Viewer - Trophy 查看器 + 奖杯查看器 @@ -128,7 +128,7 @@ Open Save Data Folder - 打开保存数据文件夹 + 打开存档数据文件夹 @@ -173,27 +173,27 @@ Delete DLC - 删除DLC + 删除 DLC Compatibility... - Compatibility... + 兼容性... Update database - Update database + 更新数据库 View report - View report + 查看报告 Submit a report - Submit a report + 提交报告 @@ -203,7 +203,7 @@ Shortcut created successfully! - 创建快捷方式成功! + 创建快捷方式成功! @@ -213,7 +213,7 @@ Error creating shortcut! - 创建快捷方式出错! + 创建快捷方式出错! @@ -228,7 +228,7 @@ requiresEnableSeparateUpdateFolder_MSG - 这个功能需要‘启用单独的更新目录’配置选项才能正常运行,如果你想要使用这个功能,请启用它。 + 这个功能需要“启用单独的更新目录”配置选项才能正常运行,如果你想要使用这个功能,请启用它。 @@ -243,7 +243,7 @@ This game has no DLC to delete! - 这个游戏没有DLC可以删除! + 这个游戏没有 DLC 可以删除! @@ -258,7 +258,7 @@ Are you sure you want to delete %1's %2 directory? - 你确定要删除 %1 的 %2 目录? + 你确定要删除 %1 的%2目录? @@ -266,7 +266,7 @@ Open/Add Elf Folder - 打开/添加Elf文件夹 + 打开/添加 Elf 文件夹 @@ -316,7 +316,7 @@ Exit the application. - 退出应用程序. + 退出应用程序。 @@ -341,12 +341,12 @@ Medium - 中等 + Large - 巨大 + @@ -376,7 +376,7 @@ Dump Game List - 转储游戏列表 + 导出游戏列表 @@ -472,7 +472,7 @@ Trophy Viewer - Trophy 查看器 + 奖杯查看器 @@ -485,7 +485,7 @@ General - 通用 + 常规 @@ -520,12 +520,12 @@ Show Splash - 显示Splash + 显示启动画面 Is PS4 Pro - 是否是 PS4 Pro + 模拟 PS4 Pro @@ -570,17 +570,17 @@ Hide Cursor Idle Timeout - 光标空闲超时隐藏 + 光标隐藏闲置时长 s - s + Controller - 控制器 + 手柄 @@ -595,7 +595,7 @@ Graphics Device - 图像设备 + 图形设备 @@ -700,7 +700,7 @@ Disable Trophy Pop-ups - Disable Trophy Pop-ups + 禁止弹出奖杯 @@ -710,22 +710,22 @@ Update Compatibility Database On Startup - Update Compatibility Database On Startup + 启动时更新兼容性数据库 Game Compatibility - Game Compatibility + 游戏兼容性 Display Compatibility Data - Display Compatibility Data + 显示兼容性数据 Update Compatibility Database - Update Compatibility Database + 更新兼容性数据库 @@ -735,7 +735,7 @@ Audio Backend - Audio Backend + 音频后端 @@ -778,12 +778,12 @@ All Patches available for all games have been downloaded. - 所有游戏的所有补丁都已下载。 + 所有游戏的可用补丁都已下载。 Games: - 游戏: + 游戏: @@ -798,7 +798,7 @@ Game Boot - 游戏启动 + 启动游戏 @@ -818,7 +818,7 @@ PKG and Game versions match: - PKG 和游戏版本匹配: + PKG 和游戏版本匹配: @@ -828,17 +828,17 @@ PKG Version %1 is older than installed version: - PKG 版本 %1 比已安装版本更旧: + PKG 版本 %1 比已安装版本更旧: Game is installed: - 游戏已安装: + 游戏已安装: Would you like to install Patch: - 您想安装补丁吗: + 您想安装补丁吗: @@ -848,12 +848,12 @@ Would you like to install DLC: %1? - 您想安装 DLC: %1 吗? + 您想安装 DLC:%1 吗? DLC already installed: - DLC 已经安装: + DLC 已经安装: @@ -896,42 +896,42 @@ Cheats / Patches for - Cheats / Patches for + 作弊码/补丁: defaultTextEdit_MSG - 作弊/补丁是实验性的。\n请小心使用。\n\n通过选择存储库并点击下载按钮,单独下载作弊程序。\n在“补丁”选项卡中,您可以一次性下载所有补丁,选择要使用的补丁并保存选择。\n\n由于我们不开发作弊程序/补丁,\n请将问题报告给作弊程序的作者。\n\n创建了新的作弊程序?访问:\nhttps://github.com/shadps4-emu/ps4_cheats + 作弊码/补丁是实验性的。\n请小心使用。\n\n通过选择存储库并点击下载按钮,下载该游戏的作弊码。\n在“补丁”选项卡中,您可以一次性下载所有补丁,选择要使用的补丁并保存选择。\n\n由于我们不开发作弊码/补丁,\n请将问题报告给作弊码/补丁的作者。\n\n创建了新的作弊码/补丁?欢迎提交到我们的仓库:\nhttps://github.com/shadps4-emu/ps4_cheats No Image Available - 没有可用的图像 + 没有可用的图片 Serial: - 序列号: + 序列号: Version: - 版本: + 版本: Size: - 大小: + 大小: Select Cheat File: - 选择作弊码文件: + 选择作弊码文件: Repository: - 存储库: + 存储库: @@ -961,7 +961,7 @@ Select Patch File: - 选择补丁文件: + 选择补丁文件: @@ -1016,7 +1016,7 @@ Failed to parse XML: - 解析 XML 失败: + 解析 XML 失败: @@ -1046,17 +1046,17 @@ File already exists. Do you want to replace it? - 文件已存在。您要替换它吗? + 文件已存在,您要替换它吗? Failed to save file: - 保存文件失败: + 保存文件失败: Failed to download file: - 下载文件失败: + 下载文件失败: @@ -1076,17 +1076,17 @@ CheatsDownloadedSuccessfully_MSG - 您已成功下载了该游戏版本的作弊码 从所选存储库中。如果有,您还可以尝试从其他存储库下载,或通过从列表中选择文件来使用它们。 + 您已从所选存储库中成功下载了该游戏版本的作弊码。您还可以尝试从其他存储库下载,或通过从列表中选择文件来使用它们。 Failed to save: - 保存失败: + 保存失败: Failed to download: - 下载失败: + 下载失败: @@ -1096,7 +1096,7 @@ DownloadComplete_MSG - 补丁下载成功!所有可用的补丁已下载完成,无需像作弊码那样单独下载每个游戏的补丁。如果补丁没有出现,可能是该补丁不存在于特定的序列号和游戏版本中。 + 补丁下载成功!所有可用的补丁已下载完成,无需像作弊码那样单独下载每个游戏的补丁。如果补丁没有出现,可能是该补丁不适用于当前游戏的序列号和版本。 @@ -1111,12 +1111,12 @@ The game is in version: %1 - 游戏版本: %1 + 游戏版本:%1 The downloaded patch only works on version: %1 - 下载的补丁仅适用于版本: %1 + 下载的补丁仅适用于版本:%1 @@ -1131,12 +1131,12 @@ Failed to open file: - 无法打开文件: + 无法打开文件: XML ERROR: - XML 错误: + XML 错误: @@ -1146,12 +1146,12 @@ Author: - 作者: + 作者: Directory does not exist: - 目录不存在: + 目录不存在: @@ -1161,12 +1161,12 @@ Name: - 名称: + 名称: Can't apply cheats before the game is started - 在游戏开始之前无法应用作弊。 + 在游戏启动之前无法应用作弊码。 @@ -1199,17 +1199,17 @@ consoleLanguageGroupBox - 控制台语言:\n设置 PS4 游戏中使用的语言。\n建议设置为支持的语言,因为可能因地区而异。 + 主机语言:\n设置 PS4 游戏中使用的语言。\n建议设置为支持的语言,这将因地区而异。 emulatorLanguageGroupBox - 模拟器语言:\n设置模拟器用户界面的语言。 + 模拟器语言:\n设置模拟器用户界面的语言。 fullscreenCheckBox - 启用全屏模式:\n自动将游戏窗口设置为全屏模式。\n您可以按 F11 键禁用此选项。 + 启用全屏:\n以全屏模式启动游戏。\n您可以按 F11 键切换回窗口模式。 @@ -1219,77 +1219,77 @@ showSplashCheckBox - 显示启动画面:\n在游戏启动时显示游戏的启动画面(特殊图像)。 + 显示启动画面:\n在游戏启动时显示游戏的启动画面(特殊图像)。 ps4proCheckBox - 这是 PS4 Pro:\n使模拟器作为 PS4 PRO 运行,可以在支持的游戏中激活特殊功能。 + 模拟 PS4 Pro:\n使模拟器作为 PS4 Pro 运行,可以在支持的游戏中激活特殊功能。 discordRPCCheckbox - 启用 Discord Rich Presence:\n在您的 Discord 个人资料上显示仿真器图标和相关信息。 + 启用 Discord Rich Presence:\n在您的 Discord 个人资料上显示模拟器图标和相关信息。 userName - 用户名:\n设置 PS4 帐户的用户名。某些游戏中可能会显示此名称。 + 用户名:\n设置 PS4 帐户的用户名,某些游戏中可能会显示此名称。 logTypeGroupBox - 日志类型:\n设置是否同步日志窗口的输出以提高性能。这可能会对模拟产生负面影响。 + 日志类型:\n设置日志窗口输出的同步方式以提高性能。可能会对模拟产生不良影响。 logFilter - 日志过滤器:\n过滤日志,仅打印特定信息。\n例如:"Core:Trace" "Lib.Pad:Debug Common.Filesystem:Error" "*:Critical" 级别: Trace, Debug, Info, Warning, Error, Critical - 按此顺序,特定级别将静默列表中所有先前的级别,并记录所有后续级别。 + 日志过滤器:\n过滤日志,仅打印特定信息。\n例如:"Core:Trace" "Lib.Pad:Debug Common.Filesystem:Error" "*:Critical" 级别: Trace, Debug, Info, Warning, Error, Critical - 按此顺序,特定级别将静默列表中所有先前的级别,并记录所有后续级别。 updaterGroupBox - 更新:\nRelease: 官方版本,可能非常旧,并且每月发布,但更可靠且经过测试。\nNightly: 开发版本,包含所有最新功能和修复,但可能包含错误且不够稳定。 + 更新:\nRelease:每月发布的官方版本可能非常过时,但更可靠且经过测试。\nNightly:包含所有最新功能和修复的开发版本,但可能包含错误且稳定性较低。 GUIgroupBox - 播放标题音乐:\n如果游戏支持,在图形界面选择游戏时启用播放特殊音乐。 + 播放标题音乐:\n如果游戏支持,在图形界面选择游戏时播放特殊音乐。 disableTrophycheckBox - Disable Trophy Pop-ups:\nDisable in-game trophy notifications. Trophy progress can still be tracked using the Trophy Viewer (right-click the game in the main window). + 禁止弹出奖杯:\n禁用游戏内奖杯通知。可以在奖杯查看器中继续跟踪奖杯进度(在主窗口中右键点击游戏)。 hideCursorGroupBox - 隐藏光标:\n选择光标何时消失:\n从不: 您将始终看到鼠标。\n空闲: 设置光标在空闲后消失的时间。\n始终: 您将永远看不到鼠标。 + 隐藏光标:\n选择光标何时消失:\n从不: 始终显示光标。\闲置: 光标在闲置若干秒后消失。\n始终: 始终显示光标。 idleTimeoutGroupBox - 设置鼠标在空闲后消失的时间。 + 光标隐藏闲置时长:\n光标自动隐藏之前的闲置时长。 backButtonBehaviorGroupBox - 返回按钮行为:\n设置控制器的返回按钮以模拟在 PS4 触控板上指定位置的点击。 + 返回按钮行为:\n设置手柄的返回按钮模拟在 PS4 触控板上指定位置的点击。 enableCompatibilityCheckBox - Display Compatibility Data:\nDisplays game compatibility information in table view. Enable "Update Compatibility On Startup" to get up-to-date information. + 显示兼容性数据:\n在列表视图中显示游戏兼容性信息。启用“启动时更新兼容性数据库”以获取最新信息。 checkCompatibilityOnStartupCheckBox - Update Compatibility On Startup:\nAutomatically update the compatibility database when shadPS4 starts. + 启动时更新兼容性数据库:\n当 shadPS4 启动时自动更新兼容性数据库。 updateCompatibilityButton - Update Compatibility Database:\nImmediately update the compatibility database. + 更新兼容性数据库:\n立即更新兼容性数据库。 @@ -1299,7 +1299,7 @@ Idle - 空闲 + 闲置 @@ -1329,62 +1329,62 @@ graphicsAdapterGroupBox - 图形设备:\n在具有多个 GPU 的系统中,从下拉列表中选择要使用的 GPU,\n或者选择“自动检测”以自动确定。 + 图形设备:\n在具有多个 GPU 的系统中,从下拉列表中选择要使用的 GPU,\n或者选择“自动选择”由模拟器决定。 resolutionLayout - 宽度/高度:\n设置启动时模拟器的窗口大小,该大小可以在游戏中更改。\n这与游戏中的分辨率不同。 + 宽度/高度:\n设置启动游戏时的窗口大小,游戏过程中可以调整。\n这与游戏内的分辨率不同。 heightDivider - Vblank 除数:\n模拟器更新的帧速率乘以此数字。改变此项可能会导致游戏速度加快,或破坏游戏中不期望此变化的关键功能! + Vblank Divider:\n模拟器刷新的帧率会乘以此数字。改变此项可能会导致游戏速度加快,或破坏游戏中不期望此变化的关键功能! dumpShadersCheckBox - 启用着色器转储:\n为了技术调试,在渲染期间将游戏着色器保存到文件夹中。 + 启用着色器转储:\n用于技术调试,在渲染期间将游戏着色器保存到文件夹中。 nullGpuCheckBox - 启用空 GPU:\n为了技术调试,将游戏渲染禁用,仿佛没有图形卡。 + 启用 NULL GPU:\n用于技术调试,禁用游戏渲染,就像没有显卡一样。 gameFoldersBox - 游戏文件夹:\n检查已安装游戏的文件夹列表。 + 游戏文件夹:\n检查已安装游戏的文件夹列表。 addFolderButton - 添加:\n将文件夹添加到列表。 + 添加:\n将文件夹添加到列表。 removeFolderButton - 移除:\n从列表中移除文件夹。 + 移除:\n从列表中移除文件夹。 debugDump - 启用调试转储:\n将当前正在运行的 PS4 程序的导入和导出符号及文件头信息保存到目录中。 + 启用调试转储:\n将当前正在运行的 PS4 程序的导入和导出符号及文件头信息保存到目录中。 vkValidationCheckBox - 启用 Vulkan 验证层:\n启用验证 Vulkan 渲染器状态并记录内部状态信息的系统。这可能会降低性能,并可能更改模拟行为。 + 启用 Vulkan 验证层:\n启用一个系统来验证 Vulkan 渲染器的状态并记录其内部状态的信息。\n这将降低性能并可能改变模拟的行为。 vkSyncValidationCheckBox - 启用 Vulkan 同步验证:\n启用验证 Vulkan 渲染任务时间的系统。这可能会降低性能,并可能更改模拟行为。 + 启用 Vulkan 同步验证:\n启用一个系统来验证 Vulkan 渲染任务的时间。\n这将降低性能并可能改变模拟的行为。 rdocCheckBox - 启用 RenderDoc 调试:\n如果启用,模拟器将提供与 Renderdoc 的兼容性,允许在渲染过程中捕获和分析当前渲染的帧。 + 启用 RenderDoc 调试:\n启用后模拟器将提供与 Renderdoc 的兼容性,允许在渲染过程中捕获和分析当前渲染的帧。 @@ -1407,7 +1407,7 @@ Compatibility - Compatibility + 兼容性 @@ -1442,52 +1442,52 @@ Never Played - Never Played + 未玩过 h - h + 小时 m - m + 分钟 s - s + Compatibility is untested - Compatibility is untested + 兼容性未经测试 Game does not initialize properly / crashes the emulator - Game does not initialize properly / crashes the emulator + 游戏无法正确初始化/模拟器崩溃 Game boots, but only displays a blank screen - Game boots, but only displays a blank screen + 游戏启动,但只显示白屏 Game displays an image but does not go past the menu - Game displays an image but does not go past the menu + 游戏显示图像但无法通过菜单页面 Game has game-breaking glitches or unplayable performance - Game has game-breaking glitches or unplayable performance + 游戏有严重的 Bug 或太卡无法游玩 Game can be completed with playable performance and no major glitches - Game can be completed with playable performance and no major glitches + 游戏能在可玩的性能下完成且没有重大 Bug @@ -1525,7 +1525,7 @@ No download URL found for the specified asset. - 未找到指定资产的下载 URL。 + 未找到指定资源的下载地址。 @@ -1560,7 +1560,7 @@ Show Changelog - 显示变更日志 + 显示更新日志 @@ -1580,17 +1580,17 @@ Hide Changelog - 隐藏变更日志 + 隐藏更新日志 Changes - 变更 + 更新日志 Network error occurred while trying to access the URL - 尝试访问 URL 时发生网络错误 + 尝试访问网址时发生网络错误 @@ -1646,4 +1646,4 @@ TB - \ No newline at end of file + From 63d2d1ebe8467a58410e9eb82ff2ed5bafee0a4c Mon Sep 17 00:00:00 2001 From: jas0n098 Date: Sat, 28 Dec 2024 11:19:41 +0000 Subject: [PATCH 13/45] Handle RectList primitives in Geometry shaders (#1936) --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 1 + src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 900d40472..f0cf15af0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -28,6 +28,7 @@ static constexpr spv::ExecutionMode GetInputPrimitiveType(AmdGpu::PrimitiveType return spv::ExecutionMode::InputLines; case AmdGpu::PrimitiveType::TriangleList: case AmdGpu::PrimitiveType::TriangleStrip: + case AmdGpu::PrimitiveType::RectList: return spv::ExecutionMode::Triangles; case AmdGpu::PrimitiveType::AdjTriangleList: return spv::ExecutionMode::InputTrianglesAdjacency; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 281c487af..575bf91f7 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -47,6 +47,7 @@ static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) { return 2u; case AmdGpu::PrimitiveType::TriangleList: case AmdGpu::PrimitiveType::TriangleStrip: + case AmdGpu::PrimitiveType::RectList: return 3u; case AmdGpu::PrimitiveType::AdjTriangleList: return 6u; From 8447d6ea1c6295b9e499f135a61cdb163a9c4b03 Mon Sep 17 00:00:00 2001 From: rainmakerv2 <30595646+rainmakerv3@users.noreply.github.com> Date: Sat, 28 Dec 2024 21:30:26 +0800 Subject: [PATCH 14/45] Remove PS4 pro mode from GUI, can still be edited in from config file (#1871) --- src/qt_gui/settings_dialog.cpp | 8 +------- src/qt_gui/settings_dialog.ui | 7 ------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index df802901a..6d4de6603 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -199,7 +199,6 @@ SettingsDialog::SettingsDialog(std::span physical_devices, ui->fullscreenCheckBox->installEventFilter(this); ui->separateUpdatesCheckBox->installEventFilter(this); ui->showSplashCheckBox->installEventFilter(this); - ui->ps4proCheckBox->installEventFilter(this); ui->discordRPCCheckbox->installEventFilter(this); ui->userName->installEventFilter(this); ui->logTypeGroupBox->installEventFilter(this); @@ -291,7 +290,6 @@ void SettingsDialog::LoadValuesFromConfig() { ui->separateUpdatesCheckBox->setChecked( toml::find_or(data, "General", "separateUpdateEnabled", false)); ui->showSplashCheckBox->setChecked(toml::find_or(data, "General", "showSplash", false)); - ui->ps4proCheckBox->setChecked(toml::find_or(data, "General", "isPS4Pro", false)); ui->logTypeComboBox->setCurrentText( QString::fromStdString(toml::find_or(data, "General", "logType", "async"))); ui->logFilterLineEdit->setText( @@ -408,8 +406,6 @@ void SettingsDialog::updateNoteTextEdit(const QString& elementName) { text = tr("separateUpdatesCheckBox"); } else if (elementName == "showSplashCheckBox") { text = tr("showSplashCheckBox"); - } else if (elementName == "ps4proCheckBox") { - text = tr("ps4proCheckBox"); } else if (elementName == "discordRPCCheckbox") { text = tr("discordRPCCheckbox"); } else if (elementName == "userName") { @@ -520,11 +516,9 @@ void SettingsDialog::UpdateSettings() { const QVector TouchPadIndex = {"left", "center", "right", "none"}; Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]); - Config::setNeoMode(ui->ps4proCheckBox->isChecked()); Config::setFullscreenMode(ui->fullscreenCheckBox->isChecked()); Config::setisTrophyPopupDisabled(ui->disableTrophycheckBox->isChecked()); Config::setPlayBGM(ui->playBGMCheckBox->isChecked()); - Config::setNeoMode(ui->ps4proCheckBox->isChecked()); Config::setLogType(ui->logTypeComboBox->currentText().toStdString()); Config::setLogFilter(ui->logFilterLineEdit->text().toStdString()); Config::setUserName(ui->userNameLineEdit->text().toStdString()); @@ -590,4 +584,4 @@ void SettingsDialog::ResetInstallFolders() { } Config::setGameInstallDirs(settings_install_dirs_config); } -} +} \ No newline at end of file diff --git a/src/qt_gui/settings_dialog.ui b/src/qt_gui/settings_dialog.ui index c18b70497..f2d6b77d2 100644 --- a/src/qt_gui/settings_dialog.ui +++ b/src/qt_gui/settings_dialog.ui @@ -228,13 +228,6 @@ - - - - Is PS4 Pro - - - From a8b4e14bf526b4983eab8cc544263ee3d28412f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Sat, 28 Dec 2024 21:35:12 +0700 Subject: [PATCH 15/45] Fix SDL version cannot launch game using game ID (#1650) * Fix SDL version cannot launch game using game ID Missing from https://github.com/shadps4-emu/shadPS4/pull/1507. * Added --add-game-folder argument Also added "treat the last argument as the game, if it isn't found already" option to the qt version --------- Co-authored-by: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> --- src/main.cpp | 58 +++++++++++++++++++++++++++++++++++++++++---- src/qt_gui/main.cpp | 35 +++++++++++++++++++++++---- 2 files changed, 83 insertions(+), 10 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 17b5c11fe..bdbab89c9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,11 +4,14 @@ #include "functional" #include "iostream" #include "string" +#include "system_error" #include "unordered_map" #include #include "common/config.h" #include "common/memory_patcher.h" +#include "common/path_util.h" +#include "core/file_sys/fs.h" #include "emulator.h" #ifdef _WIN32 @@ -20,6 +23,10 @@ int main(int argc, char* argv[]) { SetConsoleOutputCP(CP_UTF8); #endif + // Load configurations + const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); + Config::load(user_dir / "config.toml"); + bool has_game_argument = false; std::string game_path; @@ -33,6 +40,7 @@ int main(int argc, char* argv[]) { " -p, --patch Apply specified patch file\n" " -f, --fullscreen Specify window initial fullscreen " "state. Does not overwrite the config file.\n" + " --add-game-folder Adds a new game folder to the config.\n" " -h, --help Display this help message\n"; exit(0); }}, @@ -81,6 +89,25 @@ int main(int argc, char* argv[]) { Config::setFullscreenMode(is_fullscreen); }}, {"--fullscreen", [&](int& i) { arg_map["-f"](i); }}, + {"--add-game-folder", + [&](int& i) { + if (++i >= argc) { + std::cerr << "Error: Missing argument for --add-game-folder\n"; + exit(1); + } + std::string config_dir(argv[i]); + std::filesystem::path config_path = std::filesystem::path(config_dir); + std::error_code discard; + if (!std::filesystem::exists(config_path, discard)) { + std::cerr << "Error: File does not exist: " << config_path << "\n"; + exit(1); + } + + Config::addGameInstallDir(config_path); + Config::save(Common::FS::GetUserPath(Common::FS::PathType::UserDir) / "config.toml"); + std::cout << "Game folder successfully saved.\n"; + exit(0); + }}, }; if (argc == 1) { @@ -105,20 +132,41 @@ int main(int argc, char* argv[]) { } } + // If no game directory is set and no command line argument, prompt for it + if (Config::getGameInstallDirs().empty()) { + std::cout << "Warning: No game folder set, please set it by calling shadps4" + " with the --add-game-folder argument"; + } + if (!has_game_argument) { std::cerr << "Error: Please provide a game path or ID.\n"; exit(1); } // Check if the game path or ID exists - if (!std::filesystem::exists(game_path)) { - std::cerr << "Error: Game file not found\n"; - return -1; + std::filesystem::path eboot_path(game_path); + + // Check if the provided path is a valid file + if (!std::filesystem::exists(eboot_path)) { + // If not a file, treat it as a game ID and search in install directories + bool game_found = false; + for (const auto& install_dir : Config::getGameInstallDirs()) { + const auto candidate_path = install_dir / game_path / "eboot.bin"; + if (std::filesystem::exists(candidate_path)) { + eboot_path = candidate_path; + game_found = true; + break; + } + } + if (!game_found) { + std::cerr << "Error: Game ID or file path not found: " << game_path << std::endl; + return 1; + } } - // Run the emulator with the specified game + // Run the emulator with the resolved eboot path Core::Emulator emulator; - emulator.Run(game_path); + emulator.Run(eboot_path); return 0; } diff --git a/src/qt_gui/main.cpp b/src/qt_gui/main.cpp index 318245053..ac731fdce 100644 --- a/src/qt_gui/main.cpp +++ b/src/qt_gui/main.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "iostream" +#include "system_error" #include "unordered_map" #include "common/config.h" @@ -31,7 +32,7 @@ int main(int argc, char* argv[]) { bool has_command_line_argument = argc > 1; bool show_gui = false, has_game_argument = false; - std::string gamePath; + std::string game_path; // Map of argument strings to lambda functions std::unordered_map> arg_map = { @@ -46,6 +47,7 @@ int main(int argc, char* argv[]) { " -s, --show-gui Show the GUI\n" " -f, --fullscreen Specify window initial fullscreen " "state. Does not overwrite the config file.\n" + " --add-game-folder Adds a new game folder to the config.\n" " -h, --help Display this help message\n"; exit(0); }}, @@ -57,7 +59,7 @@ int main(int argc, char* argv[]) { {"-g", [&](int& i) { if (i + 1 < argc) { - gamePath = argv[++i]; + game_path = argv[++i]; has_game_argument = true; } else { std::cerr << "Error: Missing argument for -g/--game\n"; @@ -98,6 +100,25 @@ int main(int argc, char* argv[]) { Config::setFullscreenMode(is_fullscreen); }}, {"--fullscreen", [&](int& i) { arg_map["-f"](i); }}, + {"--add-game-folder", + [&](int& i) { + if (++i >= argc) { + std::cerr << "Error: Missing argument for --add-game-folder\n"; + exit(1); + } + std::string config_dir(argv[i]); + std::filesystem::path config_path = std::filesystem::path(config_dir); + std::error_code discard; + if (!std::filesystem::is_directory(config_path, discard)) { + std::cerr << "Error: Directory does not exist: " << config_path << "\n"; + exit(1); + } + + Config::addGameInstallDir(config_path); + Config::save(Common::FS::GetUserPath(Common::FS::PathType::UserDir) / "config.toml"); + std::cout << "Game folder successfully saved.\n"; + exit(0); + }}, }; // Parse command-line arguments using the map @@ -106,6 +127,10 @@ int main(int argc, char* argv[]) { auto it = arg_map.find(cur_arg); if (it != arg_map.end()) { it->second(i); // Call the associated lambda function + } else if (i == argc - 1 && !has_game_argument) { + // Assume the last argument is the game file if not specified via -g/--game + game_path = argv[i]; + has_game_argument = true; } else { std::cerr << "Unknown argument: " << cur_arg << ", see --help for info.\n"; return 1; @@ -134,14 +159,14 @@ int main(int argc, char* argv[]) { // Process game path or ID if provided if (has_game_argument) { - std::filesystem::path game_file_path(gamePath); + std::filesystem::path game_file_path(game_path); // Check if the provided path is a valid file if (!std::filesystem::exists(game_file_path)) { // If not a file, treat it as a game ID and search in install directories bool game_found = false; for (const auto& install_dir : Config::getGameInstallDirs()) { - auto potential_game_path = install_dir / gamePath / "eboot.bin"; + auto potential_game_path = install_dir / game_path / "eboot.bin"; if (std::filesystem::exists(potential_game_path)) { game_file_path = potential_game_path; game_found = true; @@ -149,7 +174,7 @@ int main(int argc, char* argv[]) { } } if (!game_found) { - std::cerr << "Error: Game ID or file path not found: " << gamePath << std::endl; + std::cerr << "Error: Game ID or file path not found: " << game_path << std::endl; return 1; } } From e8b0fdd6cce138e83043843ad3289cf73684dcd2 Mon Sep 17 00:00:00 2001 From: tomboylover93 <95257311+tomboylover93@users.noreply.github.com> Date: Sat, 28 Dec 2024 12:09:33 -0300 Subject: [PATCH 16/45] style: add Tokyo Night theme (#1811) * style: add Tokyo Night theme * clang-format: Update main_window_themes.h --- src/qt_gui/main_window.cpp | 14 ++++++++++++++ src/qt_gui/main_window_themes.cpp | 22 ++++++++++++++++++++++ src/qt_gui/main_window_themes.h | 2 +- src/qt_gui/main_window_ui.h | 6 ++++++ 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 724e008ae..bd3c27809 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -112,6 +112,7 @@ void MainWindow::CreateActions() { m_theme_act_group->addAction(ui->setThemeBlue); m_theme_act_group->addAction(ui->setThemeViolet); m_theme_act_group->addAction(ui->setThemeGruvbox); + m_theme_act_group->addAction(ui->setThemeTokyoNight); } void MainWindow::AddUiWidgets() { @@ -559,6 +560,14 @@ void MainWindow::CreateConnects() { isIconBlack = false; } }); + connect(ui->setThemeTokyoNight, &QAction::triggered, &m_window_themes, [this]() { + m_window_themes.SetWindowTheme(Theme::TokyoNight, ui->mw_searchbar); + Config::setMainWindowTheme(static_cast(Theme::TokyoNight)); + if (isIconBlack) { + SetUiIcons(false); + isIconBlack = false; + } + }); } void MainWindow::StartGame() { @@ -934,6 +943,11 @@ void MainWindow::SetLastUsedTheme() { isIconBlack = false; SetUiIcons(false); break; + case Theme::TokyoNight: + ui->setThemeTokyoNight->setChecked(true); + isIconBlack = false; + SetUiIcons(false); + break; } } diff --git a/src/qt_gui/main_window_themes.cpp b/src/qt_gui/main_window_themes.cpp index a52b4466e..5fffd4c9e 100644 --- a/src/qt_gui/main_window_themes.cpp +++ b/src/qt_gui/main_window_themes.cpp @@ -143,5 +143,27 @@ void WindowThemes::SetWindowTheme(Theme theme, QLineEdit* mw_searchbar) { themePalette.setColor(QPalette::HighlightedText, Qt::black); qApp->setPalette(themePalette); break; + case Theme::TokyoNight: + mw_searchbar->setStyleSheet( + "QLineEdit {" + "background-color: #1a1b26; color: #9d7cd8; border: 1px solid #9d7cd8; " + "border-radius: 4px; padding: 5px; }" + "QLineEdit:focus {" + "border: 1px solid #7aa2f7; }"); + themePalette.setColor(QPalette::Window, QColor(31, 35, 53)); + themePalette.setColor(QPalette::WindowText, QColor(192, 202, 245)); + themePalette.setColor(QPalette::Base, QColor(25, 28, 39)); + themePalette.setColor(QPalette::AlternateBase, QColor(36, 40, 59)); + themePalette.setColor(QPalette::ToolTipBase, QColor(192, 202, 245)); + themePalette.setColor(QPalette::ToolTipText, QColor(192, 202, 245)); + themePalette.setColor(QPalette::Text, QColor(192, 202, 245)); + themePalette.setColor(QPalette::Button, QColor(30, 30, 41)); + themePalette.setColor(QPalette::ButtonText, QColor(192, 202, 245)); + themePalette.setColor(QPalette::BrightText, QColor(197, 59, 83)); + themePalette.setColor(QPalette::Link, QColor(79, 214, 190)); + themePalette.setColor(QPalette::Highlight, QColor(79, 214, 190)); + themePalette.setColor(QPalette::HighlightedText, Qt::black); + qApp->setPalette(themePalette); + break; } } \ No newline at end of file diff --git a/src/qt_gui/main_window_themes.h b/src/qt_gui/main_window_themes.h index d162da87b..0ec2cce58 100644 --- a/src/qt_gui/main_window_themes.h +++ b/src/qt_gui/main_window_themes.h @@ -7,7 +7,7 @@ #include #include -enum class Theme : int { Dark, Light, Green, Blue, Violet, Gruvbox }; +enum class Theme : int { Dark, Light, Green, Blue, Violet, Gruvbox, TokyoNight }; class WindowThemes : public QObject { Q_OBJECT diff --git a/src/qt_gui/main_window_ui.h b/src/qt_gui/main_window_ui.h index df64361fd..0d5038d7e 100644 --- a/src/qt_gui/main_window_ui.h +++ b/src/qt_gui/main_window_ui.h @@ -37,6 +37,7 @@ public: QAction* setThemeBlue; QAction* setThemeViolet; QAction* setThemeGruvbox; + QAction* setThemeTokyoNight; QWidget* centralWidget; QLineEdit* mw_searchbar; QPushButton* playButton; @@ -162,6 +163,9 @@ public: setThemeGruvbox = new QAction(MainWindow); setThemeGruvbox->setObjectName("setThemeGruvbox"); setThemeGruvbox->setCheckable(true); + setThemeTokyoNight = new QAction(MainWindow); + setThemeTokyoNight->setObjectName("setThemeTokyoNight"); + setThemeTokyoNight->setCheckable(true); centralWidget = new QWidget(MainWindow); centralWidget->setObjectName("centralWidget"); sizePolicy.setHeightForWidth(centralWidget->sizePolicy().hasHeightForWidth()); @@ -287,6 +291,7 @@ public: menuThemes->addAction(setThemeBlue); menuThemes->addAction(setThemeViolet); menuThemes->addAction(setThemeGruvbox); + menuThemes->addAction(setThemeTokyoNight); menuGame_List_Icons->addAction(setIconSizeTinyAct); menuGame_List_Icons->addAction(setIconSizeSmallAct); menuGame_List_Icons->addAction(setIconSizeMediumAct); @@ -374,6 +379,7 @@ public: setThemeBlue->setText(QCoreApplication::translate("MainWindow", "Blue", nullptr)); setThemeViolet->setText(QCoreApplication::translate("MainWindow", "Violet", nullptr)); setThemeGruvbox->setText("Gruvbox"); + setThemeTokyoNight->setText("Tokyo Night"); toolBar->setWindowTitle(QCoreApplication::translate("MainWindow", "toolBar", nullptr)); } // retranslateUi }; From 851995d4440b65f34b81995c638ce73b4ee92489 Mon Sep 17 00:00:00 2001 From: polybiusproxy <47796739+polybiusproxy@users.noreply.github.com> Date: Sat, 28 Dec 2024 17:33:40 +0100 Subject: [PATCH 17/45] libraries/fiber: implement context switching (#1950) --- src/core/libraries/fiber/fiber.cpp | 98 +++++++++++++++++++++++++++--- src/core/libraries/fiber/fiber.h | 2 + 2 files changed, 91 insertions(+), 9 deletions(-) diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp index 7bb81b61e..6d3f546f2 100644 --- a/src/core/libraries/fiber/fiber.cpp +++ b/src/core/libraries/fiber/fiber.cpp @@ -41,6 +41,39 @@ void PS4_SYSV_ABI _sceFiberCheckStackOverflow(OrbisFiberContext* ctx) { } } +s32 PS4_SYSV_ABI _sceFiberAttachContext(OrbisFiber* fiber, void* addr_context, u64 size_context) { + if (size_context && size_context < ORBIS_FIBER_CONTEXT_MINIMUM_SIZE) { + return ORBIS_FIBER_ERROR_RANGE; + } + if (size_context & 15) { + return ORBIS_FIBER_ERROR_INVALID; + } + if (!addr_context || !size_context) { + return ORBIS_FIBER_ERROR_INVALID; + } + if (fiber->addr_context) { + return ORBIS_FIBER_ERROR_INVALID; + } + + fiber->addr_context = addr_context; + fiber->size_context = size_context; + fiber->context_start = addr_context; + fiber->context_end = reinterpret_cast(addr_context) + size_context; + + /* Apply signature to start of stack */ + *(u64*)addr_context = kFiberStackSignature; + + if (fiber->flags & FiberFlags::ContextSizeCheck) { + u64* stack_start = reinterpret_cast(fiber->context_start); + u64* stack_end = reinterpret_cast(fiber->context_end); + + u64* stack_ptr = stack_start + 1; + while (stack_ptr < stack_end) { + *stack_ptr++ = kFiberStackSizeCheck; + } + } +} + void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to, OrbisFiberContext* ctx) { OrbisFiberContext* fiber_ctx = fiber->context; @@ -62,8 +95,7 @@ void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to, data.entry = fiber->entry; data.arg_on_initialize = fiber->arg_on_initialize; data.arg_on_run_to = arg_on_run_to; - data.stack_addr = - reinterpret_cast(reinterpret_cast(fiber->addr_context) + fiber->size_context); + data.stack_addr = reinterpret_cast(fiber->addr_context) + fiber->size_context; if (fiber->flags & FiberFlags::SetFpuRegs) { data.fpucw = 0x037f; data.mxcsr = 0x9fc0; @@ -169,8 +201,7 @@ s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFi if (addr_context != nullptr) { fiber->context_start = addr_context; - fiber->context_end = - reinterpret_cast(reinterpret_cast(addr_context) + size_context); + fiber->context_end = reinterpret_cast(addr_context) + size_context; /* Apply signature to start of stack */ *(u64*)addr_context = kFiberStackSignature; @@ -221,11 +252,12 @@ s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber) { return ORBIS_OK; } -s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) { +s32 PS4_SYSV_ABI sceFiberRunImpl(OrbisFiber* fiber, void* addr_context, u64 size_context, + u64 arg_on_run_to, u64* arg_on_return) { if (!fiber) { return ORBIS_FIBER_ERROR_NULL; } - if ((u64)fiber & 7) { + if ((u64)fiber & 7 || (u64)addr_context & 15) { return ORBIS_FIBER_ERROR_ALIGNMENT; } if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { @@ -237,6 +269,14 @@ s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_r return ORBIS_FIBER_ERROR_PERMISSION; } + /* Caller wants to attach context and run. */ + if (addr_context != nullptr || size_context != 0) { + s32 res = _sceFiberAttachContext(fiber, addr_context, size_context); + if (res < 0) { + return res; + } + } + FiberState expected = FiberState::Idle; if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) { return ORBIS_FIBER_ERROR_STATE; @@ -288,11 +328,12 @@ s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_r return ORBIS_OK; } -s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run) { +s32 PS4_SYSV_ABI sceFiberSwitchImpl(OrbisFiber* fiber, void* addr_context, u64 size_context, + u64 arg_on_run_to, u64* arg_on_run) { if (!fiber) { return ORBIS_FIBER_ERROR_NULL; } - if ((u64)fiber & 7) { + if ((u64)fiber & 7 || (u64)addr_context & 15) { return ORBIS_FIBER_ERROR_ALIGNMENT; } if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { @@ -304,6 +345,14 @@ s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_o return ORBIS_FIBER_ERROR_PERMISSION; } + /* Caller wants to attach context and switch. */ + if (addr_context != nullptr || size_context != 0) { + s32 res = _sceFiberAttachContext(fiber, addr_context, size_context); + if (res < 0) { + return res; + } + } + FiberState expected = FiberState::Idle; if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) { return ORBIS_FIBER_ERROR_STATE; @@ -462,9 +511,32 @@ s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name) { return ORBIS_OK; } +s32 PS4_SYSV_ABI sceFiberGetThreadFramePointerAddress(u64* addr_frame_pointer) { + if (!addr_frame_pointer) { + return ORBIS_FIBER_ERROR_NULL; + } + + OrbisFiberContext* g_ctx = GetFiberContext(); + if (!g_ctx) { + return ORBIS_FIBER_ERROR_PERMISSION; + } + + *addr_frame_pointer = g_ctx->rbp; + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) { + return sceFiberRunImpl(fiber, nullptr, 0, arg_on_run_to, arg_on_return); +} + +s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run) { + return sceFiberSwitchImpl(fiber, nullptr, 0, arg_on_run_to, arg_on_run); +} + void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("hVYD7Ou2pCQ", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize); - LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize); + LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberInitialize); // _sceFiberInitializeWithInternalOptionImpl LIB_FUNCTION("asjUJJ+aa8s", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberOptParamInitialize); LIB_FUNCTION("JeNX5F-NzQU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberFinalize); @@ -473,12 +545,20 @@ void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("p+zLIOg27zU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetSelf); LIB_FUNCTION("B0ZX2hx9DMw", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberReturnToThread); + LIB_FUNCTION("avfGJ94g36Q", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberRunImpl); // _sceFiberAttachContextAndRun + LIB_FUNCTION("ZqhZFuzKT6U", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberSwitchImpl); // _sceFiberAttachContextAndSwitch + LIB_FUNCTION("uq2Y5BFz0PE", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetInfo); LIB_FUNCTION("Lcqty+QNWFc", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberStartContextSizeCheck); LIB_FUNCTION("Kj4nXMpnM8Y", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberStopContextSizeCheck); LIB_FUNCTION("JzyT91ucGDc", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRename); + + LIB_FUNCTION("0dy4JtMUcMQ", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberGetThreadFramePointerAddress); } } // namespace Libraries::Fiber diff --git a/src/core/libraries/fiber/fiber.h b/src/core/libraries/fiber/fiber.h index 3c4e3b70e..edcd9afe8 100644 --- a/src/core/libraries/fiber/fiber.h +++ b/src/core/libraries/fiber/fiber.h @@ -114,5 +114,7 @@ s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck(void); s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name); +s32 PS4_SYSV_ABI sceFiberGetThreadFramePointerAddress(u64* addr_frame_pointer); + void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::Fiber \ No newline at end of file From ee974414d28b4da47fda57024987057c928e2872 Mon Sep 17 00:00:00 2001 From: polyproxy <47796739+polybiusproxy@users.noreply.github.com> Date: Sat, 28 Dec 2024 17:43:29 +0100 Subject: [PATCH 18/45] hotfix: fix fiber initialization --- src/common/elf_info.h | 1 + src/core/libraries/fiber/fiber.cpp | 29 ++++++++++++++++++++--------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/common/elf_info.h b/src/common/elf_info.h index 5a2c914e0..6eb144e9a 100644 --- a/src/common/elf_info.h +++ b/src/common/elf_info.h @@ -34,6 +34,7 @@ public: static constexpr u32 FW_20 = 0x2000000; static constexpr u32 FW_25 = 0x2500000; static constexpr u32 FW_30 = 0x3000000; + static constexpr u32 FW_35 = 0x3500000; static constexpr u32 FW_40 = 0x4000000; static constexpr u32 FW_45 = 0x4500000; static constexpr u32 FW_50 = 0x5000000; diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp index 6d3f546f2..b77b5b5b6 100644 --- a/src/core/libraries/fiber/fiber.cpp +++ b/src/core/libraries/fiber/fiber.cpp @@ -3,6 +3,7 @@ #include "fiber.h" +#include "common/elf_info.h" #include "common/logging/log.h" #include "core/libraries/fiber/fiber_error.h" #include "core/libraries/libs.h" @@ -72,6 +73,8 @@ s32 PS4_SYSV_ABI _sceFiberAttachContext(OrbisFiber* fiber, void* addr_context, u *stack_ptr++ = kFiberStackSizeCheck; } } + + return ORBIS_OK; } void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to, @@ -143,9 +146,10 @@ void PS4_SYSV_ABI _sceFiberTerminate(OrbisFiber* fiber, u64 arg_on_return, Orbis __builtin_trap(); } -s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, - u64 arg_on_initialize, void* addr_context, u64 size_context, - const OrbisFiberOptParam* opt_param, u32 build_ver) { +s32 PS4_SYSV_ABI sceFiberInitializeImpl(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, + u64 arg_on_initialize, void* addr_context, u64 size_context, + const OrbisFiberOptParam* opt_param, u32 flags, + u32 build_ver) { if (!fiber || !name || !entry) { return ORBIS_FIBER_ERROR_NULL; } @@ -171,12 +175,12 @@ s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFi return ORBIS_FIBER_ERROR_INVALID; } - u32 flags = FiberFlags::None; - if (build_ver >= 0x3500000) { - flags |= FiberFlags::SetFpuRegs; + u32 user_flags = flags; + if (build_ver >= Common::ElfInfo::FW_35) { + user_flags |= FiberFlags::SetFpuRegs; } if (context_size_check) { - flags |= FiberFlags::ContextSizeCheck; + user_flags |= FiberFlags::ContextSizeCheck; } strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH); @@ -186,7 +190,7 @@ s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFi fiber->addr_context = addr_context; fiber->size_context = size_context; fiber->context = nullptr; - fiber->flags = flags; + fiber->flags = user_flags; /* A low stack area is problematic, as we can easily @@ -525,6 +529,13 @@ s32 PS4_SYSV_ABI sceFiberGetThreadFramePointerAddress(u64* addr_frame_pointer) { return ORBIS_OK; } +s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, + u64 arg_on_initialize, void* addr_context, u64 size_context, + const OrbisFiberOptParam* opt_param, u32 build_ver) { + return sceFiberInitializeImpl(fiber, name, entry, arg_on_initialize, addr_context, size_context, + opt_param, 0, build_ver); +} + s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) { return sceFiberRunImpl(fiber, nullptr, 0, arg_on_run_to, arg_on_return); } @@ -536,7 +547,7 @@ s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_o void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("hVYD7Ou2pCQ", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize); LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, - sceFiberInitialize); // _sceFiberInitializeWithInternalOptionImpl + sceFiberInitializeImpl); // _sceFiberInitializeWithInternalOptionImpl LIB_FUNCTION("asjUJJ+aa8s", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberOptParamInitialize); LIB_FUNCTION("JeNX5F-NzQU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberFinalize); From ab7e794f23881c4fb704ceedcaad6133fc187a7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Sun, 29 Dec 2024 17:35:52 +0700 Subject: [PATCH 19/45] sdl: Limit minimum window size (#1966) --- src/sdl_window.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index 4b13844b8..50c3e93ee 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -92,6 +92,7 @@ WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_ UNREACHABLE_MSG("Failed to create window handle: {}", SDL_GetError()); } + SDL_SetWindowMinimumSize(window, 640, 360); SDL_SetWindowFullscreen(window, Config::isFullscreenMode()); SDL_InitSubSystem(SDL_INIT_GAMEPAD); From 468d7ea80edd5c370e9de67f3a3f3b5b03b3ddf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Sun, 29 Dec 2024 17:36:16 +0700 Subject: [PATCH 20/45] config: Don't load config in the Emulator class (#1965) Allows overriding of configs in frontends. Fix set fullscreen not working when specified in the CLI. --- src/emulator.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/emulator.cpp b/src/emulator.cpp index 10d17a2db..dbe21a141 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -45,10 +45,6 @@ Frontend::WindowSDL* g_window = nullptr; namespace Core { Emulator::Emulator() { - // Read configuration file. - const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - Config::load(config_dir / "config.toml"); - // Initialize NT API functions and set high priority #ifdef _WIN32 Common::NtApi::Initialize(); From 202c1046a139a413f6b37265c89a9ebc6c24ca97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Sun, 29 Dec 2024 17:36:29 +0700 Subject: [PATCH 21/45] Fix loading RenderDoc in offline mode for Linux (#1968) --- src/video_core/renderdoc.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/video_core/renderdoc.cpp b/src/video_core/renderdoc.cpp index 7e0994992..b082fd1ca 100644 --- a/src/video_core/renderdoc.cpp +++ b/src/video_core/renderdoc.cpp @@ -65,11 +65,18 @@ void LoadRenderDoc() { #else static constexpr const char RENDERDOC_LIB[] = "librenderdoc.so"; #endif - if (void* mod = dlopen(RENDERDOC_LIB, RTLD_NOW | RTLD_NOLOAD)) { - const auto RENDERDOC_GetAPI = - reinterpret_cast(dlsym(mod, "RENDERDOC_GetAPI")); - const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api); - ASSERT(ret == 1); + // Check if we are running by RDoc GUI + void* mod = dlopen(RENDERDOC_LIB, RTLD_NOW | RTLD_NOLOAD); + if (!mod && Config::isRdocEnabled()) { + // If enabled in config, try to load RDoc runtime in offline mode + if ((mod = dlopen(RENDERDOC_LIB, RTLD_NOW))) { + const auto RENDERDOC_GetAPI = + reinterpret_cast(dlsym(mod, "RENDERDOC_GetAPI")); + const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api); + ASSERT(ret == 1); + } else { + LOG_ERROR(Render, "Cannot load RenderDoc: {}", dlerror()); + } } #endif if (rdoc_api) { From da9e45b5828a9a7c54b10b6d41e346cde0e2d535 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 29 Dec 2024 02:36:41 -0800 Subject: [PATCH 22/45] build: Update MoltenVK and fix missing add_dependencies for copy. (#1970) * build: Fix missing add_dependencies for MoltenVK copy target. * externals: Update MoltenVK --- CMakeLists.txt | 1 + externals/MoltenVK/MoltenVK | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cd3894719..d63bd1951 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -917,6 +917,7 @@ if (APPLE) DEPENDS ${MVK_DYLIB_SRC} COMMAND cmake -E copy ${MVK_DYLIB_SRC} ${MVK_DYLIB_DST}) add_custom_target(CopyMoltenVK DEPENDS ${MVK_DYLIB_DST}) + add_dependencies(CopyMoltenVK MoltenVK) add_dependencies(shadps4 CopyMoltenVK) set_property(TARGET shadps4 APPEND PROPERTY BUILD_RPATH "@executable_path/../Frameworks") else() diff --git a/externals/MoltenVK/MoltenVK b/externals/MoltenVK/MoltenVK index 5ad3ee5d2..9f0b616d9 160000 --- a/externals/MoltenVK/MoltenVK +++ b/externals/MoltenVK/MoltenVK @@ -1 +1 @@ -Subproject commit 5ad3ee5d2f84342950c3fe93dec97719574d1932 +Subproject commit 9f0b616d9e2c39464d2a859b79dbc655c4a30e7e From 62c47cb1b74c22812c566e7a2aeb2968e7fcb999 Mon Sep 17 00:00:00 2001 From: baggins183 Date: Sun, 29 Dec 2024 02:37:15 -0800 Subject: [PATCH 23/45] recompiler: handle reads of output variables in hull shaders (#1962) * Handle output control point reads in hull shader. Might need additional barriers * output storage class --- .../spirv/emit_spirv_context_get_set.cpp | 15 +++++----- .../backend/spirv/emit_spirv_instructions.h | 2 ++ .../frontend/translate/vector_memory.cpp | 4 --- src/shader_recompiler/ir/ir_emitter.cpp | 6 ++++ src/shader_recompiler/ir/ir_emitter.h | 3 ++ src/shader_recompiler/ir/opcodes.inc | 2 ++ .../ir/passes/hull_shader_transform.cpp | 28 ++++++++++++------- 7 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f3db6af56..4550440bb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -217,14 +217,6 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { const auto pointer{ ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))}; return ctx.OpLoad(ctx.F32[1], pointer); - } else if (IR::IsParam(attr)) { - const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; - const auto param = ctx.input_params.at(param_id).id; - const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; - const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); } UNREACHABLE(); } @@ -351,6 +343,13 @@ Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, vertex_index, attr_index, comp_index)); } +Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, + Id comp_index) { + const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.output_attr_array, + vertex_index, attr_index, comp_index)); +} + void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) { // Implied vertex index is invocation_id const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 85bed589b..d26cf6662 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -89,6 +89,8 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index); void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index); +Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, + Id comp_index); Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 7c3db9551..79d46cd42 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -255,10 +255,6 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst "Non immediate offset not supported"); } - if (info.stage == Stage::Hull) { - // printf("here\n"); // break - } - IR::Value address = [&] -> IR::Value { if (is_ring) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index c9d97679f..20e6eae0b 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -288,6 +288,12 @@ void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index, Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index); } +F32 IREmitter::ReadTcsGenericOuputAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index) { + return Inst(IR::Opcode::ReadTcsGenericOuputAttribute, vertex_index, attr_index, + comp_index); +} + F32 IREmitter::GetPatch(Patch patch) { return Inst(Opcode::GetPatch, patch); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 4679a0133..f65baee2a 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -90,6 +90,9 @@ public: const U32& comp_index); void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index); + [[nodiscard]] F32 ReadTcsGenericOuputAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index); + [[nodiscard]] F32 GetPatch(Patch patch); void SetPatch(Patch patch, const F32& value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index cf2c3b67e..1194c3792 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -64,6 +64,8 @@ OPCODE(GetPatch, F32, Patc OPCODE(SetPatch, Void, Patch, F32, ) OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, ) OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, ) +OPCODE(ReadTcsGenericOuputAttribute, F32, U32, U32, U32, ) + // Flags OPCODE(GetScc, U1, Void, ) diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index 895c9823e..6164fec12 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -343,8 +343,8 @@ static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter& // TODO: can optimize div in control point index similarly to mod // Read a TCS input (InputCP region) or TES input (OutputCP region) -static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir, - u32 off_dw) { +static IR::F32 ReadTessControlPointAttribute(IR::U32 addr, const u32 stride, IR::IREmitter& ir, + u32 off_dw, bool is_output_read_in_tcs) { if (off_dw > 0) { addr = ir.IAdd(addr, ir.Imm32(off_dw)); } @@ -354,7 +354,11 @@ static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmit ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u)); const IR::U32 comp_index = ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); - return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); + if (is_output_read_in_tcs) { + return ir.ReadTcsGenericOuputAttribute(control_point_index, attr_index, comp_index); + } else { + return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); + } } } // namespace @@ -481,21 +485,25 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { case IR::Opcode::LoadSharedU128: IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::U32 addr{inst.Arg(0)}; - AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + const AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32 ? 1 : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4); - ASSERT_MSG(region == AttributeRegion::InputCP, - "Unhandled read of output or patchconst attribute in hull shader"); + ASSERT_MSG(region == AttributeRegion::InputCP || + region == AttributeRegion::OutputCP, + "Unhandled read of patchconst attribute in hull shader"); + const bool is_tcs_output_read = region == AttributeRegion::OutputCP; + const u32 stride = is_tcs_output_read ? runtime_info.hs_info.hs_output_cp_stride + : runtime_info.hs_info.ls_stride; IR::Value attr_read; if (num_dwords == 1) { attr_read = ir.BitCast( - ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0)); + ReadTessControlPointAttribute(addr, stride, ir, 0, is_tcs_output_read)); } else { boost::container::static_vector read_components; for (auto i = 0; i < num_dwords; i++) { const IR::F32 component = - ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i); + ReadTessControlPointAttribute(addr, stride, ir, i, is_tcs_output_read); read_components.push_back(ir.BitCast(component)); } attr_read = ir.CompositeConstruct(read_components); @@ -565,8 +573,8 @@ void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4); const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 { if (region == AttributeRegion::OutputCP) { - return ReadTessInputComponent( - addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw); + return ReadTessControlPointAttribute( + addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw, false); } else { ASSERT(region == AttributeRegion::PatchConst); return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw)); From 248220fef333557f6a59450072da9555f68e9109 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 29 Dec 2024 02:37:37 -0800 Subject: [PATCH 24/45] pthread: Change minimum stack for HLE to additional stack. (#1960) --- src/core/libraries/kernel/threads/pthread.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 761d13346..e81207a0d 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -244,10 +244,9 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt new_thread->tid = ++TidCounter; if (new_thread->attr.stackaddr_attr == 0) { - /* Enforce minimum stack size of 128 KB */ - static constexpr size_t MinimumStack = 128_KB; - auto& stacksize = new_thread->attr.stacksize_attr; - stacksize = std::max(stacksize, MinimumStack); + /* Add additional stack space for HLE */ + static constexpr size_t AdditionalStack = 128_KB; + new_thread->attr.stacksize_attr += AdditionalStack; } if (thread_state->CreateStack(&new_thread->attr) != 0) { From 4b2db61120b893840baeedf1e03500e4d32b1699 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sun, 29 Dec 2024 12:45:18 +0200 Subject: [PATCH 25/45] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..02017dddf --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,30 @@ +--- +name: Bug report +about: Report a bug in the emulator +title: '' +labels: '' +assignees: '' + +--- + +Checklist: +[ ] I have searched for a similar issue in this repository and did not find one. +[ ] I have asked for support on shadPS4 discord server. +[ ] I am using an official build obtained from [releases](https://github.com/shadps4-emu/shadPS4/releases) or updated it using its in-app updater. +[ ] I have re-dumped the game and performed a clean install without mods. +[ ] I have disabled all patches and cheats. + +Description: + + +Steps To Reproduce: + + +Logs: + + +System Information: + + +Additional Information: + From f8177902a5e4a2f99bd166b9d9f85f468dfce7d5 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 29 Dec 2024 02:46:59 -0800 Subject: [PATCH 26/45] cubeb_audio: Make sure COM is initialized on Windows. (#1958) --- src/core/libraries/audio/audioout_backend.h | 3 +++ src/core/libraries/audio/cubeb_audio.cpp | 23 +++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/core/libraries/audio/audioout_backend.h b/src/core/libraries/audio/audioout_backend.h index ecc4cf7c6..f423d4963 100644 --- a/src/core/libraries/audio/audioout_backend.h +++ b/src/core/libraries/audio/audioout_backend.h @@ -34,6 +34,9 @@ public: private: cubeb* ctx = nullptr; +#ifdef _WIN32 + bool owns_com = false; +#endif }; class SDLAudioOut final : public AudioOutBackend { diff --git a/src/core/libraries/audio/cubeb_audio.cpp b/src/core/libraries/audio/cubeb_audio.cpp index e1195558a..4127931b7 100644 --- a/src/core/libraries/audio/cubeb_audio.cpp +++ b/src/core/libraries/audio/cubeb_audio.cpp @@ -10,9 +10,11 @@ #include "core/libraries/audio/audioout.h" #include "core/libraries/audio/audioout_backend.h" -namespace Libraries::AudioOut { +#ifdef _WIN32 +#include +#endif -constexpr int AUDIO_STREAM_BUFFER_THRESHOLD = 65536; // Define constant for buffer threshold +namespace Libraries::AudioOut { class CubebPortBackend : public PortBackend { public: @@ -143,17 +145,26 @@ private: }; CubebAudioOut::CubebAudioOut() { +#ifdef _WIN32 + // Need to initialize COM for this thread on Windows, in case WASAPI backend is used. + owns_com = CoInitializeEx(nullptr, COINIT_MULTITHREADED) == S_OK; +#endif if (const auto ret = cubeb_init(&ctx, "shadPS4", nullptr); ret != CUBEB_OK) { LOG_CRITICAL(Lib_AudioOut, "Failed to create cubeb context: {}", ret); } } CubebAudioOut::~CubebAudioOut() { - if (!ctx) { - return; + if (ctx) { + cubeb_destroy(ctx); + ctx = nullptr; } - cubeb_destroy(ctx); - ctx = nullptr; +#ifdef _WIN32 + if (owns_com) { + CoUninitialize(); + owns_com = false; + } +#endif } std::unique_ptr CubebAudioOut::Open(PortOut& port) { From e952013fe06b6d040d3be091e3c7e50e3456770f Mon Sep 17 00:00:00 2001 From: Mahmoud Adel <94652220+AboMedoz@users.noreply.github.com> Date: Sun, 29 Dec 2024 12:47:15 +0200 Subject: [PATCH 27/45] add EventWrite and DispatchIndirect to ProcessCompute (#1948) * add EventWrite and DispatchIndirect to ProcessCompute helps Alienation go Ingame * apply review changes Co-authored-by: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> --------- Co-authored-by: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> --- src/video_core/amdgpu/liverpool.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 5dd3edd6d..43adff8d2 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -821,6 +821,24 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { } break; } + case PM4ItOpcode::DispatchIndirect: { + const auto* dispatch_indirect = reinterpret_cast(header); + auto& cs_program = GetCsRegs(); + const auto offset = dispatch_indirect->data_offset; + const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; + const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); + } + if (rasterizer && (cs_program.dispatch_initiator & 1)) { + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("acb:{}:DispatchIndirect", cmd_address)); + rasterizer->DispatchIndirect(ib_address, offset, size); + rasterizer->ScopeMarkerEnd(); + } + break; + } case PM4ItOpcode::WriteData: { const auto* write_data = reinterpret_cast(header); ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5); @@ -845,6 +863,10 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { release_mem->SignalFence(static_cast(queue.pipe_id)); break; } + case PM4ItOpcode::EventWrite: { + // const auto* event = reinterpret_cast(header); + break; + } default: UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}", static_cast(opcode), count); From f09a95453ee875e0a6492343f7a27ba91525ab23 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sun, 29 Dec 2024 12:48:45 +0200 Subject: [PATCH 28/45] hot-fix: Correct queue id in dispatch indirect I missed this --- src/video_core/amdgpu/liverpool.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 43adff8d2..2926bcc69 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -825,7 +825,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { const auto* dispatch_indirect = reinterpret_cast(header); auto& cs_program = GetCsRegs(); const auto offset = dispatch_indirect->data_offset; - const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; + const auto ib_address = mapped_queues[vqid].indirect_args_addr; const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), @@ -833,7 +833,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { } if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); - rasterizer->ScopeMarkerBegin(fmt::format("acb:{}:DispatchIndirect", cmd_address)); + rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); rasterizer->DispatchIndirect(ib_address, offset, size); rasterizer->ScopeMarkerEnd(); } From ee72d99947382dac8f4fbd56f1915a3eb6b65cde Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Sun, 29 Dec 2024 13:53:06 +0300 Subject: [PATCH 29/45] ajm: added stubbed statistics instance (#1924) * ajm: added stubbed statistics instance * fixed a typo, thanks poly * fixed clang-format * removed unused struct * small fixes * fixed typedefs, added per codec statistics --- CMakeLists.txt | 2 + src/core/libraries/ajm/ajm.cpp | 10 +- src/core/libraries/ajm/ajm.h | 49 ++++++- src/core/libraries/ajm/ajm_batch.cpp | 129 ++++++++++++++---- src/core/libraries/ajm/ajm_batch.h | 4 + src/core/libraries/ajm/ajm_context.cpp | 21 +-- src/core/libraries/ajm/ajm_instance.cpp | 4 +- .../libraries/ajm/ajm_instance_statistics.cpp | 37 +++++ .../libraries/ajm/ajm_instance_statistics.h | 17 +++ 9 files changed, 231 insertions(+), 42 deletions(-) create mode 100644 src/core/libraries/ajm/ajm_instance_statistics.cpp create mode 100644 src/core/libraries/ajm/ajm_instance_statistics.h diff --git a/CMakeLists.txt b/CMakeLists.txt index d63bd1951..af811e9fb 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -189,6 +189,8 @@ set(AJM_LIB src/core/libraries/ajm/ajm.cpp src/core/libraries/ajm/ajm_context.cpp src/core/libraries/ajm/ajm_context.h src/core/libraries/ajm/ajm_error.h + src/core/libraries/ajm/ajm_instance_statistics.cpp + src/core/libraries/ajm/ajm_instance_statistics.h src/core/libraries/ajm/ajm_instance.cpp src/core/libraries/ajm/ajm_instance.h src/core/libraries/ajm/ajm_mp3.cpp diff --git a/src/core/libraries/ajm/ajm.cpp b/src/core/libraries/ajm/ajm.cpp index 3184fa64f..5c55d2c06 100644 --- a/src/core/libraries/ajm/ajm.cpp +++ b/src/core/libraries/ajm/ajm.cpp @@ -183,13 +183,15 @@ int PS4_SYSV_ABI sceAjmInstanceSwitch() { return ORBIS_OK; } -int PS4_SYSV_ABI sceAjmMemoryRegister() { - LOG_ERROR(Lib_Ajm, "(STUBBED) called"); +int PS4_SYSV_ABI sceAjmMemoryRegister(u32 context_id, void* ptr, size_t num_pages) { + // All memory is already shared with our implementation since we do not use any hardware. + LOG_TRACE(Lib_Ajm, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceAjmMemoryUnregister() { - LOG_ERROR(Lib_Ajm, "(STUBBED) called"); +int PS4_SYSV_ABI sceAjmMemoryUnregister(u32 context_id, void* ptr) { + // All memory is already shared with our implementation since we do not use any hardware. + LOG_TRACE(Lib_Ajm, "(STUBBED) called"); return ORBIS_OK; } diff --git a/src/core/libraries/ajm/ajm.h b/src/core/libraries/ajm/ajm.h index 1ac7c7629..34aeb9aa4 100644 --- a/src/core/libraries/ajm/ajm.h +++ b/src/core/libraries/ajm/ajm.h @@ -74,6 +74,26 @@ union AjmJobFlags { }; }; +enum class AjmStatisticsFlags : u64 { + Memory = 1 << 0, + EnginePerCodec = 1 << 15, + Engine = 1 << 16, +}; +DECLARE_ENUM_FLAG_OPERATORS(AjmStatisticsFlags) + +union AjmStatisticsJobFlags { + AjmStatisticsJobFlags(AjmJobFlags job_flags) : raw(job_flags.raw) {} + + u64 raw; + struct { + u64 version : 3; + u64 : 12; + AjmStatisticsFlags statistics_flags : 17; + u64 : 32; + }; +}; +static_assert(sizeof(AjmStatisticsJobFlags) == 8); + struct AjmSidebandResult { s32 result; s32 internal_result; @@ -126,6 +146,31 @@ union AjmSidebandInitParameters { u8 reserved[8]; }; +struct AjmSidebandStatisticsEngine { + float usage_batch; + float usage_interval[3]; +}; + +struct AjmSidebandStatisticsEnginePerCodec { + u8 codec_count; + u8 codec_id[3]; + float codec_percentage[3]; +}; + +struct AjmSidebandStatisticsMemory { + u32 instance_free; + u32 buffer_free; + u32 batch_size; + u32 input_size; + u32 output_size; + u32 small_size; +}; + +struct AjmSidebandStatisticsEngineParameters { + u32 interval_count; + float interval[3]; +}; + union AjmInstanceFlags { u64 raw; struct { @@ -178,8 +223,8 @@ int PS4_SYSV_ABI sceAjmInstanceCreate(u32 context, AjmCodecType codec_type, AjmI int PS4_SYSV_ABI sceAjmInstanceDestroy(u32 context, u32 instance); int PS4_SYSV_ABI sceAjmInstanceExtend(); int PS4_SYSV_ABI sceAjmInstanceSwitch(); -int PS4_SYSV_ABI sceAjmMemoryRegister(); -int PS4_SYSV_ABI sceAjmMemoryUnregister(); +int PS4_SYSV_ABI sceAjmMemoryRegister(u32 context_id, void* ptr, size_t num_pages); +int PS4_SYSV_ABI sceAjmMemoryUnregister(u32 context_id, void* ptr); int PS4_SYSV_ABI sceAjmModuleRegister(u32 context, AjmCodecType codec_type, s64 reserved); int PS4_SYSV_ABI sceAjmModuleUnregister(); int PS4_SYSV_ABI sceAjmStrError(); diff --git a/src/core/libraries/ajm/ajm_batch.cpp b/src/core/libraries/ajm/ajm_batch.cpp index b1cec88b3..30e1deb71 100644 --- a/src/core/libraries/ajm/ajm_batch.cpp +++ b/src/core/libraries/ajm/ajm_batch.cpp @@ -54,6 +54,8 @@ public: : m_p_begin(begin), m_p_current(m_p_begin), m_size(size) {} AjmBatchBuffer(std::span data) : m_p_begin(data.data()), m_p_current(m_p_begin), m_size(data.size()) {} + AjmBatchBuffer(AjmChunkBuffer& buffer) + : AjmBatchBuffer(reinterpret_cast(buffer.p_address), buffer.size) {} AjmBatchBuffer SubBuffer(size_t size = s_dynamic_extent) { auto current = m_p_current; @@ -113,6 +115,88 @@ private: size_t m_size{}; }; +AjmJob AjmStatisticsJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { + std::optional job_flags = {}; + std::optional input_control_buffer = {}; + std::optional output_control_buffer = {}; + + AjmJob job; + job.instance_id = instance_id; + + while (!batch_buffer.IsEmpty()) { + auto& header = batch_buffer.Peek(); + switch (header.ident) { + case Identifier::AjmIdentInputControlBuf: { + ASSERT_MSG(!input_control_buffer.has_value(), + "Only one instance of input control buffer is allowed per job"); + const auto& buffer = batch_buffer.Consume(); + if (buffer.p_address != nullptr && buffer.size != 0) { + input_control_buffer = buffer; + } + break; + } + case Identifier::AjmIdentControlFlags: { + ASSERT_MSG(!job_flags.has_value(), "Only one instance of job flags is allowed per job"); + auto& chunk = batch_buffer.Consume(); + job_flags = AjmJobFlags{ + .raw = (u64(chunk.header.payload) << 32) + chunk.flags_low, + }; + break; + } + case Identifier::AjmIdentReturnAddressBuf: { + // Ignore return address buffers. + batch_buffer.Skip(); + break; + } + case Identifier::AjmIdentOutputControlBuf: { + ASSERT_MSG(!output_control_buffer.has_value(), + "Only one instance of output control buffer is allowed per job"); + const auto& buffer = batch_buffer.Consume(); + if (buffer.p_address != nullptr && buffer.size != 0) { + output_control_buffer = buffer; + } + break; + } + default: + UNREACHABLE_MSG("Unknown chunk: {}", header.ident); + } + } + + ASSERT(job_flags.has_value()); + job.flags = job_flags.value(); + + AjmStatisticsJobFlags flags(job.flags); + if (input_control_buffer.has_value()) { + AjmBatchBuffer input_batch(input_control_buffer.value()); + if (True(flags.statistics_flags & AjmStatisticsFlags::Engine)) { + job.input.statistics_engine_parameters = + input_batch.Consume(); + } + } + + if (output_control_buffer.has_value()) { + AjmBatchBuffer output_batch(output_control_buffer.value()); + job.output.p_result = &output_batch.Consume(); + *job.output.p_result = AjmSidebandResult{}; + + if (True(flags.statistics_flags & AjmStatisticsFlags::Engine)) { + job.output.p_engine = &output_batch.Consume(); + *job.output.p_engine = AjmSidebandStatisticsEngine{}; + } + if (True(flags.statistics_flags & AjmStatisticsFlags::EnginePerCodec)) { + job.output.p_engine_per_codec = + &output_batch.Consume(); + *job.output.p_engine_per_codec = AjmSidebandStatisticsEnginePerCodec{}; + } + if (True(flags.statistics_flags & AjmStatisticsFlags::Memory)) { + job.output.p_memory = &output_batch.Consume(); + *job.output.p_memory = AjmSidebandStatisticsMemory{}; + } + } + + return job; +} + AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { std::optional job_flags = {}; std::optional input_control_buffer = {}; @@ -155,15 +239,6 @@ AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { batch_buffer.Skip(); break; } - case Identifier::AjmIdentInlineBuf: { - ASSERT_MSG(!output_control_buffer.has_value(), - "Only one instance of inline buffer is allowed per job"); - const auto& buffer = batch_buffer.Consume(); - if (buffer.p_address != nullptr && buffer.size != 0) { - inline_buffer = buffer; - } - break; - } case Identifier::AjmIdentOutputRunBuf: { auto& buffer = batch_buffer.Consume(); u8* p_begin = reinterpret_cast(buffer.p_address); @@ -186,13 +261,12 @@ AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { } } + ASSERT(job_flags.has_value()); job.flags = job_flags.value(); // Initialize sideband input parameters if (input_control_buffer.has_value()) { - AjmBatchBuffer input_batch(reinterpret_cast(input_control_buffer->p_address), - input_control_buffer->size); - + AjmBatchBuffer input_batch(input_control_buffer.value()); const auto sideband_flags = job_flags->sideband_flags; if (True(sideband_flags & AjmJobSidebandFlags::Format) && !input_batch.IsEmpty()) { job.input.format = input_batch.Consume(); @@ -202,6 +276,9 @@ AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { } const auto control_flags = job_flags.value().control_flags; + if (True(control_flags & AjmJobControlFlags::Resample)) { + job.input.resample_parameters = input_batch.Consume(); + } if (True(control_flags & AjmJobControlFlags::Initialize)) { job.input.init_params = AjmDecAt9InitializeParameters{}; std::memcpy(&job.input.init_params.value(), input_batch.GetCurrent(), @@ -209,21 +286,9 @@ AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { } } - if (inline_buffer.has_value()) { - AjmBatchBuffer inline_batch(reinterpret_cast(inline_buffer->p_address), - inline_buffer->size); - - const auto control_flags = job_flags.value().control_flags; - if (True(control_flags & AjmJobControlFlags::Resample)) { - job.input.resample_parameters = inline_batch.Consume(); - } - } - // Initialize sideband output parameters if (output_control_buffer.has_value()) { - AjmBatchBuffer output_batch(reinterpret_cast(output_control_buffer->p_address), - output_control_buffer->size); - + AjmBatchBuffer output_batch(output_control_buffer.value()); job.output.p_result = &output_batch.Consume(); *job.output.p_result = AjmSidebandResult{}; @@ -260,9 +325,21 @@ std::shared_ptr AjmBatch::FromBatchBuffer(std::span data) { AjmBatchBuffer buffer(data); while (!buffer.IsEmpty()) { auto& job_chunk = buffer.Consume(); + if (job_chunk.header.ident == AjmIdentInlineBuf) { + // Inline buffers are used to store sideband input data. + // We should just skip them as they do not require any special handling. + buffer.Advance(job_chunk.size); + continue; + } ASSERT(job_chunk.header.ident == AjmIdentJob); auto instance_id = job_chunk.header.payload; - batch->jobs.push_back(AjmJobFromBatchBuffer(instance_id, buffer.SubBuffer(job_chunk.size))); + if (instance_id == AJM_INSTANCE_STATISTICS) { + batch->jobs.push_back( + AjmStatisticsJobFromBatchBuffer(instance_id, buffer.SubBuffer(job_chunk.size))); + } else { + batch->jobs.push_back( + AjmJobFromBatchBuffer(instance_id, buffer.SubBuffer(job_chunk.size))); + } } return batch; diff --git a/src/core/libraries/ajm/ajm_batch.h b/src/core/libraries/ajm/ajm_batch.h index 3c586b773..09daa630d 100644 --- a/src/core/libraries/ajm/ajm_batch.h +++ b/src/core/libraries/ajm/ajm_batch.h @@ -23,6 +23,7 @@ struct AjmJob { struct Input { std::optional init_params; std::optional resample_parameters; + std::optional statistics_engine_parameters; std::optional format; std::optional gapless_decode; std::vector buffer; @@ -33,6 +34,9 @@ struct AjmJob { AjmSidebandResult* p_result = nullptr; AjmSidebandStream* p_stream = nullptr; AjmSidebandFormat* p_format = nullptr; + AjmSidebandStatisticsMemory* p_memory = nullptr; + AjmSidebandStatisticsEnginePerCodec* p_engine_per_codec = nullptr; + AjmSidebandStatisticsEngine* p_engine = nullptr; AjmSidebandGaplessDecode* p_gapless_decode = nullptr; AjmSidebandMFrame* p_mframe = nullptr; u8* p_codec_info = nullptr; diff --git a/src/core/libraries/ajm/ajm_context.cpp b/src/core/libraries/ajm/ajm_context.cpp index 09255110c..8992dd83b 100644 --- a/src/core/libraries/ajm/ajm_context.cpp +++ b/src/core/libraries/ajm/ajm_context.cpp @@ -9,6 +9,7 @@ #include "core/libraries/ajm/ajm_context.h" #include "core/libraries/ajm/ajm_error.h" #include "core/libraries/ajm/ajm_instance.h" +#include "core/libraries/ajm/ajm_instance_statistics.h" #include "core/libraries/ajm/ajm_mp3.h" #include "core/libraries/error_codes.h" @@ -70,15 +71,19 @@ void AjmContext::ProcessBatch(u32 id, std::span jobs) { LOG_TRACE(Lib_Ajm, "Processing job {} for instance {}. flags = {:#x}", id, job.instance_id, job.flags.raw); - std::shared_ptr instance; - { - std::shared_lock lock(instances_mutex); - auto* p_instance = instances.Get(job.instance_id); - ASSERT_MSG(p_instance != nullptr, "Attempting to execute job on null instance"); - instance = *p_instance; - } + if (job.instance_id == AJM_INSTANCE_STATISTICS) { + AjmInstanceStatistics::Getinstance().ExecuteJob(job); + } else { + std::shared_ptr instance; + { + std::shared_lock lock(instances_mutex); + auto* p_instance = instances.Get(job.instance_id); + ASSERT_MSG(p_instance != nullptr, "Attempting to execute job on null instance"); + instance = *p_instance; + } - instance->ExecuteJob(job); + instance->ExecuteJob(job); + } } } diff --git a/src/core/libraries/ajm/ajm_instance.cpp b/src/core/libraries/ajm/ajm_instance.cpp index ea7fd5617..8af105c77 100644 --- a/src/core/libraries/ajm/ajm_instance.cpp +++ b/src/core/libraries/ajm/ajm_instance.cpp @@ -68,11 +68,11 @@ void AjmInstance::ExecuteJob(AjmJob& job) { m_codec->Initialize(¶ms, sizeof(params)); } if (job.input.resample_parameters.has_value()) { - UNREACHABLE_MSG("Unimplemented: resample parameters"); + LOG_ERROR(Lib_Ajm, "Unimplemented: resample parameters"); m_resample_parameters = job.input.resample_parameters.value(); } if (job.input.format.has_value()) { - UNREACHABLE_MSG("Unimplemented: format parameters"); + LOG_ERROR(Lib_Ajm, "Unimplemented: format parameters"); m_format = job.input.format.value(); } if (job.input.gapless_decode.has_value()) { diff --git a/src/core/libraries/ajm/ajm_instance_statistics.cpp b/src/core/libraries/ajm/ajm_instance_statistics.cpp new file mode 100644 index 000000000..c0c1af8bb --- /dev/null +++ b/src/core/libraries/ajm/ajm_instance_statistics.cpp @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "core/libraries/ajm/ajm.h" +#include "core/libraries/ajm/ajm_instance_statistics.h" + +namespace Libraries::Ajm { + +void AjmInstanceStatistics::ExecuteJob(AjmJob& job) { + if (job.output.p_engine) { + job.output.p_engine->usage_batch = 0.01; + const auto ic = job.input.statistics_engine_parameters->interval_count; + for (u32 idx = 0; idx < ic; ++idx) { + job.output.p_engine->usage_interval[idx] = 0.01; + } + } + if (job.output.p_engine_per_codec) { + job.output.p_engine_per_codec->codec_count = 1; + job.output.p_engine_per_codec->codec_id[0] = static_cast(AjmCodecType::At9Dec); + job.output.p_engine_per_codec->codec_percentage[0] = 0.01; + } + if (job.output.p_memory) { + job.output.p_memory->instance_free = 0x400000; + job.output.p_memory->buffer_free = 0x400000; + job.output.p_memory->batch_size = 0x4200; + job.output.p_memory->input_size = 0x2000; + job.output.p_memory->output_size = 0x2000; + job.output.p_memory->small_size = 0x200; + } +} + +AjmInstanceStatistics& AjmInstanceStatistics::Getinstance() { + static AjmInstanceStatistics instance; + return instance; +} + +} // namespace Libraries::Ajm diff --git a/src/core/libraries/ajm/ajm_instance_statistics.h b/src/core/libraries/ajm/ajm_instance_statistics.h new file mode 100644 index 000000000..ea70c9d56 --- /dev/null +++ b/src/core/libraries/ajm/ajm_instance_statistics.h @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "core/libraries/ajm/ajm_batch.h" + +namespace Libraries::Ajm { + +class AjmInstanceStatistics { +public: + void ExecuteJob(AjmJob& job); + + static AjmInstanceStatistics& Getinstance(); +}; + +} // namespace Libraries::Ajm From 1bc27135e33b057702d0fe673f48b9eb29cc8e36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quang=20Ng=C3=B4?= Date: Sun, 29 Dec 2024 18:22:35 +0700 Subject: [PATCH 30/45] renderer_vulkan: fix deadlock when resizing the SDL window (#1860) * renderer_vulkan: Fix deadlock when resizing the SDL window * Address review comment --- .../renderer_vulkan/vk_presenter.cpp | 27 +++++++++++++------ .../renderer_vulkan/vk_swapchain.cpp | 1 + 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_presenter.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp index bc55cde23..93129842f 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -628,6 +628,13 @@ Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) } void Presenter::Present(Frame* frame) { + // Free the frame for reuse + const auto free_frame = [&] { + std::scoped_lock fl{free_mutex}; + free_queue.push(frame); + free_cv.notify_one(); + }; + // Recreate the swapchain if the window was resized. if (window.GetWidth() != swapchain.GetExtent().width || window.GetHeight() != swapchain.GetExtent().height) { @@ -636,8 +643,19 @@ void Presenter::Present(Frame* frame) { if (!swapchain.AcquireNextImage()) { swapchain.Recreate(window.GetWidth(), window.GetHeight()); + if (!swapchain.AcquireNextImage()) { + // User resizes the window too fast and GPU can't keep up. Skip this frame. + LOG_WARNING(Render_Vulkan, "Skipping frame!"); + free_frame(); + return; + } } + // Reset fence for queue submission. Do it here instead of GetRenderFrame() because we may + // skip frame because of slow swapchain recreation. If a frame skip occurs, we skip signal + // the frame's present fence and future GetRenderFrame() call will hang waiting for this frame. + instance.GetDevice().resetFences(frame->present_done); + ImGui::Core::NewFrame(); const vk::Image swapchain_image = swapchain.Image(); @@ -737,11 +755,7 @@ void Presenter::Present(Frame* frame) { swapchain.Recreate(window.GetWidth(), window.GetHeight()); } - // Free the frame for reuse - std::scoped_lock fl{free_mutex}; - free_queue.push(frame); - free_cv.notify_one(); - + free_frame(); DebugState.IncFlipFrameNum(); } @@ -776,9 +790,6 @@ Frame* Presenter::GetRenderFrame() { } } - // Reset fence for next queue submission. - device.resetFences(frame->present_done); - // If the window dimensions changed, recreate this frame if (frame->width != window.GetWidth() || frame->height != window.GetHeight()) { RecreateFrame(frame, window.GetWidth(), window.GetHeight()); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 380660a2f..44f4be6dd 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -84,6 +84,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) { } void Swapchain::Recreate(u32 width_, u32 height_) { + LOG_DEBUG(Render_Vulkan, "Recreate the swapchain: width={} height={}", width_, height_); Create(width_, height_, surface); } From 38f1cc265295e30c429a1d604d38599eccce5303 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 29 Dec 2024 03:30:37 -0800 Subject: [PATCH 31/45] renderer_vulkan: Render polygons using triangle fans. (#1969) --- src/video_core/buffer_cache/buffer_cache.cpp | 30 ++----------------- src/video_core/buffer_cache/buffer_cache.h | 2 +- .../renderer_vulkan/liverpool_to_vk.cpp | 4 +-- .../renderer_vulkan/liverpool_to_vk.h | 9 ------ .../renderer_vulkan/vk_rasterizer.cpp | 26 +++++++--------- 5 files changed, 15 insertions(+), 56 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 59c1e0bc3..0088ea4fa 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -234,46 +234,22 @@ bool BufferCache::BindVertexBuffers( return has_step_rate; } -u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { - // Emulate QuadList and Polygon primitive types with CPU made index buffer. +void BufferCache::BindIndexBuffer(u32 index_offset) { const auto& regs = liverpool->regs; - if (!is_indexed) { - if (regs.primitive_type != AmdGpu::PrimitiveType::Polygon) { - return regs.num_indices; - } - - // Emit indices. - const u32 index_size = 3 * regs.num_indices; - const auto [data, offset] = stream_buffer.Map(index_size); - Vulkan::LiverpoolToVK::EmitPolygonToTriangleListIndices(data, regs.num_indices); - stream_buffer.Commit(); - - // Bind index buffer. - is_indexed = true; - - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, vk::IndexType::eUint16); - return index_size / sizeof(u16); - } // Figure out index type and size. const bool is_index16 = regs.index_buffer_type.index_type == AmdGpu::Liverpool::IndexType::Index16; const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32; const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32); - VAddr index_address = regs.index_base_address.Address(); - index_address += index_offset * index_size; - - if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { - UNREACHABLE(); - } + const VAddr index_address = + regs.index_base_address.Address() + index_offset * index_size; // Bind index buffer. const u32 index_buffer_size = regs.num_indices * index_size; const auto [vk_buffer, offset] = ObtainBuffer(index_address, index_buffer_size, false); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindIndexBuffer(vk_buffer->Handle(), offset, index_type); - return regs.num_indices; } void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index bcbaa45dc..0c70fa10b 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -83,7 +83,7 @@ public: const std::optional& fetch_shader); /// Bind host index buffer for the current draw. - u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); + void BindIndexBuffer(u32 index_offset); /// Writes a value to GPU buffer. void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 25ff88b9d..6bd50ab06 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -103,6 +103,7 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) { case AmdGpu::PrimitiveType::TriangleList: return vk::PrimitiveTopology::eTriangleList; case AmdGpu::PrimitiveType::TriangleFan: + case AmdGpu::PrimitiveType::Polygon: return vk::PrimitiveTopology::eTriangleFan; case AmdGpu::PrimitiveType::TriangleStrip: return vk::PrimitiveTopology::eTriangleStrip; @@ -116,9 +117,6 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) { return vk::PrimitiveTopology::eTriangleStripWithAdjacency; case AmdGpu::PrimitiveType::PatchPrimitive: return vk::PrimitiveTopology::ePatchList; - case AmdGpu::PrimitiveType::Polygon: - // Needs to generate index buffer on the fly. - return vk::PrimitiveTopology::eTriangleList; case AmdGpu::PrimitiveType::QuadList: case AmdGpu::PrimitiveType::RectList: return vk::PrimitiveTopology::ePatchList; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index d5f8e693b..25a27e20e 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -70,15 +70,6 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags); -inline void EmitPolygonToTriangleListIndices(u8* out_ptr, u32 num_vertices) { - u16* out_data = reinterpret_cast(out_ptr); - for (u16 i = 1; i < num_vertices - 1; i++) { - *out_data++ = 0; - *out_data++ = i; - *out_data++ = i + 1; - } -} - static inline vk::Format PromoteFormatToDepth(vk::Format fmt) { if (fmt == vk::Format::eR32Sfloat) { return vk::Format::eD32Sfloat; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d458fa124..4384cdbea 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -12,7 +12,6 @@ #include "video_core/renderer_vulkan/vk_shader_hle.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/texture_cache.h" -#include "vk_rasterizer.h" #ifdef MemoryBarrier #undef MemoryBarrier @@ -252,7 +251,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); - const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); + if (is_indexed) { + buffer_cache.BindIndexBuffer(index_offset); + } BeginRendering(*pipeline, state); UpdateDynamicState(*pipeline); @@ -263,10 +264,11 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); if (is_indexed) { - cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset), - instance_offset); + cmdbuf.drawIndexed(regs.num_indices, regs.num_instances.NumInstances(), 0, + s32(vertex_offset), instance_offset); } else { - cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), vertex_offset, instance_offset); + cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), vertex_offset, + instance_offset); } ResetBindings(); @@ -280,22 +282,12 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 return; } - const auto& regs = liverpool->regs; - if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { - // We use a generated index buffer to convert polygons to triangles. Since it - // changes type of the draw, arguments are not valid for this case. We need to run a - // conversion pass to repack the indirect arguments buffer first. - LOG_WARNING(Render_Vulkan, "Primitive type is not supported for indirect draw"); - return; - } - const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline(); if (!pipeline) { return; } auto state = PrepareRenderState(pipeline->GetMrtMask()); - if (!BindResources(pipeline)) { return; } @@ -303,7 +295,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); - buffer_cache.BindIndexBuffer(is_indexed, 0); + if (is_indexed) { + buffer_cache.BindIndexBuffer(0); + } const auto& [buffer, base] = buffer_cache.ObtainBuffer(arg_address + offset, stride * max_count, false); From ac2e8c26027059af7c80892f6245193890d3d1c2 Mon Sep 17 00:00:00 2001 From: Nenkai Date: Sun, 29 Dec 2024 19:15:04 +0100 Subject: [PATCH 32/45] gnmdriver: remove redundant EqEventType assert (#1975) --- src/core/libraries/gnmdriver/gnmdriver.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 91a1329e5..805c9124e 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1015,11 +1015,7 @@ int PS4_SYSV_ABI sceGnmGetDebugTimestamp() { int PS4_SYSV_ABI sceGnmGetEqEventType(const SceKernelEvent* ev) { LOG_TRACE(Lib_GnmDriver, "called"); - - auto data = sceKernelGetEventData(ev); - ASSERT(static_cast(data) == GnmEventType::GfxEop); - - return data; + return sceKernelGetEventData(ev); } int PS4_SYSV_ABI sceGnmGetEqTimeStamp() { From dd3f24614b47986ff2b921e6ff9e45c979ed4ed6 Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Sun, 29 Dec 2024 14:57:32 +0300 Subject: [PATCH 33/45] infra: updated github issue templates --- .github/ISSUE_TEMPLATE/app-bug-report.yaml | 55 +++++++++++++ .github/ISSUE_TEMPLATE/bug_report.md | 30 ------- .github/ISSUE_TEMPLATE/config.yml | 10 +++ .github/ISSUE_TEMPLATE/feature-request.yaml | 54 ++++++++++++ .github/ISSUE_TEMPLATE/game-bug-report.yaml | 91 +++++++++++++++++++++ 5 files changed, 210 insertions(+), 30 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/app-bug-report.yaml delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/feature-request.yaml create mode 100644 .github/ISSUE_TEMPLATE/game-bug-report.yaml diff --git a/.github/ISSUE_TEMPLATE/app-bug-report.yaml b/.github/ISSUE_TEMPLATE/app-bug-report.yaml new file mode 100644 index 000000000..c38bbb814 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/app-bug-report.yaml @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later +# Docs - https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema +name: Application Bug Report +description: Problem with the application itself (ie. bad file path handling, UX issue) +title: "[APP BUG]: " +body: + - type: markdown + attributes: + value: | + ## Important: Read First + + Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only + If you need help using the emulator or unsure about your issue please contact us on [discord](https://discord.gg/bFJxfftGW6). + + Please make an effort to make sure your issue isn't already reported. + + Do not create issues involving software piracy, our rules specifically prohibit this. Otherwise your issue will be closed and you will be banned in this repository. + - type: checkboxes + id: checklist + attributes: + label: Checklist + options: + - label: I have searched for a similar issue in this repository and did not find one. + required: true + - label: I am using an official build obtained from [releases](https://github.com/shadps4-emu/shadPS4/releases) or updated one of those builds using its in-app updater. + required: true + - type: textarea + id: desc + attributes: + label: Describe the Bug + description: "A clear and concise description of what the bug is" + validations: + required: true + - type: textarea + id: repro + attributes: + label: Reproduction Steps + description: "Detailed steps to reproduce the behavior" + validations: + required: true + - type: textarea + id: expected + attributes: + label: Expected Behavior + description: "A clear and concise description of what you expected to happen" + validations: + required: false + - type: input + id: os + attributes: + label: Specify OS Version + placeholder: "Example: Windows 11, Arch Linux, MacOS 15" + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 02017dddf..000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -name: Bug report -about: Report a bug in the emulator -title: '' -labels: '' -assignees: '' - ---- - -Checklist: -[ ] I have searched for a similar issue in this repository and did not find one. -[ ] I have asked for support on shadPS4 discord server. -[ ] I am using an official build obtained from [releases](https://github.com/shadps4-emu/shadPS4/releases) or updated it using its in-app updater. -[ ] I have re-dumped the game and performed a clean install without mods. -[ ] I have disabled all patches and cheats. - -Description: - - -Steps To Reproduce: - - -Logs: - - -System Information: - - -Additional Information: - diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..5adcf1437 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later +blank_issues_enabled: false +contact_links: + - name: Discord + url: https://discord.gg/bFJxfftGW6 + about: Get direct support and hang out with us + - name: Wiki + url: https://github.com/shadps4-emu/shadPS4/wiki + about: Information, guides, etc. diff --git a/.github/ISSUE_TEMPLATE/feature-request.yaml b/.github/ISSUE_TEMPLATE/feature-request.yaml new file mode 100644 index 000000000..a1b49362a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.yaml @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later +# Docs - https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema +name: Feature Request +description: Suggest a new feature or improve an existing one +title: "[Feature Request]: " +body: + - type: markdown + attributes: + value: | + ## Important: Read First + + Please make an effort to make sure your issue isn't already reported. + + Do not create issues involving software piracy, our rules specifically prohibit this. Otherwise your issue will be closed and you will be banned in this repository. + + - type: checkboxes + id: checklist + attributes: + label: Checklist + options: + - label: I have searched for a similar issue in this repository and did not find one. + required: true + - type: textarea + id: desc + attributes: + label: Description + description: | + A concise description of the feature you want + + Include step by step examples of how the feature should work under various circumstances + validations: + required: true + - type: textarea + id: reason + attributes: + label: Reason + description: | + Give a reason why you want this feature + - How will it make things easier for you? + - How does this feature help your enjoyment of the emulator? + - What does it provide that isn't being provided currently? + validations: + required: true + - type: textarea + id: examples + attributes: + label: Examples + description: | + Provide examples of the feature as implemented by other software + + Include screenshots or video if you like to help demonstrate how you'd like this feature to work + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/game-bug-report.yaml b/.github/ISSUE_TEMPLATE/game-bug-report.yaml new file mode 100644 index 000000000..7eb9441d2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/game-bug-report.yaml @@ -0,0 +1,91 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later +# Docs - https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema +name: Game Emulation Bug Report +description: Problem in a game (ie. graphical artifacts, crashes, etc.) +title: "[GAME BUG]: " +body: + - type: markdown + attributes: + value: | + ## Important: Read First + + Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only + If you need help using the emulator or unsure about your issue please contact us on [discord](https://discord.gg/bFJxfftGW6). + + You can also check the [Game Compatibility Repository](https://github.com/shadps4-emu/shadps4-game-compatibility) for the information about the status of the game. + + Please make an effort to make sure your issue isn't already reported. + + Do not create issues involving software piracy, our rules specifically prohibit this. Otherwise your issue will be closed and you will be banned in this repository. + - type: checkboxes + id: checklist + attributes: + label: Checklist + options: + - label: I have searched for a similar issue in this repository and did not find one. + required: true + - label: I am using an official build obtained from [releases](https://github.com/shadps4-emu/shadPS4/releases) or updated one of those builds using its in-app updater. + required: true + - label: I have re-dumped the game and performed a clean install without mods. + required: true + - label: I have disabled all patches and cheats. + required: true + - label: I have all the required [system modules](https://github.com/shadps4-emu/shadps4-game-compatibility?tab=readme-ov-file#informations) installed. + required: true + - type: textarea + id: desc + attributes: + label: Describe the Bug + description: "A clear and concise description of what the bug is" + validations: + required: true + - type: textarea + id: repro + attributes: + label: Reproduction Steps + description: "Detailed steps to reproduce the behavior" + validations: + required: true + - type: input + id: os + attributes: + label: Specify OS Version + placeholder: "Example: Windows 11, Arch Linux, MacOS 15" + validations: + required: true + - type: input + id: cpu + attributes: + label: CPU + placeholder: "Example: Intel Core i7-8700" + validations: + required: true + - type: input + id: gpu + attributes: + label: GPU + placeholder: "Example: nVidia GTX 1650" + validations: + required: true + - type: input + id: ram + attributes: + label: Amount of RAM in GB + placeholder: "Example: 16 GB" + validations: + required: true + - type: input + id: vram + attributes: + label: Amount of VRAM in GB + placeholder: "Example: 8 GB" + validations: + required: true + - type: textarea + id: logs + attributes: + label: "Logs" + description: Attach any logs here. Log can be found by right clicking on a game name -> Open Folder... -> Open Log Folder. Make sure that the log type is set to `sync`. + validations: + required: false From 90912233967f97b41d4ed8017eae15667adf1547 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Mon, 30 Dec 2024 21:59:14 -0600 Subject: [PATCH 34/45] Fix sceKernelGetEventFilter (#1987) --- src/core/libraries/kernel/equeue.cpp | 2 +- src/core/libraries/kernel/equeue.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/libraries/kernel/equeue.cpp b/src/core/libraries/kernel/equeue.cpp index 3ae77e46b..03259cd22 100644 --- a/src/core/libraries/kernel/equeue.cpp +++ b/src/core/libraries/kernel/equeue.cpp @@ -346,7 +346,7 @@ int PS4_SYSV_ABI sceKernelDeleteUserEvent(SceKernelEqueue eq, int id) { return ORBIS_OK; } -s16 PS4_SYSV_ABI sceKernelGetEventFilter(const SceKernelEvent* ev) { +int PS4_SYSV_ABI sceKernelGetEventFilter(const SceKernelEvent* ev) { return ev->filter; } diff --git a/src/core/libraries/kernel/equeue.h b/src/core/libraries/kernel/equeue.h index f8759137c..17900238f 100644 --- a/src/core/libraries/kernel/equeue.h +++ b/src/core/libraries/kernel/equeue.h @@ -21,7 +21,7 @@ class EqueueInternal; struct EqueueEvent; struct SceKernelEvent { - enum Filter : s16 { + enum Filter : int { None = 0, Read = -1, Write = -2, From 62780e4e431b42cacadeb63dfbd360b0891f4928 Mon Sep 17 00:00:00 2001 From: baggins183 Date: Mon, 30 Dec 2024 20:00:52 -0800 Subject: [PATCH 35/45] Initialize V0 to PrimitiveId in hull shader (#1985) --- src/shader_recompiler/frontend/translate/translate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index a14bff706..237acf309 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -124,12 +124,12 @@ void Translator::EmitPrologue() { } break; case LogicalStage::TessellationControl: { + ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); // Should be laid out like: // [0:8]: patch id within VGT // [8:12]: output control point id ir.SetVectorReg(IR::VectorReg::V1, ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo)); - // TODO PrimitiveId is probably V2 but haven't seen it yet break; } case LogicalStage::TessellationEval: From 284f473a52456917e2cf7fd61a151f39abf09403 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Mon, 30 Dec 2024 20:10:29 -0800 Subject: [PATCH 36/45] shader_recompiler: Fix BitCount64 and FindILsb64 (#1978) --- .../backend/spirv/emit_spirv_integer.cpp | 19 +++++++++++++++++-- .../frontend/translate/scalar_alu.cpp | 5 ++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index def1f816e..70411ecec 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -202,7 +202,14 @@ Id EmitBitCount32(EmitContext& ctx, Id value) { } Id EmitBitCount64(EmitContext& ctx, Id value) { - return ctx.OpBitCount(ctx.U64, value); + // Vulkan restricts some bitwise operations to 32-bit only, so decompose into + // two 32-bit values and add the result. + const Id unpacked{ctx.OpBitcast(ctx.U32[2], value)}; + const Id lo{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 0U)}; + const Id hi{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 1U)}; + const Id lo_count{ctx.OpBitCount(ctx.U32[1], lo)}; + const Id hi_count{ctx.OpBitCount(ctx.U32[1], hi)}; + return ctx.OpIAdd(ctx.U32[1], lo_count, hi_count); } Id EmitBitwiseNot32(EmitContext& ctx, Id value) { @@ -222,7 +229,15 @@ Id EmitFindILsb32(EmitContext& ctx, Id value) { } Id EmitFindILsb64(EmitContext& ctx, Id value) { - return ctx.OpFindILsb(ctx.U64, value); + // Vulkan restricts some bitwise operations to 32-bit only, so decompose into + // two 32-bit values and select the correct result. + const Id unpacked{ctx.OpBitcast(ctx.U32[2], value)}; + const Id lo{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 0U)}; + const Id hi{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 1U)}; + const Id lo_lsb{ctx.OpFindILsb(ctx.U32[1], lo)}; + const Id hi_lsb{ctx.OpFindILsb(ctx.U32[1], hi)}; + const Id found_lo{ctx.OpINotEqual(ctx.U32[1], lo_lsb, ctx.ConstU32(u32(-1)))}; + return ctx.OpSelect(ctx.U32[1], found_lo, lo_lsb, hi_lsb); } Id EmitSMin32(EmitContext& ctx, Id a, Id b) { diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 3a2b01a90..e18cda012 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -597,14 +597,13 @@ void Translator::S_BCNT1_I32_B64(const GcnInst& inst) { void Translator::S_FF1_I32_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 result{ir.Select(ir.IEqual(src0, ir.Imm32(0U)), ir.Imm32(-1), ir.FindILsb(src0))}; + const IR::U32 result{ir.FindILsb(src0)}; SetDst(inst.dst[0], result); } void Translator::S_FF1_I32_B64(const GcnInst& inst) { const IR::U64 src0{GetSrc64(inst.src[0])}; - const IR::U32 result{ - ir.Select(ir.IEqual(src0, ir.Imm64(u64(0))), ir.Imm32(-1), ir.FindILsb(src0))}; + const IR::U32 result{ir.FindILsb(src0)}; SetDst(inst.dst[0], result); } From 41d64a200dc6ac519b2db0df9d7692afef3a8344 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Mon, 30 Dec 2024 20:14:47 -0800 Subject: [PATCH 37/45] shader_recompiler: Add swizzle support for unsupported formats. (#1869) * shader_recompiler: Add swizzle support for unsupported formats. * renderer_vulkan: Rework MRT swizzles and add unsupported format swizzle support. * shader_recompiler: Clean up swizzle handling and handle ImageRead storage swizzle. * shader_recompiler: Fix type errors * liverpool_to_vk: Remove redundant clear color swizzles. * shader_recompiler: Reduce CompositeConstruct to constants where possible. * shader_recompiler: Fix ImageRead/Write and StoreBufferFormatF32 types. * amdgpu: Add a few more unsupported format remaps. --- CMakeLists.txt | 1 + .../backend/spirv/emit_spirv_composite.cpp | 98 +++++++++-- .../backend/spirv/emit_spirv_image.cpp | 6 +- .../backend/spirv/emit_spirv_instructions.h | 38 ++++- .../frontend/translate/export.cpp | 32 ++-- .../frontend/translate/translate.cpp | 25 +-- .../frontend/translate/vector_memory.cpp | 4 +- src/shader_recompiler/ir/ir_emitter.cpp | 80 +++++++++ src/shader_recompiler/ir/ir_emitter.h | 7 + src/shader_recompiler/ir/opcodes.inc | 18 +- .../ir/passes/resource_tracking_pass.cpp | 100 ++++++----- src/shader_recompiler/ir/reinterpret.h | 24 +++ src/shader_recompiler/runtime_info.h | 2 +- src/shader_recompiler/specialization.h | 10 +- src/video_core/amdgpu/liverpool.h | 50 +++++- src/video_core/amdgpu/resource.h | 156 ++++++++++++------ .../renderer_vulkan/liverpool_to_vk.cpp | 97 ++++------- .../renderer_vulkan/liverpool_to_vk.h | 7 +- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 14 +- src/video_core/texture_cache/image_info.cpp | 4 +- src/video_core/texture_cache/image_view.cpp | 29 +--- 22 files changed, 522 insertions(+), 282 deletions(-) create mode 100644 src/shader_recompiler/ir/reinterpret.h diff --git a/CMakeLists.txt b/CMakeLists.txt index af811e9fb..833bbe3ce 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -701,6 +701,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/post_order.h src/shader_recompiler/ir/program.cpp src/shader_recompiler/ir/program.h + src/shader_recompiler/ir/reinterpret.h src/shader_recompiler/ir/reg.h src/shader_recompiler/ir/type.cpp src/shader_recompiler/ir/type.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 74e736cf6..d064b5d05 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -6,16 +6,22 @@ namespace Shader::Backend::SPIRV { -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { - return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); +template +Id EmitCompositeConstruct(EmitContext& ctx, IR::Inst* inst, Args&&... args) { + return inst->AreAllArgsImmediates() ? ctx.ConstantComposite(args...) + : ctx.OpCompositeConstruct(args...); } -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { - return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3); +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.U32[2], e1, e2); } -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { - return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4); +Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.U32[3], e1, e2, e3); +} + +Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.U32[4], e1, e2, e3, e4); } Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) { @@ -42,16 +48,30 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); } -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { - return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); +Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.U32[2], composite1, composite2, comp0, comp1); } -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { - return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); +Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.U32[3], composite1, composite2, comp0, comp1, comp2); } -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { - return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); +Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.U32[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[2], e1, e2); +} + +Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[3], e1, e2, e3); +} + +Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[4], e1, e2, e3, e4); } Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { @@ -78,16 +98,30 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); } -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { - return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); +Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F16[2], composite1, composite2, comp0, comp1); } -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { - return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); +Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F16[3], composite1, composite2, comp0, comp1, comp2); } -Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { - return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); +Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F16[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[2], e1, e2); +} + +Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[3], e1, e2, e3); +} + +Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2, e3, e4); } Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { @@ -114,6 +148,20 @@ Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); } +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F32[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F32[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F32[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + void EmitCompositeConstructF64x2(EmitContext&) { UNREACHABLE_MSG("SPIR-V Instruction"); } @@ -150,4 +198,18 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); } +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F64[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F64[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F64[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 2946edab3..c3d937fe7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -238,7 +238,7 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod } texel = ctx.OpImageRead(color_type, image, coords, operands.mask, operands.operands); } - return !texture.is_integer ? ctx.OpBitcast(ctx.U32[4], texel) : texel; + return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texel) : texel; } void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms, @@ -253,8 +253,8 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id } else if (Sirit::ValidId(lod)) { LOG_WARNING(Render, "Image write with LOD not supported by driver"); } - ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask, - operands.operands); + const Id texel = texture.is_integer ? ctx.OpBitcast(color_type, color) : color; + ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index d26cf6662..0d9fcff46 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -120,33 +120,48 @@ Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value); -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); @@ -156,6 +171,11 @@ void EmitCompositeExtractF64x4(EmitContext& ctx); Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 5927aa696..83240e17f 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -25,34 +25,28 @@ void Translator::EmitExport(const GcnInst& inst) { IR::VectorReg(inst.src[3].code), }; - const auto swizzle = [&](u32 comp) { + const auto set_attribute = [&](u32 comp, IR::F32 value) { if (!IR::IsMrt(attrib)) { - return comp; + ir.SetAttribute(attrib, value, comp); + return; } const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0); - switch (runtime_info.fs_info.color_buffers[index].mrt_swizzle) { - case MrtSwizzle::Identity: - return comp; - case MrtSwizzle::Alt: - static constexpr std::array AltSwizzle = {2, 1, 0, 3}; - return AltSwizzle[comp]; - case MrtSwizzle::Reverse: - static constexpr std::array RevSwizzle = {3, 2, 1, 0}; - return RevSwizzle[comp]; - case MrtSwizzle::ReverseAlt: - static constexpr std::array AltRevSwizzle = {3, 0, 1, 2}; - return AltRevSwizzle[comp]; - default: - UNREACHABLE(); + const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle; + const std::array swizzle_array = {r, g, b, a}; + const auto swizzled_comp = swizzle_array[comp]; + if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) { + ir.SetAttribute(attrib, value, comp); + return; } + ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red)); }; const auto unpack = [&](u32 idx) { const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx])); const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)}; const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)}; - ir.SetAttribute(attrib, r, swizzle(idx * 2)); - ir.SetAttribute(attrib, g, swizzle(idx * 2 + 1)); + set_attribute(idx * 2, r); + set_attribute(idx * 2 + 1, g); }; // Components are float16 packed into a VGPR @@ -73,7 +67,7 @@ void Translator::EmitExport(const GcnInst& inst) { continue; } const IR::F32 comp = ir.GetVectorReg(vsrc[i]); - ir.SetAttribute(attrib, comp, swizzle(i)); + set_attribute(i, comp); } } if (IR::IsMrt(attrib)) { diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 237acf309..7f5504663 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -10,6 +10,7 @@ #include "shader_recompiler/info.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/ir/reinterpret.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h" @@ -475,26 +476,12 @@ void Translator::EmitFetch(const GcnInst& inst) { // Read the V# of the attribute to figure out component number and type. const auto buffer = info.ReadUdReg(attrib.sgpr_base, attrib.dword_offset); + const auto values = + ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1), + ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3)); + const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect()); for (u32 i = 0; i < 4; i++) { - const IR::F32 comp = [&] { - switch (buffer.GetSwizzle(i)) { - case AmdGpu::CompSwizzle::One: - return ir.Imm32(1.f); - case AmdGpu::CompSwizzle::Zero: - return ir.Imm32(0.f); - case AmdGpu::CompSwizzle::Red: - return ir.GetAttribute(attr, 0); - case AmdGpu::CompSwizzle::Green: - return ir.GetAttribute(attr, 1); - case AmdGpu::CompSwizzle::Blue: - return ir.GetAttribute(attr, 2); - case AmdGpu::CompSwizzle::Alpha: - return ir.GetAttribute(attr, 3); - default: - UNREACHABLE(); - } - }(); - ir.SetVectorReg(dst_reg++, comp); + ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)}); } // In case of programmable step rates we need to fallback to instance data pulling in diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 79d46cd42..c5be08b7d 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -326,7 +326,7 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) { const IR::VectorReg src_reg{inst.src[1].code}; - std::array comps{}; + std::array comps{}; for (u32 i = 0; i < num_dwords; i++) { comps[i] = ir.GetVectorReg(src_reg + i); } @@ -424,7 +424,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) { if (((mimg.dmask >> i) & 1) == 0) { continue; } - IR::U32 value = IR::U32{ir.CompositeExtract(texel, i)}; + IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)}; ir.SetVectorReg(dest_reg++, value); } } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 20e6eae0b..823f9bdcd 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -663,6 +663,86 @@ Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_ } } +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 4 || comp1 >= 4) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}", comp0, comp1); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast(comp0)}, + Value{static_cast(comp1)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x2); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x2); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x2); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x2); + default: + ThrowInvalidType(vector1.Type()); + } +} + +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 6 || comp1 >= 6 || comp2 >= 6) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}", comp0, comp1, comp2); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast(comp0)}, + Value{static_cast(comp1)}, Value{static_cast(comp2)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x3); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x3); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x3); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x3); + default: + ThrowInvalidType(vector1.Type()); + } +} + +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2, size_t comp3) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 8 || comp1 >= 8 || comp2 >= 8 || comp3 >= 8) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}, {}", comp0, comp1, comp2, + comp3); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast(comp0)}, + Value{static_cast(comp1)}, Value{static_cast(comp2)}, + Value{static_cast(comp3)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x4); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x4); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x4); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x4); + default: + ThrowInvalidType(vector1.Type()); + } +} + Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { if (true_value.Type() != false_value.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", true_value.Type(), false_value.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index f65baee2a..9aab9459b 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -155,6 +155,13 @@ public: [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2, size_t comp3); + [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 1194c3792..6242a230e 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -99,7 +99,7 @@ OPCODE(StoreBufferU32, Void, Opaq OPCODE(StoreBufferU32x2, Void, Opaque, Opaque, U32x2, ) OPCODE(StoreBufferU32x3, Void, Opaque, Opaque, U32x3, ) OPCODE(StoreBufferU32x4, Void, Opaque, Opaque, U32x4, ) -OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, U32x4, ) +OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, ) // Buffer atomic operations OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) @@ -124,6 +124,9 @@ OPCODE(CompositeExtractU32x4, U32, U32x OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) +OPCODE(CompositeShuffleU32x2, U32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeShuffleU32x3, U32x3, U32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeShuffleU32x4, U32x4, U32x4, U32x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) @@ -133,6 +136,9 @@ OPCODE(CompositeExtractF16x4, F16, F16x OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) +OPCODE(CompositeShuffleF16x2, F16x2, F16x2, F16x2, U32, U32, ) +OPCODE(CompositeShuffleF16x3, F16x3, F16x3, F16x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF16x4, F16x4, F16x4, F16x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) @@ -142,6 +148,9 @@ OPCODE(CompositeExtractF32x4, F32, F32x OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) +OPCODE(CompositeShuffleF32x2, F32x2, F32x2, F32x2, U32, U32, ) +OPCODE(CompositeShuffleF32x3, F32x3, F32x3, F32x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF32x4, F32x4, F32x4, F32x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) @@ -151,6 +160,9 @@ OPCODE(CompositeExtractF64x4, F64, F64x OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) +OPCODE(CompositeShuffleF64x2, F64x2, F64x2, F64x2, U32, U32, ) +OPCODE(CompositeShuffleF64x3, F64x3, F64x3, F64x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF64x4, F64x4, F64x4, F64x4, U32, U32, U32, U32, ) // Select operations OPCODE(SelectU1, U1, U1, U1, U1, ) @@ -346,8 +358,8 @@ OPCODE(ImageGatherDref, F32x4, Opaq OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageRead, U32x4, Opaque, Opaque, U32, U32, ) -OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32, U32x4, ) +OPCODE(ImageRead, F32x4, Opaque, Opaque, U32, U32, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32, F32x4, ) // Image atomic operations OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index e6d23bfe7..636752912 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/ir/reinterpret.h" #include "video_core/amdgpu/resource.h" namespace Shader::Optimization { @@ -128,35 +129,6 @@ bool IsImageInstruction(const IR::Inst& inst) { } } -IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) { - boost::container::static_vector comps; - for (u32 i = 0; i < 4; i++) { - switch (sharp.GetSwizzle(i)) { - case AmdGpu::CompSwizzle::Zero: - comps.emplace_back(ir.Imm32(0.f)); - break; - case AmdGpu::CompSwizzle::One: - comps.emplace_back(ir.Imm32(1.f)); - break; - case AmdGpu::CompSwizzle::Red: - comps.emplace_back(ir.CompositeExtract(texel, 0)); - break; - case AmdGpu::CompSwizzle::Green: - comps.emplace_back(ir.CompositeExtract(texel, 1)); - break; - case AmdGpu::CompSwizzle::Blue: - comps.emplace_back(ir.CompositeExtract(texel, 2)); - break; - case AmdGpu::CompSwizzle::Alpha: - comps.emplace_back(ir.CompositeExtract(texel, 3)); - break; - default: - UNREACHABLE(); - } - } - return ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); -}; - class Descriptors { public: explicit Descriptors(Info& info_) @@ -409,15 +381,6 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; inst.SetArg(0, ir.Imm32(binding)); ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); - - // Apply dst_sel swizzle on formatted buffer instructions - if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { - inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2))); - } else { - const auto inst_info = inst.Flags(); - const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); - inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel)); - } } IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, @@ -765,10 +728,6 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip }(); inst.SetArg(1, coords); - if (inst.GetOpcode() == IR::Opcode::ImageWrite) { - inst.SetArg(4, SwizzleVector(ir, image, inst.Arg(4))); - } - if (inst_info.has_lod) { ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead || inst.GetOpcode() == IR::Opcode::ImageWrite); @@ -783,6 +742,50 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } } +void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { + const auto binding = inst.Arg(0).U32(); + const auto buffer_res = info.texture_buffers[binding]; + const auto buffer = buffer_res.GetSharp(info); + if (!buffer.Valid()) { + // Don't need to swizzle invalid buffer. + return; + } + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { + inst.SetArg(2, ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect())); + } else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) { + const auto inst_info = inst.Flags(); + const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); + const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect()); + inst.ReplaceUsesWith(swizzled); + } +} + +void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { + const auto binding = inst.Arg(0).U32(); + const auto image_res = info.images[binding & 0xFFFF]; + const auto image = image_res.GetSharp(info); + if (!image.Valid() || !image_res.IsStorage(image)) { + // Don't need to swizzle invalid or non-storage image. + return; + } + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::ImageWrite) { + inst.SetArg(4, ApplySwizzle(ir, inst.Arg(4), image.DstSelect())); + } else if (inst.GetOpcode() == IR::Opcode::ImageRead) { + const auto inst_info = inst.Flags(); + const auto lod = inst.Arg(2); + const auto ms = inst.Arg(3); + const auto texel = + ir.ImageRead(inst.Arg(0), inst.Arg(1), lod.IsEmpty() ? IR::U32{} : IR::U32{lod}, + ms.IsEmpty() ? IR::U32{} : IR::U32{ms}, inst_info); + const auto swizzled = ApplySwizzle(ir, texel, image.DstSelect()); + inst.ReplaceUsesWith(swizzled); + } +} + void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { // Insert gds binding in the shader if it doesn't exist already. @@ -852,6 +855,19 @@ void ResourceTrackingPass(IR::Program& program) { } } } + // Second pass to reinterpret format read/write where needed, since we now know + // the bindings and their properties. + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (IsTextureBufferInstruction(inst)) { + PatchTextureBufferInterpretation(*block, inst, info); + continue; + } + if (IsImageInstruction(inst)) { + PatchImageInterpretation(*block, inst, info); + } + } + } } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/reinterpret.h b/src/shader_recompiler/ir/reinterpret.h new file mode 100644 index 000000000..73d587a56 --- /dev/null +++ b/src/shader_recompiler/ir/reinterpret.h @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/ir/ir_emitter.h" +#include "video_core/amdgpu/resource.h" + +namespace Shader::IR { + +/// Applies a component swizzle to a vec4. +inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::CompMapping& swizzle) { + // Constants are indexed as 0 and 1, and components are 4-7. Thus we can apply a swizzle + // using two vectors and a shuffle, using one vector of constants and one of the components. + const auto zero = ir.Imm32(0.f); + const auto one = ir.Imm32(1.f); + const auto constants_vec = ir.CompositeConstruct(zero, one, zero, zero); + const auto swizzled = + ir.CompositeShuffle(constants_vec, vector, size_t(swizzle.r), size_t(swizzle.g), + size_t(swizzle.b), size_t(swizzle.a)); + return swizzled; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index bbf74f5d3..781a0b14a 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -180,7 +180,7 @@ struct FragmentRuntimeInfo { std::array inputs; struct PsColorBuffer { AmdGpu::NumberFormat num_format; - MrtSwizzle mrt_swizzle; + AmdGpu::CompMapping swizzle; auto operator<=>(const PsColorBuffer&) const noexcept = default; }; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 5bf97ee51..f8a86c63b 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -31,7 +31,7 @@ struct BufferSpecialization { struct TextureBufferSpecialization { bool is_integer = false; - u32 dst_select = 0; + AmdGpu::CompMapping dst_select{}; auto operator<=>(const TextureBufferSpecialization&) const = default; }; @@ -40,13 +40,9 @@ struct ImageSpecialization { AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; bool is_integer = false; bool is_storage = false; - u32 dst_select = 0; + AmdGpu::CompMapping dst_select{}; - bool operator==(const ImageSpecialization& other) const { - return type == other.type && is_integer == other.is_integer && - is_storage == other.is_storage && - (dst_select != 0 ? dst_select == other.dst_select : true); - } + auto operator<=>(const ImageSpecialization&) const = default; }; struct FMaskSpecialization { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 83271a82d..f1607f03e 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -889,10 +889,54 @@ struct Liverpool { return !info.linear_general; } - NumberFormat NumFormat() const { + [[nodiscard]] DataFormat DataFormat() const { + return RemapDataFormat(info.format); + } + + [[nodiscard]] NumberFormat NumFormat() const { // There is a small difference between T# and CB number types, account for it. - return info.number_type == AmdGpu::NumberFormat::SnormNz ? AmdGpu::NumberFormat::Srgb - : info.number_type.Value(); + return RemapNumberFormat(info.number_type == NumberFormat::SnormNz + ? NumberFormat::Srgb + : info.number_type.Value()); + } + + [[nodiscard]] CompMapping Swizzle() const { + // clang-format off + static constexpr std::array, 4> mrt_swizzles{{ + // Standard + std::array{{ + {.r = CompSwizzle::Red, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Alpha}, + }}, + // Alternate + std::array{{ + {.r = CompSwizzle::Green, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Alpha, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Alpha, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Alpha}, + }}, + // StandardReverse + std::array{{ + {.r = CompSwizzle::Blue, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Green, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Blue, .b = CompSwizzle::Green, .a = CompSwizzle::Red}, + }}, + // AlternateReverse + std::array{{ + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Green, .a = CompSwizzle::Blue}, + }}, + }}; + // clang-format on + const auto swap_idx = static_cast(info.comp_swap.Value()); + const auto components_idx = NumComponents(info.format) - 1; + const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx]; + return RemapComponents(info.format, mrt_swizzle); } }; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 6bbe1fb7e..4de25adbf 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -20,6 +20,85 @@ enum class CompSwizzle : u32 { Alpha = 7, }; +struct CompMapping { + CompSwizzle r : 3; + CompSwizzle g : 3; + CompSwizzle b : 3; + CompSwizzle a : 3; + + auto operator<=>(const CompMapping& other) const = default; + + template + [[nodiscard]] std::array Apply(const std::array& data) const { + return { + ApplySingle(data, r), + ApplySingle(data, g), + ApplySingle(data, b), + ApplySingle(data, a), + }; + } + +private: + template + T ApplySingle(const std::array& data, const CompSwizzle swizzle) const { + switch (swizzle) { + case CompSwizzle::Zero: + return T(0); + case CompSwizzle::One: + return T(1); + case CompSwizzle::Red: + return data[0]; + case CompSwizzle::Green: + return data[1]; + case CompSwizzle::Blue: + return data[2]; + case CompSwizzle::Alpha: + return data[3]; + default: + UNREACHABLE(); + } + } +}; + +inline DataFormat RemapDataFormat(const DataFormat format) { + switch (format) { + case DataFormat::Format11_11_10: + return DataFormat::Format10_11_11; + case DataFormat::Format10_10_10_2: + return DataFormat::Format2_10_10_10; + case DataFormat::Format5_5_5_1: + return DataFormat::Format1_5_5_5; + default: + return format; + } +} + +inline NumberFormat RemapNumberFormat(const NumberFormat format) { + return format; +} + +inline CompMapping RemapComponents(const DataFormat format, const CompMapping components) { + switch (format) { + case DataFormat::Format11_11_10: + return { + .r = components.b, + .g = components.g, + .b = components.r, + .a = components.a, + }; + case DataFormat::Format10_10_10_2: + case DataFormat::Format5_5_5_1: + return { + .r = components.a, + .g = components.b, + .b = components.g, + .a = components.r, + }; + default: + return components; + } +} + // Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture] struct Buffer { u64 base_address : 44; @@ -52,21 +131,22 @@ struct Buffer { return std::memcmp(this, &other, sizeof(Buffer)) == 0; } - u32 DstSelect() const { - return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); - } - - CompSwizzle GetSwizzle(u32 comp) const noexcept { - const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; - return static_cast(select[comp]); + CompMapping DstSelect() const { + const CompMapping dst_sel{ + .r = CompSwizzle(dst_sel_x), + .g = CompSwizzle(dst_sel_y), + .b = CompSwizzle(dst_sel_z), + .a = CompSwizzle(dst_sel_w), + }; + return RemapComponents(DataFormat(data_format), dst_sel); } NumberFormat GetNumberFmt() const noexcept { - return static_cast(num_format); + return RemapNumberFormat(NumberFormat(num_format)); } DataFormat GetDataFmt() const noexcept { - return static_cast(data_format); + return RemapDataFormat(DataFormat(data_format)); } u32 GetStride() const noexcept { @@ -186,10 +266,11 @@ struct Image { static constexpr Image Null() { Image image{}; image.data_format = u64(DataFormat::Format8_8_8_8); - image.dst_sel_x = 4; - image.dst_sel_y = 5; - image.dst_sel_z = 6; - image.dst_sel_w = 7; + image.num_format = u64(NumberFormat::Unorm); + image.dst_sel_x = u64(CompSwizzle::Red); + image.dst_sel_y = u64(CompSwizzle::Green); + image.dst_sel_z = u64(CompSwizzle::Blue); + image.dst_sel_w = u64(CompSwizzle::Alpha); image.tiling_index = u64(TilingMode::Texture_MicroTiled); image.type = u64(ImageType::Color2D); return image; @@ -207,43 +288,14 @@ struct Image { return base_address != 0; } - u32 DstSelect() const { - return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); - } - - CompSwizzle GetSwizzle(u32 comp) const noexcept { - const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; - return static_cast(select[comp]); - } - - static char SelectComp(u32 sel) { - switch (sel) { - case 0: - return '0'; - case 1: - return '1'; - case 4: - return 'R'; - case 5: - return 'G'; - case 6: - return 'B'; - case 7: - return 'A'; - default: - UNREACHABLE(); - } - } - - std::string DstSelectName() const { - std::string result = "["; - u32 dst_sel = DstSelect(); - for (u32 i = 0; i < 4; i++) { - result += SelectComp(dst_sel & 7); - dst_sel >>= 3; - } - result += ']'; - return result; + CompMapping DstSelect() const { + const CompMapping dst_sel{ + .r = CompSwizzle(dst_sel_x), + .g = CompSwizzle(dst_sel_y), + .b = CompSwizzle(dst_sel_z), + .a = CompSwizzle(dst_sel_w), + }; + return RemapComponents(DataFormat(data_format), dst_sel); } u32 Pitch() const { @@ -285,11 +337,11 @@ struct Image { } DataFormat GetDataFmt() const noexcept { - return static_cast(data_format); + return RemapDataFormat(DataFormat(data_format)); } NumberFormat GetNumberFmt() const noexcept { - return static_cast(num_format); + return RemapNumberFormat(NumberFormat(num_format)); } TilingMode GetTilingMode() const { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 6bd50ab06..c41b760ba 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -324,6 +324,34 @@ vk::BorderColor BorderColor(AmdGpu::BorderColor color) { } } +vk::ComponentSwizzle ComponentSwizzle(AmdGpu::CompSwizzle comp_swizzle) { + switch (comp_swizzle) { + case AmdGpu::CompSwizzle::Zero: + return vk::ComponentSwizzle::eZero; + case AmdGpu::CompSwizzle::One: + return vk::ComponentSwizzle::eOne; + case AmdGpu::CompSwizzle::Red: + return vk::ComponentSwizzle::eR; + case AmdGpu::CompSwizzle::Green: + return vk::ComponentSwizzle::eG; + case AmdGpu::CompSwizzle::Blue: + return vk::ComponentSwizzle::eB; + case AmdGpu::CompSwizzle::Alpha: + return vk::ComponentSwizzle::eA; + default: + UNREACHABLE(); + } +} + +vk::ComponentMapping ComponentMapping(AmdGpu::CompMapping comp_mapping) { + return vk::ComponentMapping{ + .r = ComponentSwizzle(comp_mapping.r), + .g = ComponentSwizzle(comp_mapping.g), + .b = ComponentSwizzle(comp_mapping.b), + .a = ComponentSwizzle(comp_mapping.a), + }; +} + static constexpr vk::FormatFeatureFlags2 BufferRead = vk::FormatFeatureFlagBits2::eUniformTexelBuffer | vk::FormatFeatureFlagBits2::eVertexBuffer; static constexpr vk::FormatFeatureFlags2 BufferWrite = @@ -538,10 +566,8 @@ std::span SurfaceFormats() { // 10_11_11 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format10_11_11, AmdGpu::NumberFormat::Float, vk::Format::eB10G11R11UfloatPack32), - // 11_11_10 - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format11_11_10, AmdGpu::NumberFormat::Float, - vk::Format::eB10G11R11UfloatPack32), - // 10_10_10_2 + // 11_11_10 - Remapped to 10_11_11. + // 10_10_10_2 - Remapped to 2_10_10_10. // 2_10_10_10 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Unorm, vk::Format::eA2B10G10R10UnormPack32), @@ -614,7 +640,7 @@ std::span SurfaceFormats() { // 1_5_5_5 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format1_5_5_5, AmdGpu::NumberFormat::Unorm, vk::Format::eR5G5B5A1UnormPack16), - // 5_5_5_1 + // 5_5_5_1 - Remapped to 1_5_5_5. // 4_4_4_4 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format4_4_4_4, AmdGpu::NumberFormat::Unorm, vk::Format::eR4G4B4A4UnormPack16), @@ -677,31 +703,6 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu return format->vk_format; } -vk::Format AdjustColorBufferFormat(vk::Format base_format, - Liverpool::ColorBuffer::SwapMode comp_swap) { - const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate; - const bool comp_swap_reverse = comp_swap == Liverpool::ColorBuffer::SwapMode::StandardReverse; - const bool comp_swap_alt_reverse = - comp_swap == Liverpool::ColorBuffer::SwapMode::AlternateReverse; - if (comp_swap_alt) { - switch (base_format) { - case vk::Format::eR8G8B8A8Unorm: - return vk::Format::eB8G8R8A8Unorm; - case vk::Format::eB8G8R8A8Unorm: - return vk::Format::eR8G8B8A8Unorm; - case vk::Format::eR8G8B8A8Srgb: - return vk::Format::eB8G8R8A8Srgb; - case vk::Format::eB8G8R8A8Srgb: - return vk::Format::eR8G8B8A8Srgb; - case vk::Format::eA2B10G10R10UnormPack32: - return vk::Format::eA2R10G10B10UnormPack32; - default: - break; - } - } - return base_format; -} - static constexpr DepthFormatInfo CreateDepthFormatInfo( const DepthBuffer::ZFormat z_format, const DepthBuffer::StencilFormat stencil_format, const vk::Format vk_format) { @@ -744,21 +745,12 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat } vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { - const auto comp_swap = color_buffer.info.comp_swap.Value(); - const auto format = color_buffer.info.format.Value(); - const auto number_type = color_buffer.info.number_type.Value(); + const auto comp_swizzle = color_buffer.Swizzle(); + const auto format = color_buffer.DataFormat(); + const auto number_type = color_buffer.NumFormat(); const auto& c0 = color_buffer.clear_word0; const auto& c1 = color_buffer.clear_word1; - const auto num_bits = AmdGpu::NumBits(color_buffer.info.format); - const auto num_components = AmdGpu::NumComponents(format); - - const bool comp_swap_alt = - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::Alternate || - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::AlternateReverse; - const bool comp_swap_reverse = - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::StandardReverse || - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::AlternateReverse; vk::ClearColorValue color{}; @@ -1079,26 +1071,7 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color break; } - if (num_components == 1) { - if (comp_swap != Liverpool::ColorBuffer::SwapMode::Standard) { - color.float32[static_cast(comp_swap)] = color.float32[0]; - color.float32[0] = 0.0f; - } - } else { - if (comp_swap_alt && num_components == 4) { - std::swap(color.float32[0], color.float32[2]); - } - - if (comp_swap_reverse) { - std::reverse(std::begin(color.float32), std::begin(color.float32) + num_components); - } - - if (comp_swap_alt && num_components != 4) { - color.float32[3] = color.float32[num_components - 1]; - color.float32[num_components - 1] = 0.0f; - } - } - + color.float32 = comp_swizzle.Apply(color.float32); return {.color = color}; } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 25a27e20e..a68280e7d 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -42,6 +42,10 @@ vk::SamplerMipmapMode MipFilter(AmdGpu::MipFilter filter); vk::BorderColor BorderColor(AmdGpu::BorderColor color); +vk::ComponentSwizzle ComponentSwizzle(AmdGpu::CompSwizzle comp_swizzle); + +vk::ComponentMapping ComponentMapping(AmdGpu::CompMapping comp_mapping); + struct SurfaceFormatInfo { AmdGpu::DataFormat data_format; AmdGpu::NumberFormat number_format; @@ -52,9 +56,6 @@ std::span SurfaceFormats(); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); -vk::Format AdjustColorBufferFormat(vk::Format base_format, - Liverpool::ColorBuffer::SwapMode comp_swap); - struct DepthFormatInfo { Liverpool::DepthBuffer::ZFormat z_format; Liverpool::DepthBuffer::StencilFormat stencil_format; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ee8afa3e6..c8f4999b1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -32,7 +32,7 @@ struct GraphicsPipelineKey { u32 num_color_attachments; std::array color_formats; std::array color_num_formats; - std::array mrt_swizzles; + std::array color_swizzles; vk::Format depth_format; vk::Format stencil_format; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c880cad70..cd1b42b05 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -168,7 +168,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS for (u32 i = 0; i < Shader::MaxColorBuffers; i++) { info.fs_info.color_buffers[i] = { .num_format = graphics_key.color_num_formats[i], - .mrt_swizzle = static_cast(graphics_key.mrt_swizzles[i]), + .swizzle = graphics_key.color_swizzles[i], }; } break; @@ -304,7 +304,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm); key.blend_controls.fill({}); key.write_masks.fill({}); - key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); + key.color_swizzles.fill({}); key.vertex_buffer_formats.fill(vk::Format::eUndefined); key.patch_control_points = 0; @@ -327,14 +327,10 @@ bool PipelineCache::RefreshGraphicsKey() { continue; } - const auto base_format = - LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); key.color_formats[remapped_cb] = - LiverpoolToVK::AdjustColorBufferFormat(base_format, col_buf.info.comp_swap.Value()); + LiverpoolToVK::SurfaceFormat(col_buf.DataFormat(), col_buf.NumFormat()); key.color_num_formats[remapped_cb] = col_buf.NumFormat(); - if (base_format == key.color_formats[remapped_cb]) { - key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); - } + key.color_swizzles[remapped_cb] = col_buf.Swizzle(); } fetch_shader = std::nullopt; @@ -450,7 +446,7 @@ bool PipelineCache::RefreshGraphicsKey() { // of the latter we need to change format to undefined, and either way we need to // increment the index for the null attachment binding. key.color_formats[remapped_cb] = vk::Format::eUndefined; - key.mrt_swizzles[remapped_cb] = Liverpool::ColorBuffer::SwapMode::Standard; + key.color_swizzles[remapped_cb] = {}; ++remapped_cb; continue; } diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 2cc4aab38..0559f1be3 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -265,9 +265,9 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept { props.is_tiled = buffer.IsTiled(); tiling_mode = buffer.GetTilingMode(); - pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); + pixel_format = LiverpoolToVK::SurfaceFormat(buffer.DataFormat(), buffer.NumFormat()); num_samples = buffer.NumSamples(); - num_bits = NumBits(buffer.info.format); + num_bits = NumBits(buffer.DataFormat()); type = vk::ImageType::e2D; size.width = hint.Valid() ? hint.width : buffer.Pitch(); size.height = hint.Valid() ? hint.height : buffer.Height(); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 9e67b7f73..a9ae41dd1 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -31,25 +31,6 @@ vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) { } } -vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) { - switch (dst_sel) { - case 0: - return vk::ComponentSwizzle::eZero; - case 1: - return vk::ComponentSwizzle::eOne; - case 4: - return vk::ComponentSwizzle::eR; - case 5: - return vk::ComponentSwizzle::eG; - case 6: - return vk::ComponentSwizzle::eB; - case 7: - return vk::ComponentSwizzle::eA; - default: - UNREACHABLE(); - } -} - ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept : is_storage{desc.IsStorage(image)} { const auto dfmt = image.GetDataFmt(); @@ -87,21 +68,15 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso } if (!is_storage) { - mapping.r = ConvertComponentSwizzle(image.dst_sel_x); - mapping.g = ConvertComponentSwizzle(image.dst_sel_y); - mapping.b = ConvertComponentSwizzle(image.dst_sel_z); - mapping.a = ConvertComponentSwizzle(image.dst_sel_w); + mapping = Vulkan::LiverpoolToVK::ComponentMapping(image.DstSelect()); } } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept { - const auto base_format = - Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); range.base.layer = col_buffer.view.slice_start; range.extent.layers = col_buffer.NumSlices() - range.base.layer; type = range.extent.layers > 1 ? vk::ImageViewType::e2DArray : vk::ImageViewType::e2D; - format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat(base_format, - col_buffer.info.comp_swap.Value()); + format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.DataFormat(), col_buffer.NumFormat()); } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, From e1cf1f500da9237eccd4c4a4352332e7ca8326cf Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 31 Dec 2024 00:04:26 -0800 Subject: [PATCH 38/45] native_clock: Remove unused process code. (#1989) --- src/common/native_clock.cpp | 11 ----------- src/common/native_clock.h | 1 - 2 files changed, 12 deletions(-) diff --git a/src/common/native_clock.cpp b/src/common/native_clock.cpp index c3fa637aa..0c05dbe84 100644 --- a/src/common/native_clock.cpp +++ b/src/common/native_clock.cpp @@ -4,11 +4,6 @@ #include "common/native_clock.h" #include "common/rdtsc.h" #include "common/uint128.h" -#ifdef _WIN64 -#include -#else -#include -#endif namespace Common { @@ -34,10 +29,4 @@ u64 NativeClock::GetUptime() const { return FencedRDTSC(); } -u64 NativeClock::GetProcessTimeUS() const { - timespec ret; - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ret); - return ret.tv_nsec / 1000 + ret.tv_sec * 1000000; -} - } // namespace Common diff --git a/src/common/native_clock.h b/src/common/native_clock.h index b5e389452..1542c2f3a 100644 --- a/src/common/native_clock.h +++ b/src/common/native_clock.h @@ -20,7 +20,6 @@ public: u64 GetTimeUS(u64 base_ptc = 0) const; u64 GetTimeMS(u64 base_ptc = 0) const; u64 GetUptime() const; - u64 GetProcessTimeUS() const; private: u64 rdtsc_frequency; From 052473e04882c871b1e9bc70769ef7db6045af7b Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Tue, 31 Dec 2024 12:02:33 +0300 Subject: [PATCH 39/45] infra: emphasize the contact information (#1990) --- .github/ISSUE_TEMPLATE/app-bug-report.yaml | 4 ++-- .github/ISSUE_TEMPLATE/game-bug-report.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/app-bug-report.yaml b/.github/ISSUE_TEMPLATE/app-bug-report.yaml index c38bbb814..cd540e06e 100644 --- a/.github/ISSUE_TEMPLATE/app-bug-report.yaml +++ b/.github/ISSUE_TEMPLATE/app-bug-report.yaml @@ -10,8 +10,8 @@ body: value: | ## Important: Read First - Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only - If you need help using the emulator or unsure about your issue please contact us on [discord](https://discord.gg/bFJxfftGW6). + **Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only. + If you have a support request or are unsure about the nature of your issue please contact us on [discord](https://discord.gg/bFJxfftGW6).** Please make an effort to make sure your issue isn't already reported. diff --git a/.github/ISSUE_TEMPLATE/game-bug-report.yaml b/.github/ISSUE_TEMPLATE/game-bug-report.yaml index 7eb9441d2..407ee2fe3 100644 --- a/.github/ISSUE_TEMPLATE/game-bug-report.yaml +++ b/.github/ISSUE_TEMPLATE/game-bug-report.yaml @@ -10,8 +10,8 @@ body: value: | ## Important: Read First - Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only - If you need help using the emulator or unsure about your issue please contact us on [discord](https://discord.gg/bFJxfftGW6). + **Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only. + If you have a support request or are unsure about the nature of your issue please contact us on [discord](https://discord.gg/bFJxfftGW6).** You can also check the [Game Compatibility Repository](https://github.com/shadps4-emu/shadps4-game-compatibility) for the information about the status of the game. From f41829707dfea64bf43e9f93d5737ee45fb2036d Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 31 Dec 2024 02:16:26 -0800 Subject: [PATCH 40/45] equeue: Fix regression from Filter type. (#1992) --- src/core/libraries/kernel/equeue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/libraries/kernel/equeue.h b/src/core/libraries/kernel/equeue.h index 17900238f..f8759137c 100644 --- a/src/core/libraries/kernel/equeue.h +++ b/src/core/libraries/kernel/equeue.h @@ -21,7 +21,7 @@ class EqueueInternal; struct EqueueEvent; struct SceKernelEvent { - enum Filter : int { + enum Filter : s16 { None = 0, Read = -1, Write = -2, From 927dc6d95c0b5b8ba0dc069cf655d8c58c230529 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 31 Dec 2024 02:38:30 -0800 Subject: [PATCH 41/45] vk_platform: Fix incorrect type for MVK debug flag. (#1993) --- src/video_core/renderer_vulkan/vk_platform.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index ab61af6a4..7f0bcb5d2 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -283,6 +283,9 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e Common::FS::GetUserPathString(Common::FS::PathType::LogDir); const char* log_path = crash_diagnostic_path.c_str(); vk::Bool32 enable_force_barriers = vk::True; +#ifdef __APPLE__ + const vk::Bool32 mvk_debug_mode = enable_crash_diagnostic ? vk::True : vk::False; +#endif const std::array layer_setings = { vk::LayerSettingEXT{ @@ -356,7 +359,7 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e .pSettingName = "MVK_CONFIG_DEBUG", .type = vk::LayerSettingTypeEXT::eBool32, .valueCount = 1, - .pValues = &enable_crash_diagnostic, + .pValues = &mvk_debug_mode, } #endif }; From 48c51bd9eff0dc94e84d7b75afb98f8f02a28832 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 31 Dec 2024 02:38:52 -0800 Subject: [PATCH 42/45] audio: Accurate audio output timing. (#1986) * audio: Accurate audio output timing. * audio: Handle SDL audio queue stalls. * audio: Format info cleanup. --- .gitmodules | 4 - CMakeLists.txt | 5 +- LICENSES/ISC.txt | 7 - externals/CMakeLists.txt | 10 - externals/cubeb | 1 - src/common/config.cpp | 17 - src/common/config.h | 2 - src/common/ringbuffer.h | 374 -------------------- src/core/libraries/audio/audioout.cpp | 240 +++++-------- src/core/libraries/audio/audioout.h | 45 ++- src/core/libraries/audio/audioout_backend.h | 21 +- src/core/libraries/audio/cubeb_audio.cpp | 174 --------- src/core/libraries/audio/sdl_audio.cpp | 62 +++- src/qt_gui/settings_dialog.cpp | 6 - src/qt_gui/settings_dialog.ui | 23 -- 15 files changed, 170 insertions(+), 821 deletions(-) delete mode 100644 LICENSES/ISC.txt delete mode 160000 externals/cubeb delete mode 100644 src/common/ringbuffer.h delete mode 100644 src/core/libraries/audio/cubeb_audio.cpp diff --git a/.gitmodules b/.gitmodules index 1c05ba6f3..3d0d21c5b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -119,7 +119,3 @@ path = externals/MoltenVK/cereal url = https://github.com/USCiLab/cereal shallow = true -[submodule "externals/cubeb"] - path = externals/cubeb - url = https://github.com/mozilla/cubeb - shallow = true diff --git a/CMakeLists.txt b/CMakeLists.txt index 833bbe3ce..c0f675266 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,7 +127,6 @@ find_package(xxHash 0.8.2 MODULE) find_package(ZLIB 1.3 MODULE) find_package(Zydis 5.0.0 CONFIG) find_package(pugixml 1.14 CONFIG) -find_package(cubeb CONFIG) if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR NOT MSVC) find_package(cryptopp 8.9.0 MODULE) @@ -203,7 +202,6 @@ set(AUDIO_LIB src/core/libraries/audio/audioin.cpp src/core/libraries/audio/audioout.h src/core/libraries/audio/audioout_backend.h src/core/libraries/audio/audioout_error.h - src/core/libraries/audio/cubeb_audio.cpp src/core/libraries/audio/sdl_audio.cpp src/core/libraries/ngs2/ngs2.cpp src/core/libraries/ngs2/ngs2.h @@ -499,7 +497,6 @@ set(COMMON src/common/logging/backend.cpp src/common/polyfill_thread.h src/common/rdtsc.cpp src/common/rdtsc.h - src/common/ringbuffer.h src/common/signal_context.h src/common/signal_context.cpp src/common/singleton.h @@ -892,7 +889,7 @@ endif() create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) -target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers cubeb::cubeb) +target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") diff --git a/LICENSES/ISC.txt b/LICENSES/ISC.txt deleted file mode 100644 index b9bcfa3a4..000000000 --- a/LICENSES/ISC.txt +++ /dev/null @@ -1,7 +0,0 @@ -ISC License - - - -Permission to use, copy, modify, and /or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 8bdf089f8..4350948b7 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -228,16 +228,6 @@ if (NOT TARGET stb::headers) add_library(stb::headers ALIAS stb) endif() -# cubeb -if (NOT TARGET cubeb::cubeb) - option(BUILD_TESTS "" OFF) - option(BUILD_TOOLS "" OFF) - option(BUNDLE_SPEEX "" ON) - option(USE_SANITIZERS "" OFF) - add_subdirectory(cubeb) - add_library(cubeb::cubeb ALIAS cubeb) -endif() - # Apple-only dependencies if (APPLE) # date diff --git a/externals/cubeb b/externals/cubeb deleted file mode 160000 index 9a9d034c5..000000000 --- a/externals/cubeb +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9a9d034c51859a045a34f201334f612c51e6c19d diff --git a/src/common/config.cpp b/src/common/config.cpp index 93627d8c9..deef0fa88 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -67,7 +67,6 @@ static int cursorHideTimeout = 5; // 5 seconds (default) static bool separateupdatefolder = false; static bool compatibilityData = false; static bool checkCompatibilityOnStartup = false; -static std::string audioBackend = "cubeb"; // Gui std::vector settings_install_dirs = {}; @@ -240,10 +239,6 @@ bool getCheckCompatibilityOnStartup() { return checkCompatibilityOnStartup; } -std::string getAudioBackend() { - return audioBackend; -} - void setGpuId(s32 selectedGpuId) { gpuId = selectedGpuId; } @@ -376,10 +371,6 @@ void setCheckCompatibilityOnStartup(bool use) { checkCompatibilityOnStartup = use; } -void setAudioBackend(std::string backend) { - audioBackend = backend; -} - void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h) { main_window_geometry_x = x; main_window_geometry_y = y; @@ -620,12 +611,6 @@ void load(const std::filesystem::path& path) { vkCrashDiagnostic = toml::find_or(vk, "crashDiagnostic", false); } - if (data.contains("Audio")) { - const toml::value& audio = data.at("Audio"); - - audioBackend = toml::find_or(audio, "backend", "cubeb"); - } - if (data.contains("Debug")) { const toml::value& debug = data.at("Debug"); @@ -724,7 +709,6 @@ void save(const std::filesystem::path& path) { data["Vulkan"]["rdocEnable"] = rdocEnable; data["Vulkan"]["rdocMarkersEnable"] = vkMarkers; data["Vulkan"]["crashDiagnostic"] = vkCrashDiagnostic; - data["Audio"]["backend"] = audioBackend; data["Debug"]["DebugDump"] = isDebugDump; data["Debug"]["CollectShader"] = isShaderDebug; @@ -828,7 +812,6 @@ void setDefaultValues() { separateupdatefolder = false; compatibilityData = false; checkCompatibilityOnStartup = false; - audioBackend = "cubeb"; } } // namespace Config diff --git a/src/common/config.h b/src/common/config.h index 43ef5024b..701aadb12 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -24,7 +24,6 @@ bool getEnableDiscordRPC(); bool getSeparateUpdateEnabled(); bool getCompatibilityEnabled(); bool getCheckCompatibilityOnStartup(); -std::string getAudioBackend(); std::string getLogFilter(); std::string getLogType(); @@ -76,7 +75,6 @@ void setSeparateUpdateEnabled(bool use); void setGameInstallDirs(const std::vector& settings_install_dirs_config); void setCompatibilityEnabled(bool use); void setCheckCompatibilityOnStartup(bool use); -void setAudioBackend(std::string backend); void setCursorState(s16 cursorState); void setCursorHideTimeout(int newcursorHideTimeout); diff --git a/src/common/ringbuffer.h b/src/common/ringbuffer.h deleted file mode 100644 index 6a71c2888..000000000 --- a/src/common/ringbuffer.h +++ /dev/null @@ -1,374 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2016 Mozilla Foundation -// SPDX-License-Identifier: ISC - -#pragma once - -#include -#include -#include -#include -#include -#include "common/assert.h" - -/** - * Single producer single consumer lock-free and wait-free ring buffer. - * - * This data structure allows producing data from one thread, and consuming it - * on another thread, safely and without explicit synchronization. If used on - * two threads, this data structure uses atomics for thread safety. It is - * possible to disable the use of atomics at compile time and only use this data - * structure on one thread. - * - * The role for the producer and the consumer must be constant, i.e., the - * producer should always be on one thread and the consumer should always be on - * another thread. - * - * Some words about the inner workings of this class: - * - Capacity is fixed. Only one allocation is performed, in the constructor. - * When reading and writing, the return value of the method allows checking if - * the ring buffer is empty or full. - * - We always keep the read index at least one element ahead of the write - * index, so we can distinguish between an empty and a full ring buffer: an - * empty ring buffer is when the write index is at the same position as the - * read index. A full buffer is when the write index is exactly one position - * before the read index. - * - We synchronize updates to the read index after having read the data, and - * the write index after having written the data. This means that the each - * thread can only touch a portion of the buffer that is not touched by the - * other thread. - * - Callers are expected to provide buffers. When writing to the queue, - * elements are copied into the internal storage from the buffer passed in. - * When reading from the queue, the user is expected to provide a buffer. - * Because this is a ring buffer, data might not be contiguous in memory, - * providing an external buffer to copy into is an easy way to have linear - * data for further processing. - */ -template -class RingBuffer { -public: - /** - * Constructor for a ring buffer. - * - * This performs an allocation, but is the only allocation that will happen - * for the life time of a `RingBuffer`. - * - * @param capacity The maximum number of element this ring buffer will hold. - */ - RingBuffer(int capacity) - /* One more element to distinguish from empty and full buffer. */ - : capacity_(capacity + 1) { - ASSERT(storage_capacity() < std::numeric_limits::max() / 2 && - "buffer too large for the type of index used."); - ASSERT(capacity_ > 0); - - data_.reset(new T[storage_capacity()]); - /* If this queue is using atomics, initializing those members as the last - * action in the constructor acts as a full barrier, and allow capacity() to - * be thread-safe. */ - write_index_ = 0; - read_index_ = 0; - } - /** - * Push `count` zero or default constructed elements in the array. - * - * Only safely called on the producer thread. - * - * @param count The number of elements to enqueue. - * @return The number of element enqueued. - */ - int enqueue_default(int count) { - return enqueue(nullptr, count); - } - /** - * @brief Put an element in the queue - * - * Only safely called on the producer thread. - * - * @param element The element to put in the queue. - * - * @return 1 if the element was inserted, 0 otherwise. - */ - int enqueue(T& element) { - return enqueue(&element, 1); - } - /** - * Push `count` elements in the ring buffer. - * - * Only safely called on the producer thread. - * - * @param elements a pointer to a buffer containing at least `count` elements. - * If `elements` is nullptr, zero or default constructed elements are - * enqueued. - * @param count The number of elements to read from `elements` - * @return The number of elements successfully coped from `elements` and - * inserted into the ring buffer. - */ - int enqueue(T* elements, int count) { -#ifndef NDEBUG - assert_correct_thread(producer_id); -#endif - - int wr_idx = write_index_.load(std::memory_order_relaxed); - int rd_idx = read_index_.load(std::memory_order_acquire); - - if (full_internal(rd_idx, wr_idx)) { - return 0; - } - - int to_write = std::min(available_write_internal(rd_idx, wr_idx), count); - - /* First part, from the write index to the end of the array. */ - int first_part = std::min(storage_capacity() - wr_idx, to_write); - /* Second part, from the beginning of the array */ - int second_part = to_write - first_part; - - if (elements) { - Copy(data_.get() + wr_idx, elements, first_part); - Copy(data_.get(), elements + first_part, second_part); - } else { - ConstructDefault(data_.get() + wr_idx, first_part); - ConstructDefault(data_.get(), second_part); - } - - write_index_.store(increment_index(wr_idx, to_write), std::memory_order_release); - - return to_write; - } - /** - * Retrieve at most `count` elements from the ring buffer, and copy them to - * `elements`, if non-null. - * - * Only safely called on the consumer side. - * - * @param elements A pointer to a buffer with space for at least `count` - * elements. If `elements` is `nullptr`, `count` element will be discarded. - * @param count The maximum number of elements to dequeue. - * @return The number of elements written to `elements`. - */ - int dequeue(T* elements, int count) { -#ifndef NDEBUG - assert_correct_thread(consumer_id); -#endif - - int rd_idx = read_index_.load(std::memory_order_relaxed); - int wr_idx = write_index_.load(std::memory_order_acquire); - - if (empty_internal(rd_idx, wr_idx)) { - return 0; - } - - int to_read = std::min(available_read_internal(rd_idx, wr_idx), count); - - int first_part = std::min(storage_capacity() - rd_idx, to_read); - int second_part = to_read - first_part; - - if (elements) { - Copy(elements, data_.get() + rd_idx, first_part); - Copy(elements + first_part, data_.get(), second_part); - } - - read_index_.store(increment_index(rd_idx, to_read), std::memory_order_release); - - return to_read; - } - /** - * Get the number of available element for consuming. - * - * Only safely called on the consumer thread. - * - * @return The number of available elements for reading. - */ - int available_read() const { -#ifndef NDEBUG - assert_correct_thread(consumer_id); -#endif - return available_read_internal(read_index_.load(std::memory_order_relaxed), - write_index_.load(std::memory_order_acquire)); - } - /** - * Get the number of available elements for consuming. - * - * Only safely called on the producer thread. - * - * @return The number of empty slots in the buffer, available for writing. - */ - int available_write() const { -#ifndef NDEBUG - assert_correct_thread(producer_id); -#endif - return available_write_internal(read_index_.load(std::memory_order_acquire), - write_index_.load(std::memory_order_relaxed)); - } - /** - * Get the total capacity, for this ring buffer. - * - * Can be called safely on any thread. - * - * @return The maximum capacity of this ring buffer. - */ - int capacity() const { - return storage_capacity() - 1; - } - /** - * Reset the consumer and producer thread identifier, in case the thread are - * being changed. This has to be externally synchronized. This is no-op when - * asserts are disabled. - */ - void reset_thread_ids() { -#ifndef NDEBUG - consumer_id = producer_id = std::thread::id(); -#endif - } - -private: - /** Return true if the ring buffer is empty. - * - * @param read_index the read index to consider - * @param write_index the write index to consider - * @return true if the ring buffer is empty, false otherwise. - **/ - bool empty_internal(int read_index, int write_index) const { - return write_index == read_index; - } - /** Return true if the ring buffer is full. - * - * This happens if the write index is exactly one element behind the read - * index. - * - * @param read_index the read index to consider - * @param write_index the write index to consider - * @return true if the ring buffer is full, false otherwise. - **/ - bool full_internal(int read_index, int write_index) const { - return (write_index + 1) % storage_capacity() == read_index; - } - /** - * Return the size of the storage. It is one more than the number of elements - * that can be stored in the buffer. - * - * @return the number of elements that can be stored in the buffer. - */ - int storage_capacity() const { - return capacity_; - } - /** - * Returns the number of elements available for reading. - * - * @return the number of available elements for reading. - */ - int available_read_internal(int read_index, int write_index) const { - if (write_index >= read_index) { - return write_index - read_index; - } else { - return write_index + storage_capacity() - read_index; - } - } - /** - * Returns the number of empty elements, available for writing. - * - * @return the number of elements that can be written into the array. - */ - int available_write_internal(int read_index, int write_index) const { - /* We substract one element here to always keep at least one sample - * free in the buffer, to distinguish between full and empty array. */ - int rv = read_index - write_index - 1; - if (write_index >= read_index) { - rv += storage_capacity(); - } - return rv; - } - /** - * Increments an index, wrapping it around the storage. - * - * @param index a reference to the index to increment. - * @param increment the number by which `index` is incremented. - * @return the new index. - */ - int increment_index(int index, int increment) const { - ASSERT(increment >= 0); - return (index + increment) % storage_capacity(); - } - /** - * @brief This allows checking that enqueue (resp. dequeue) are always called - * by the right thread. - * - * @param id the id of the thread that has called the calling method first. - */ -#ifndef NDEBUG - static void assert_correct_thread(std::thread::id& id) { - if (id == std::thread::id()) { - id = std::this_thread::get_id(); - return; - } - ASSERT(id == std::this_thread::get_id()); - } -#endif - /** Similar to memcpy, but accounts for the size of an element. */ - template - void PodCopy(CopyT* destination, const CopyT* source, size_t count) { - static_assert(std::is_trivial::value, "Requires trivial type"); - ASSERT(destination && source); - memcpy(destination, source, count * sizeof(CopyT)); - } - /** Similar to a memset to zero, but accounts for the size of an element. */ - template - void PodZero(ZeroT* destination, size_t count) { - static_assert(std::is_trivial::value, "Requires trivial type"); - ASSERT(destination); - memset(destination, 0, count * sizeof(ZeroT)); - } - template - void Copy(CopyT* destination, const CopyT* source, size_t count, Trait) { - for (size_t i = 0; i < count; i++) { - destination[i] = source[i]; - } - } - template - void Copy(CopyT* destination, const CopyT* source, size_t count, std::true_type) { - PodCopy(destination, source, count); - } - /** - * This allows copying a number of elements from a `source` pointer to a - * `destination` pointer, using `memcpy` if it is safe to do so, or a loop that - * calls the constructors and destructors otherwise. - */ - template - void Copy(CopyT* destination, const T* source, size_t count) { - ASSERT(destination && source); - Copy(destination, source, count, typename std::is_trivial::type()); - } - template - void ConstructDefault(ConstructT* destination, size_t count, Trait) { - for (size_t i = 0; i < count; i++) { - destination[i] = ConstructT(); - } - } - template - void ConstructDefault(ConstructT* destination, size_t count, std::true_type) { - PodZero(destination, count); - } - /** - * This allows zeroing (using memset) or default-constructing a number of - * elements calling the constructors and destructors if necessary. - */ - template - void ConstructDefault(ConstructT* destination, size_t count) { - ASSERT(destination); - ConstructDefault(destination, count, typename std::is_arithmetic::type()); - } - /** Index at which the oldest element is at, in samples. */ - std::atomic read_index_; - /** Index at which to write new elements. `write_index` is always at - * least one element ahead of `read_index_`. */ - std::atomic write_index_; - /** Maximum number of elements that can be stored in the ring buffer. */ - const int capacity_; - /** Data storage */ - std::unique_ptr data_; -#ifndef NDEBUG - /** The id of the only thread that is allowed to read from the queue. */ - mutable std::thread::id consumer_id; - /** The id of the only thread that is allowed to write from the queue. */ - mutable std::thread::id producer_id; -#endif -}; diff --git a/src/core/libraries/audio/audioout.cpp b/src/core/libraries/audio/audioout.cpp index 89ea1d3f5..d69454c39 100644 --- a/src/core/libraries/audio/audioout.cpp +++ b/src/core/libraries/audio/audioout.cpp @@ -9,6 +9,8 @@ #include "common/assert.h" #include "common/config.h" #include "common/logging/log.h" +#include "common/polyfill_thread.h" +#include "common/thread.h" #include "core/libraries/audio/audioout.h" #include "core/libraries/audio/audioout_backend.h" #include "core/libraries/audio/audioout_error.h" @@ -21,111 +23,28 @@ std::array ports_out{}; static std::unique_ptr audio; -static std::string_view GetAudioOutPort(OrbisAudioOutPort port) { - switch (port) { - case OrbisAudioOutPort::Main: - return "MAIN"; - case OrbisAudioOutPort::Bgm: - return "BGM"; - case OrbisAudioOutPort::Voice: - return "VOICE"; - case OrbisAudioOutPort::Personal: - return "PERSONAL"; - case OrbisAudioOutPort::Padspk: - return "PADSPK"; - case OrbisAudioOutPort::Aux: - return "AUX"; - default: - return "INVALID"; - } -} - -static std::string_view GetAudioOutParamFormat(OrbisAudioOutParamFormat param) { - switch (param) { - case OrbisAudioOutParamFormat::S16Mono: - return "S16_MONO"; - case OrbisAudioOutParamFormat::S16Stereo: - return "S16_STEREO"; - case OrbisAudioOutParamFormat::S16_8CH: - return "S16_8CH"; - case OrbisAudioOutParamFormat::FloatMono: - return "FLOAT_MONO"; - case OrbisAudioOutParamFormat::FloatStereo: - return "FLOAT_STEREO"; - case OrbisAudioOutParamFormat::Float_8CH: - return "FLOAT_8CH"; - case OrbisAudioOutParamFormat::S16_8CH_Std: - return "S16_8CH_STD"; - case OrbisAudioOutParamFormat::Float_8CH_Std: - return "FLOAT_8CH_STD"; - default: - return "INVALID"; - } -} - -static std::string_view GetAudioOutParamAttr(OrbisAudioOutParamAttr attr) { - switch (attr) { - case OrbisAudioOutParamAttr::None: - return "NONE"; - case OrbisAudioOutParamAttr::Restricted: - return "RESTRICTED"; - case OrbisAudioOutParamAttr::MixToMain: - return "MIX_TO_MAIN"; - default: - return "INVALID"; - } -} - -static bool IsFormatFloat(const OrbisAudioOutParamFormat format) { - switch (format) { - case OrbisAudioOutParamFormat::S16Mono: - case OrbisAudioOutParamFormat::S16Stereo: - case OrbisAudioOutParamFormat::S16_8CH: - case OrbisAudioOutParamFormat::S16_8CH_Std: - return false; - case OrbisAudioOutParamFormat::FloatMono: - case OrbisAudioOutParamFormat::FloatStereo: - case OrbisAudioOutParamFormat::Float_8CH: - case OrbisAudioOutParamFormat::Float_8CH_Std: - return true; - default: - UNREACHABLE_MSG("Unknown format"); - } -} - -static u8 GetFormatNumChannels(const OrbisAudioOutParamFormat format) { - switch (format) { - case OrbisAudioOutParamFormat::S16Mono: - case OrbisAudioOutParamFormat::FloatMono: - return 1; - case OrbisAudioOutParamFormat::S16Stereo: - case OrbisAudioOutParamFormat::FloatStereo: - return 2; - case OrbisAudioOutParamFormat::S16_8CH: - case OrbisAudioOutParamFormat::Float_8CH: - case OrbisAudioOutParamFormat::S16_8CH_Std: - case OrbisAudioOutParamFormat::Float_8CH_Std: - return 8; - default: - UNREACHABLE_MSG("Unknown format"); - } -} - -static u8 GetFormatSampleSize(const OrbisAudioOutParamFormat format) { - switch (format) { - case OrbisAudioOutParamFormat::S16Mono: - case OrbisAudioOutParamFormat::S16Stereo: - case OrbisAudioOutParamFormat::S16_8CH: - case OrbisAudioOutParamFormat::S16_8CH_Std: - return 2; - case OrbisAudioOutParamFormat::FloatMono: - case OrbisAudioOutParamFormat::FloatStereo: - case OrbisAudioOutParamFormat::Float_8CH: - case OrbisAudioOutParamFormat::Float_8CH_Std: - return 4; - default: - UNREACHABLE_MSG("Unknown format"); - } +static AudioFormatInfo GetFormatInfo(const OrbisAudioOutParamFormat format) { + static constexpr std::array format_infos = {{ + // S16Mono + {false, 2, 1, {0}}, + // S16Stereo + {false, 2, 2, {0, 1}}, + // S16_8CH + {false, 2, 8, {0, 1, 2, 3, 4, 5, 6, 7}}, + // FloatMono + {true, 4, 1, {0}}, + // FloatStereo + {true, 4, 2, {0, 1}}, + // Float_8CH + {true, 4, 8, {0, 1, 2, 3, 4, 5, 6, 7}}, + // S16_8CH_Std + {false, 2, 8, {0, 1, 2, 3, 6, 7, 4, 5}}, + // Float_8CH_Std + {true, 4, 8, {0, 1, 2, 3, 6, 7, 4, 5}}, + }}; + const auto index = static_cast(format); + ASSERT_MSG(index < format_infos.size(), "Unknown audio format {}", index); + return format_infos[index]; } int PS4_SYSV_ABI sceAudioOutDeviceIdOpen() { @@ -180,6 +99,10 @@ int PS4_SYSV_ABI sceAudioOutClose(s32 handle) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } + port.output_thread.Stop(); + std::free(port.output_buffer); + port.output_buffer = nullptr; + port.output_ready = false; port.impl = nullptr; return ORBIS_OK; } @@ -263,7 +186,7 @@ int PS4_SYSV_ABI sceAudioOutGetPortState(s32 handle, OrbisAudioOutPortState* sta case OrbisAudioOutPort::Bgm: case OrbisAudioOutPort::Voice: state->output = 1; - state->channel = port.channels_num > 2 ? 2 : port.channels_num; + state->channel = port.format_info.num_channels > 2 ? 2 : port.format_info.num_channels; break; case OrbisAudioOutPort::Personal: case OrbisAudioOutPort::Padspk: @@ -311,16 +234,7 @@ int PS4_SYSV_ABI sceAudioOutInit() { if (audio != nullptr) { return ORBIS_AUDIO_OUT_ERROR_ALREADY_INIT; } - const auto backend = Config::getAudioBackend(); - if (backend == "cubeb") { - audio = std::make_unique(); - } else if (backend == "sdl") { - audio = std::make_unique(); - } else { - // Cubeb as a default fallback. - LOG_ERROR(Lib_AudioOut, "Invalid audio backend '{}', defaulting to cubeb.", backend); - audio = std::make_unique(); - } + audio = std::make_unique(); return ORBIS_OK; } @@ -354,6 +268,30 @@ int PS4_SYSV_ABI sceAudioOutMbusInit() { return ORBIS_OK; } +static void AudioOutputThread(PortOut* port, const std::stop_token& stop) { + { + const auto thread_name = fmt::format("shadPS4:AudioOutputThread:{}", fmt::ptr(port)); + Common::SetCurrentThreadName(thread_name.c_str()); + } + + Common::AccurateTimer timer( + std::chrono::nanoseconds(1000000000ULL * port->buffer_frames / port->sample_rate)); + while (true) { + timer.Start(); + { + std::unique_lock lock{port->output_mutex}; + Common::CondvarWait(port->output_cv, lock, stop, [&] { return port->output_ready; }); + if (stop.stop_requested()) { + break; + } + port->impl->Output(port->output_buffer); + port->output_ready = false; + } + port->output_cv.notify_one(); + timer.End(); + } +} + s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, OrbisAudioOutPort port_type, s32 index, u32 length, u32 sample_rate, @@ -361,9 +299,9 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, LOG_INFO(Lib_AudioOut, "id = {} port_type = {} index = {} length = {} sample_rate = {} " "param_type = {} attr = {}", - user_id, GetAudioOutPort(port_type), index, length, sample_rate, - GetAudioOutParamFormat(param_type.data_format), - GetAudioOutParamAttr(param_type.attributes)); + user_id, magic_enum::enum_name(port_type), index, length, sample_rate, + magic_enum::enum_name(param_type.data_format.Value()), + magic_enum::enum_name(param_type.attributes.Value())); if ((port_type < OrbisAudioOutPort::Main || port_type > OrbisAudioOutPort::Padspk) && (port_type != OrbisAudioOutPort::Aux)) { LOG_ERROR(Lib_AudioOut, "Invalid port type"); @@ -403,17 +341,18 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, } port->type = port_type; - port->format = format; - port->is_float = IsFormatFloat(format); - port->sample_size = GetFormatSampleSize(format); - port->channels_num = GetFormatNumChannels(format); - port->samples_num = length; - port->frame_size = port->sample_size * port->channels_num; - port->buffer_size = port->frame_size * port->samples_num; - port->freq = sample_rate; + port->format_info = GetFormatInfo(format); + port->sample_rate = sample_rate; + port->buffer_frames = length; port->volume.fill(SCE_AUDIO_OUT_VOLUME_0DB); + port->impl = audio->Open(*port); + port->output_buffer = std::malloc(port->BufferSize()); + port->output_ready = false; + port->output_thread.Run( + [port](const std::stop_token& stop) { AudioOutputThread(&*port, stop); }); + return std::distance(ports_out.begin(), port) + 1; } @@ -426,24 +365,30 @@ s32 PS4_SYSV_ABI sceAudioOutOutput(s32 handle, void* ptr) { if (handle < 1 || handle > SCE_AUDIO_OUT_NUM_PORTS) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - if (ptr == nullptr) { - // Nothing to output - return ORBIS_OK; - } auto& port = ports_out.at(handle - 1); if (!port.impl) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - port.impl->Output(ptr, port.buffer_size); + { + std::unique_lock lock{port.output_mutex}; + port.output_cv.wait(lock, [&] { return !port.output_ready; }); + if (ptr != nullptr) { + std::memcpy(port.output_buffer, ptr, port.BufferSize()); + port.output_ready = true; + } + } + port.output_cv.notify_one(); return ORBIS_OK; } int PS4_SYSV_ABI sceAudioOutOutputs(OrbisAudioOutOutputParam* param, u32 num) { for (u32 i = 0; i < num; i++) { - if (const auto err = sceAudioOutOutput(param[i].handle, param[i].ptr); err != 0) - return err; + const auto [handle, ptr] = param[i]; + if (const auto ret = sceAudioOutOutput(handle, ptr); ret != ORBIS_OK) { + return ret; + } } return ORBIS_OK; } @@ -549,30 +494,9 @@ s32 PS4_SYSV_ABI sceAudioOutSetVolume(s32 handle, s32 flag, s32* vol) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - for (int i = 0; i < port.channels_num; i++, flag >>= 1u) { - auto bit = flag & 0x1u; - if (bit == 1) { - int src_index = i; - if (port.format == OrbisAudioOutParamFormat::Float_8CH_Std || - port.format == OrbisAudioOutParamFormat::S16_8CH_Std) { - switch (i) { - case 4: - src_index = 6; - break; - case 5: - src_index = 7; - break; - case 6: - src_index = 4; - break; - case 7: - src_index = 5; - break; - default: - break; - } - } - port.volume[i] = vol[src_index]; + for (int i = 0; i < port.format_info.num_channels; i++, flag >>= 1u) { + if (flag & 0x1u) { + port.volume[i] = vol[i]; } } diff --git a/src/core/libraries/audio/audioout.h b/src/core/libraries/audio/audioout.h index 58c77db99..4f7378dcd 100644 --- a/src/core/libraries/audio/audioout.h +++ b/src/core/libraries/audio/audioout.h @@ -6,6 +6,7 @@ #include #include "common/bit_field.h" +#include "core/libraries/kernel/threads.h" #include "core/libraries/system/userservice.h" namespace Libraries::AudioOut { @@ -14,12 +15,12 @@ class PortBackend; // Main up to 8 ports, BGM 1 port, voice up to 4 ports, // personal up to 4 ports, padspk up to 5 ports, aux 1 port -constexpr int SCE_AUDIO_OUT_NUM_PORTS = 22; -constexpr int SCE_AUDIO_OUT_VOLUME_0DB = 32768; // max volume value +constexpr s32 SCE_AUDIO_OUT_NUM_PORTS = 22; +constexpr s32 SCE_AUDIO_OUT_VOLUME_0DB = 32768; // max volume value enum class OrbisAudioOutPort { Main = 0, Bgm = 1, Voice = 2, Personal = 3, Padspk = 4, Aux = 127 }; -enum class OrbisAudioOutParamFormat { +enum class OrbisAudioOutParamFormat : u32 { S16Mono = 0, S16Stereo = 1, S16_8CH = 2, @@ -30,7 +31,7 @@ enum class OrbisAudioOutParamFormat { Float_8CH_Std = 7 }; -enum class OrbisAudioOutParamAttr { +enum class OrbisAudioOutParamAttr : u32 { None = 0, Restricted = 1, MixToMain = 2, @@ -59,19 +60,37 @@ struct OrbisAudioOutPortState { u64 reserved64[2]; }; +struct AudioFormatInfo { + bool is_float; + u8 sample_size; + u8 num_channels; + /// Layout array remapping channel indices, specified in this order: + /// FL, FR, FC, LFE, BL, BR, SL, SR + std::array channel_layout; + + [[nodiscard]] u16 FrameSize() const { + return sample_size * num_channels; + } +}; + struct PortOut { std::unique_ptr impl{}; + void* output_buffer; + std::mutex output_mutex; + std::condition_variable_any output_cv; + bool output_ready; + Kernel::Thread output_thread{}; + OrbisAudioOutPort type; - OrbisAudioOutParamFormat format; - bool is_float; - u8 sample_size; - u8 channels_num; - u32 samples_num; - u32 frame_size; - u32 buffer_size; - u32 freq; - std::array volume; + AudioFormatInfo format_info; + u32 sample_rate; + u32 buffer_frames; + std::array volume; + + [[nodiscard]] u32 BufferSize() const { + return buffer_frames * format_info.FrameSize(); + } }; int PS4_SYSV_ABI sceAudioOutDeviceIdOpen(); diff --git a/src/core/libraries/audio/audioout_backend.h b/src/core/libraries/audio/audioout_backend.h index f423d4963..0f36f19c8 100644 --- a/src/core/libraries/audio/audioout_backend.h +++ b/src/core/libraries/audio/audioout_backend.h @@ -3,8 +3,6 @@ #pragma once -typedef struct cubeb cubeb; - namespace Libraries::AudioOut { struct PortOut; @@ -13,7 +11,10 @@ class PortBackend { public: virtual ~PortBackend() = default; - virtual void Output(void* ptr, size_t size) = 0; + /// Guaranteed to be called in intervals of at least port buffer time, + /// with size equal to port buffer size. + virtual void Output(void* ptr) = 0; + virtual void SetVolume(const std::array& ch_volumes) = 0; }; @@ -25,20 +26,6 @@ public: virtual std::unique_ptr Open(PortOut& port) = 0; }; -class CubebAudioOut final : public AudioOutBackend { -public: - CubebAudioOut(); - ~CubebAudioOut() override; - - std::unique_ptr Open(PortOut& port) override; - -private: - cubeb* ctx = nullptr; -#ifdef _WIN32 - bool owns_com = false; -#endif -}; - class SDLAudioOut final : public AudioOutBackend { public: std::unique_ptr Open(PortOut& port) override; diff --git a/src/core/libraries/audio/cubeb_audio.cpp b/src/core/libraries/audio/cubeb_audio.cpp deleted file mode 100644 index 4127931b7..000000000 --- a/src/core/libraries/audio/cubeb_audio.cpp +++ /dev/null @@ -1,174 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include - -#include "common/logging/log.h" -#include "common/ringbuffer.h" -#include "core/libraries/audio/audioout.h" -#include "core/libraries/audio/audioout_backend.h" - -#ifdef _WIN32 -#include -#endif - -namespace Libraries::AudioOut { - -class CubebPortBackend : public PortBackend { -public: - CubebPortBackend(cubeb* ctx, const PortOut& port) - : frame_size(port.frame_size), buffer(static_cast(port.buffer_size) * 4) { - if (!ctx) { - return; - } - const auto get_channel_layout = [&port] -> cubeb_channel_layout { - switch (port.channels_num) { - case 1: - return CUBEB_LAYOUT_MONO; - case 2: - return CUBEB_LAYOUT_STEREO; - case 8: - return CUBEB_LAYOUT_3F4_LFE; - default: - UNREACHABLE(); - } - }; - cubeb_stream_params stream_params = { - .format = port.is_float ? CUBEB_SAMPLE_FLOAT32LE : CUBEB_SAMPLE_S16LE, - .rate = port.freq, - .channels = port.channels_num, - .layout = get_channel_layout(), - .prefs = CUBEB_STREAM_PREF_NONE, - }; - u32 latency_frames = 512; - if (const auto ret = cubeb_get_min_latency(ctx, &stream_params, &latency_frames); - ret != CUBEB_OK) { - LOG_WARNING(Lib_AudioOut, - "Could not get minimum cubeb audio latency, falling back to default: {}", - ret); - } - char stream_name[64]; - snprintf(stream_name, sizeof(stream_name), "shadPS4 stream %p", this); - if (const auto ret = cubeb_stream_init(ctx, &stream, stream_name, nullptr, nullptr, nullptr, - &stream_params, latency_frames, &DataCallback, - &StateCallback, this); - ret != CUBEB_OK) { - LOG_ERROR(Lib_AudioOut, "Failed to create cubeb stream: {}", ret); - return; - } - if (const auto ret = cubeb_stream_start(stream); ret != CUBEB_OK) { - LOG_ERROR(Lib_AudioOut, "Failed to start cubeb stream: {}", ret); - cubeb_stream_destroy(stream); - stream = nullptr; - return; - } - } - - ~CubebPortBackend() override { - if (!stream) { - return; - } - if (const auto ret = cubeb_stream_stop(stream); ret != CUBEB_OK) { - LOG_WARNING(Lib_AudioOut, "Failed to stop cubeb stream: {}", ret); - } - cubeb_stream_destroy(stream); - stream = nullptr; - } - - void Output(void* ptr, size_t size) override { - if (!stream) { - return; - } - auto* data = static_cast(ptr); - - std::unique_lock lock{buffer_mutex}; - buffer_cv.wait(lock, [&] { return buffer.available_write() >= size; }); - buffer.enqueue(data, static_cast(size)); - } - - void SetVolume(const std::array& ch_volumes) override { - if (!stream) { - return; - } - // Cubeb does not have per-channel volumes, for now just take the maximum of the channels. - const auto vol = *std::ranges::max_element(ch_volumes); - if (const auto ret = - cubeb_stream_set_volume(stream, static_cast(vol) / SCE_AUDIO_OUT_VOLUME_0DB); - ret != CUBEB_OK) { - LOG_WARNING(Lib_AudioOut, "Failed to change cubeb stream volume: {}", ret); - } - } - -private: - static long DataCallback(cubeb_stream* stream, void* user_data, const void* in, void* out, - long num_frames) { - auto* stream_data = static_cast(user_data); - const auto out_data = static_cast(out); - const auto requested_size = static_cast(num_frames * stream_data->frame_size); - - std::unique_lock lock{stream_data->buffer_mutex}; - const auto dequeued_size = stream_data->buffer.dequeue(out_data, requested_size); - lock.unlock(); - stream_data->buffer_cv.notify_one(); - - if (dequeued_size < requested_size) { - // Need to fill remaining space with silence. - std::memset(out_data + dequeued_size, 0, requested_size - dequeued_size); - } - return num_frames; - } - - static void StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) { - switch (state) { - case CUBEB_STATE_STARTED: - LOG_INFO(Lib_AudioOut, "Cubeb stream started"); - break; - case CUBEB_STATE_STOPPED: - LOG_INFO(Lib_AudioOut, "Cubeb stream stopped"); - break; - case CUBEB_STATE_DRAINED: - LOG_INFO(Lib_AudioOut, "Cubeb stream drained"); - break; - case CUBEB_STATE_ERROR: - LOG_ERROR(Lib_AudioOut, "Cubeb stream encountered an error"); - break; - } - } - - size_t frame_size; - RingBuffer buffer; - std::mutex buffer_mutex; - std::condition_variable buffer_cv; - cubeb_stream* stream{}; -}; - -CubebAudioOut::CubebAudioOut() { -#ifdef _WIN32 - // Need to initialize COM for this thread on Windows, in case WASAPI backend is used. - owns_com = CoInitializeEx(nullptr, COINIT_MULTITHREADED) == S_OK; -#endif - if (const auto ret = cubeb_init(&ctx, "shadPS4", nullptr); ret != CUBEB_OK) { - LOG_CRITICAL(Lib_AudioOut, "Failed to create cubeb context: {}", ret); - } -} - -CubebAudioOut::~CubebAudioOut() { - if (ctx) { - cubeb_destroy(ctx); - ctx = nullptr; - } -#ifdef _WIN32 - if (owns_com) { - CoUninitialize(); - owns_com = false; - } -#endif -} - -std::unique_ptr CubebAudioOut::Open(PortOut& port) { - return std::make_unique(ctx, port); -} - -} // namespace Libraries::AudioOut diff --git a/src/core/libraries/audio/sdl_audio.cpp b/src/core/libraries/audio/sdl_audio.cpp index 598941ba7..59d2d5cfb 100644 --- a/src/core/libraries/audio/sdl_audio.cpp +++ b/src/core/libraries/audio/sdl_audio.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "common/logging/log.h" #include "core/libraries/audio/audioout.h" @@ -10,15 +11,21 @@ namespace Libraries::AudioOut { -constexpr int AUDIO_STREAM_BUFFER_THRESHOLD = 65536; // Define constant for buffer threshold - class SDLPortBackend : public PortBackend { public: - explicit SDLPortBackend(const PortOut& port) { + explicit SDLPortBackend(const PortOut& port) + : frame_size(port.format_info.FrameSize()), buffer_size(port.BufferSize()) { + // We want the latency for delivering frames out to be as small as possible, + // so set the sample frames hint to the number of frames per buffer. + const auto samples_num_str = std::to_string(port.buffer_frames); + if (!SDL_SetHint(SDL_HINT_AUDIO_DEVICE_SAMPLE_FRAMES, samples_num_str.c_str())) { + LOG_WARNING(Lib_AudioOut, "Failed to set SDL audio sample frames hint to {}: {}", + samples_num_str, SDL_GetError()); + } const SDL_AudioSpec fmt = { - .format = port.is_float ? SDL_AUDIO_F32 : SDL_AUDIO_S16, - .channels = port.channels_num, - .freq = static_cast(port.freq), + .format = port.format_info.is_float ? SDL_AUDIO_F32LE : SDL_AUDIO_S16LE, + .channels = port.format_info.num_channels, + .freq = static_cast(port.sample_rate), }; stream = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &fmt, nullptr, nullptr); @@ -26,6 +33,15 @@ public: LOG_ERROR(Lib_AudioOut, "Failed to create SDL audio stream: {}", SDL_GetError()); return; } + queue_threshold = CalculateQueueThreshold(); + if (!SDL_SetAudioStreamInputChannelMap(stream, port.format_info.channel_layout.data(), + port.format_info.num_channels)) { + LOG_ERROR(Lib_AudioOut, "Failed to configure SDL audio stream channel map: {}", + SDL_GetError()); + SDL_DestroyAudioStream(stream); + stream = nullptr; + return; + } if (!SDL_ResumeAudioStreamDevice(stream)) { LOG_ERROR(Lib_AudioOut, "Failed to resume SDL audio stream: {}", SDL_GetError()); SDL_DestroyAudioStream(stream); @@ -42,14 +58,23 @@ public: stream = nullptr; } - void Output(void* ptr, size_t size) override { + void Output(void* ptr) override { if (!stream) { return; } - SDL_PutAudioStreamData(stream, ptr, static_cast(size)); - while (SDL_GetAudioStreamAvailable(stream) > AUDIO_STREAM_BUFFER_THRESHOLD) { - // Yield to allow the stream to drain. - std::this_thread::yield(); + // AudioOut library manages timing, but we still need to guard against the SDL + // audio queue stalling, which may happen during device changes, for example. + // Otherwise, latency may grow over time unbounded. + if (const auto queued = SDL_GetAudioStreamQueued(stream); queued >= queue_threshold) { + LOG_WARNING(Lib_AudioOut, + "SDL audio queue backed up ({} queued, {} threshold), clearing.", queued, + queue_threshold); + SDL_ClearAudioStream(stream); + // Recalculate the threshold in case this happened because of a device change. + queue_threshold = CalculateQueueThreshold(); + } + if (!SDL_PutAudioStreamData(stream, ptr, static_cast(buffer_size))) { + LOG_ERROR(Lib_AudioOut, "Failed to output to SDL audio stream: {}", SDL_GetError()); } } @@ -66,6 +91,21 @@ public: } private: + [[nodiscard]] u32 CalculateQueueThreshold() const { + SDL_AudioSpec discard; + int sdl_buffer_frames; + if (!SDL_GetAudioDeviceFormat(SDL_GetAudioStreamDevice(stream), &discard, + &sdl_buffer_frames)) { + LOG_WARNING(Lib_AudioOut, "Failed to get SDL audio stream buffer size: {}", + SDL_GetError()); + sdl_buffer_frames = 0; + } + return std::max(buffer_size, sdl_buffer_frames * frame_size) * 4; + } + + u32 frame_size; + u32 buffer_size; + u32 queue_threshold; SDL_AudioStream* stream; }; diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index 6d4de6603..0c1375c7f 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -211,7 +211,6 @@ SettingsDialog::SettingsDialog(std::span physical_devices, ui->enableCompatibilityCheckBox->installEventFilter(this); ui->checkCompatibilityOnStartupCheckBox->installEventFilter(this); ui->updateCompatibilityButton->installEventFilter(this); - ui->audioBackendComboBox->installEventFilter(this); // Input ui->hideCursorGroupBox->installEventFilter(this); @@ -305,8 +304,6 @@ void SettingsDialog::LoadValuesFromConfig() { toml::find_or(data, "General", "compatibilityEnabled", false)); ui->checkCompatibilityOnStartupCheckBox->setChecked( toml::find_or(data, "General", "checkCompatibilityOnStartup", false)); - ui->audioBackendComboBox->setCurrentText( - QString::fromStdString(toml::find_or(data, "Audio", "backend", "cubeb"))); #ifdef ENABLE_UPDATER ui->updateCheckBox->setChecked(toml::find_or(data, "General", "autoUpdate", false)); @@ -428,8 +425,6 @@ void SettingsDialog::updateNoteTextEdit(const QString& elementName) { text = tr("checkCompatibilityOnStartupCheckBox"); } else if (elementName == "updateCompatibilityButton") { text = tr("updateCompatibilityButton"); - } else if (elementName == "audioBackendGroupBox") { - text = tr("audioBackendGroupBox"); } // Input @@ -543,7 +538,6 @@ void SettingsDialog::UpdateSettings() { Config::setUpdateChannel(ui->updateComboBox->currentText().toStdString()); Config::setCompatibilityEnabled(ui->enableCompatibilityCheckBox->isChecked()); Config::setCheckCompatibilityOnStartup(ui->checkCompatibilityOnStartupCheckBox->isChecked()); - Config::setAudioBackend(ui->audioBackendComboBox->currentText().toStdString()); #ifdef ENABLE_DISCORD_RPC auto* rpc = Common::Singleton::Instance(); diff --git a/src/qt_gui/settings_dialog.ui b/src/qt_gui/settings_dialog.ui index f2d6b77d2..af1edb0dd 100644 --- a/src/qt_gui/settings_dialog.ui +++ b/src/qt_gui/settings_dialog.ui @@ -263,29 +263,6 @@ - - - - Audio Backend - - - - - - - cubeb - - - - - sdl - - - - - - - From 174b5c0f954c1348be29c0b9177a6b89145dd6c9 Mon Sep 17 00:00:00 2001 From: psucien Date: Tue, 31 Dec 2024 17:24:56 +0100 Subject: [PATCH 43/45] kernel: equeue: added missing `sceKernelDeleteHRTimerEvent` --- src/core/libraries/kernel/equeue.cpp | 14 ++++++++++++++ src/core/libraries/kernel/equeue.h | 7 +++++++ 2 files changed, 21 insertions(+) diff --git a/src/core/libraries/kernel/equeue.cpp b/src/core/libraries/kernel/equeue.cpp index 03259cd22..64d4966c0 100644 --- a/src/core/libraries/kernel/equeue.cpp +++ b/src/core/libraries/kernel/equeue.cpp @@ -283,6 +283,19 @@ s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* return ORBIS_OK; } +int PS4_SYSV_ABI sceKernelDeleteHRTimerEvent(SceKernelEqueue eq, int id) { + if (eq == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + if (eq->HasSmallTimer()) { + return eq->RemoveSmallTimer(id) ? ORBIS_OK : ORBIS_KERNEL_ERROR_ENOENT; + } else { + return eq->RemoveEvent(id, SceKernelEvent::Filter::HrTimer) ? ORBIS_OK + : ORBIS_KERNEL_ERROR_ENOENT; + } +} + int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id) { if (eq == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; @@ -362,6 +375,7 @@ void RegisterEventQueue(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("4R6-OvI2cEA", "libkernel", 1, "libkernel", 1, 1, sceKernelAddUserEvent); LIB_FUNCTION("WDszmSbWuDk", "libkernel", 1, "libkernel", 1, 1, sceKernelAddUserEventEdge); LIB_FUNCTION("R74tt43xP6k", "libkernel", 1, "libkernel", 1, 1, sceKernelAddHRTimerEvent); + LIB_FUNCTION("J+LF6LwObXU", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteHRTimerEvent); LIB_FUNCTION("F6e0kwo4cnk", "libkernel", 1, "libkernel", 1, 1, sceKernelTriggerUserEvent); LIB_FUNCTION("LJDwdSNTnDg", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteUserEvent); LIB_FUNCTION("mJ7aghmgvfc", "libkernel", 1, "libkernel", 1, 1, sceKernelGetEventId); diff --git a/src/core/libraries/kernel/equeue.h b/src/core/libraries/kernel/equeue.h index f8759137c..2db5e6ca7 100644 --- a/src/core/libraries/kernel/equeue.h +++ b/src/core/libraries/kernel/equeue.h @@ -111,6 +111,13 @@ public: bool HasSmallTimer() const { return small_timer_event.event.data != 0; } + bool RemoveSmallTimer(u64 id) { + if (HasSmallTimer() && small_timer_event.event.ident == id) { + small_timer_event = {}; + return true; + } + return false; + } int WaitForSmallTimer(SceKernelEvent* ev, int num, u32 micros); From 65cd3be4cae48c9479a83092b2d7b4a28ffaf0e4 Mon Sep 17 00:00:00 2001 From: oltolm Date: Tue, 31 Dec 2024 19:08:47 +0100 Subject: [PATCH 44/45] Qt: fix deprecation warnings (#1672) --- src/qt_gui/check_update.cpp | 7 +++++-- src/qt_gui/settings_dialog.cpp | 22 +++++++++++++++++----- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/qt_gui/check_update.cpp b/src/qt_gui/check_update.cpp index edd55b804..e3e019144 100644 --- a/src/qt_gui/check_update.cpp +++ b/src/qt_gui/check_update.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -24,11 +25,9 @@ #include #include #include -#include #include "check_update.h" using namespace Common::FS; -namespace fs = std::filesystem; CheckUpdate::CheckUpdate(const bool showMessage, QWidget* parent) : QDialog(parent), networkManager(new QNetworkAccessManager(this)) { @@ -254,7 +253,11 @@ void CheckUpdate::setupUI(const QString& downloadUrl, const QString& latestDate, connect(noButton, &QPushButton::clicked, this, [this]() { close(); }); autoUpdateCheckBox->setChecked(Config::autoUpdate()); +#if (QT_VERSION < QT_VERSION_CHECK(6, 7, 0)) connect(autoUpdateCheckBox, &QCheckBox::stateChanged, this, [](int state) { +#else + connect(autoUpdateCheckBox, &QCheckBox::checkStateChanged, this, [](Qt::CheckState state) { +#endif const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); Config::setAutoUpdate(state == Qt::Checked); Config::save(user_dir / "config.toml"); diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index 0c1375c7f..5cd0a4d65 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -3,22 +3,24 @@ #include #include +#include #include +#include -#include #include "common/config.h" +#include "common/version.h" #include "qt_gui/compatibility_info.h" #ifdef ENABLE_DISCORD_RPC #include "common/discord_rpc_handler.h" +#include "common/singleton.h" #endif #ifdef ENABLE_UPDATER #include "check_update.h" #endif #include +#include "background_music_player.h" #include "common/logging/backend.h" #include "common/logging/filter.h" -#include "common/logging/formatter.h" -#include "main_window.h" #include "settings_dialog.h" #include "ui_settings_dialog.h" QStringList languageNames = {"Arabic", @@ -130,8 +132,13 @@ SettingsDialog::SettingsDialog(std::span physical_devices, // GENERAL TAB { #ifdef ENABLE_UPDATER +#if (QT_VERSION < QT_VERSION_CHECK(6, 7, 0)) connect(ui->updateCheckBox, &QCheckBox::stateChanged, this, [](int state) { Config::setAutoUpdate(state == Qt::Checked); }); +#else + connect(ui->updateCheckBox, &QCheckBox::checkStateChanged, this, + [](Qt::CheckState state) { Config::setAutoUpdate(state == Qt::Checked); }); +#endif connect(ui->updateComboBox, &QComboBox::currentTextChanged, this, [](const QString& channel) { Config::setUpdateChannel(channel.toStdString()); }); @@ -150,7 +157,12 @@ SettingsDialog::SettingsDialog(std::span physical_devices, emit CompatibilityChanged(); }); +#if (QT_VERSION < QT_VERSION_CHECK(6, 7, 0)) connect(ui->enableCompatibilityCheckBox, &QCheckBox::stateChanged, this, [this](int state) { +#else + connect(ui->enableCompatibilityCheckBox, &QCheckBox::checkStateChanged, this, + [this](Qt::CheckState state) { +#endif Config::setCompatibilityEnabled(state); emit CompatibilityChanged(); }); @@ -358,7 +370,7 @@ void SettingsDialog::InitializeEmulatorLanguages() { idx++; } - connect(ui->emulatorLanguageComboBox, qOverload(&QComboBox::currentIndexChanged), this, + connect(ui->emulatorLanguageComboBox, &QComboBox::currentIndexChanged, this, &SettingsDialog::OnLanguageChanged); } @@ -578,4 +590,4 @@ void SettingsDialog::ResetInstallFolders() { } Config::setGameInstallDirs(settings_install_dirs_config); } -} \ No newline at end of file +} From d69341fd31a4209cf2b29b62cd3101301212dac4 Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 1 Jan 2025 03:40:28 +0100 Subject: [PATCH 45/45] hot-fix: detiler: forgotten lut optimizations --- src/video_core/amdgpu/liverpool.cpp | 2 +- src/video_core/host_shaders/detile_m32x1.comp | 2 +- src/video_core/host_shaders/detile_m32x2.comp | 2 +- src/video_core/host_shaders/detile_m32x4.comp | 2 +- src/video_core/host_shaders/detile_m8x1.comp | 2 +- src/video_core/host_shaders/detile_m8x2.comp | 2 +- .../host_shaders/detile_macro32x1.comp | 70 ++++++++++--------- .../host_shaders/detile_macro32x2.comp | 70 ++++++++++--------- 8 files changed, 78 insertions(+), 74 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 2926bcc69..bdf4cc92a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -815,7 +815,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { } if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); - rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); + rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:DispatchIndirect", vqid, cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } diff --git a/src/video_core/host_shaders/detile_m32x1.comp b/src/video_core/host_shaders/detile_m32x1.comp index 802f5f531..cdc8d0018 100644 --- a/src/video_core/host_shaders/detile_m32x1.comp +++ b/src/video_core/host_shaders/detile_m32x1.comp @@ -20,7 +20,7 @@ layout(push_constant) uniform image_info { } info; // Inverse morton LUT, small enough to fit into K$ -uint rmort[16] = { +const uint rmort[16] = { 0x11011000, 0x31213020, 0x13031202, 0x33233222, 0x51415040, 0x71617060, diff --git a/src/video_core/host_shaders/detile_m32x2.comp b/src/video_core/host_shaders/detile_m32x2.comp index 90063a185..c128ba5a1 100644 --- a/src/video_core/host_shaders/detile_m32x2.comp +++ b/src/video_core/host_shaders/detile_m32x2.comp @@ -20,7 +20,7 @@ layout(push_constant) uniform image_info { } info; // Inverse morton LUT, small enough to fit into K$ -uint rmort[16] = { +const uint rmort[16] = { 0x11011000, 0x31213020, 0x13031202, 0x33233222, 0x51415040, 0x71617060, diff --git a/src/video_core/host_shaders/detile_m32x4.comp b/src/video_core/host_shaders/detile_m32x4.comp index e1b988172..a09a0b4c4 100644 --- a/src/video_core/host_shaders/detile_m32x4.comp +++ b/src/video_core/host_shaders/detile_m32x4.comp @@ -20,7 +20,7 @@ layout(push_constant) uniform image_info { } info; // Inverse morton LUT, small enough to fit into K$ -uint rmort[16] = { +const uint rmort[16] = { 0x11011000, 0x31213020, 0x13031202, 0x33233222, 0x51415040, 0x71617060, diff --git a/src/video_core/host_shaders/detile_m8x1.comp b/src/video_core/host_shaders/detile_m8x1.comp index 39d0aaeb1..ecf706450 100644 --- a/src/video_core/host_shaders/detile_m8x1.comp +++ b/src/video_core/host_shaders/detile_m8x1.comp @@ -48,4 +48,4 @@ void main() { uint dw_ofs_x = target_tile_x * 2 + col; // 2 = uints uint dw_ofs_y = (target_tile_y * MICRO_TILE_DIM + row) * tiles_per_pitch * 2; // 2 = uints out_data[dw_ofs_x + dw_ofs_y] = dst_tx; -} \ No newline at end of file +} diff --git a/src/video_core/host_shaders/detile_m8x2.comp b/src/video_core/host_shaders/detile_m8x2.comp index 3f8e5ab33..909a14acc 100644 --- a/src/video_core/host_shaders/detile_m8x2.comp +++ b/src/video_core/host_shaders/detile_m8x2.comp @@ -25,7 +25,7 @@ layout(push_constant) uniform image_info { #define TEXELS_PER_ELEMENT 2 // Inverse morton LUT, small enough to fit into K$ -uint rmort[16] = { +const uint rmort[16] = { 0x11011000, 0x31213020, 0x13031202, 0x33233222, 0x51415040, 0x71617060, diff --git a/src/video_core/host_shaders/detile_macro32x1.comp b/src/video_core/host_shaders/detile_macro32x1.comp index 086fbcfb5..ecac47d1c 100644 --- a/src/video_core/host_shaders/detile_macro32x1.comp +++ b/src/video_core/host_shaders/detile_macro32x1.comp @@ -21,46 +21,46 @@ layout(push_constant) uniform image_info { } info; // Each LUT is 64 bytes, so should fit into K$ given tiled slices locality -const uint lut_32bpp[][64] = { +const uint lut_32bpp[][16] = { { - 0x00, 0x01, 0x04, 0x05, 0x40, 0x41, 0x44, 0x45, - 0x02, 0x03, 0x06, 0x07, 0x42, 0x43, 0x46, 0x47, - 0x10, 0x11, 0x14, 0x15, 0x50, 0x51, 0x54, 0x55, - 0x12, 0x13, 0x16, 0x17, 0x52, 0x53, 0x56, 0x57, - 0x80, 0x81, 0x84, 0x85, 0xc0, 0xc1, 0xc4, 0xc5, - 0x82, 0x83, 0x86, 0x87, 0xc2, 0xc3, 0xc6, 0xc7, - 0x90, 0x91, 0x94, 0x95, 0xd0, 0xd1, 0xd4, 0xd5, - 0x92, 0x93, 0x96, 0x97, 0xd2, 0xd3, 0xd6, 0xd7, + 0x05040100, 0x45444140, + 0x07060302, 0x47464342, + 0x15141110, 0x55545150, + 0x17161312, 0x57565352, + 0x85848180, 0xc5c4c1c0, + 0x87868382, 0xc7c6c3c2, + 0x95949190, 0xd5d4d1d0, + 0x97969392, 0xd7d6d3d2, }, { - 0x08, 0x09, 0x0c, 0x0d, 0x48, 0x49, 0x4c, 0x4d, - 0x0a, 0x0b, 0x0e, 0x0f, 0x4a, 0x4b, 0x4e, 0x4f, - 0x18, 0x19, 0x1c, 0x1d, 0x58, 0x59, 0x5c, 0x5d, - 0x1a, 0x1b, 0x1e, 0x1f, 0x5a, 0x5b, 0x5e, 0x5f, - 0x88, 0x89, 0x8c, 0x8d, 0xc8, 0xc9, 0xcc, 0xcd, - 0x8a, 0x8b, 0x8e, 0x8f, 0xca, 0xcb, 0xce, 0xcf, - 0x98, 0x99, 0x9c, 0x9d, 0xd8, 0xd9, 0xdc, 0xdd, - 0x9a, 0x9b, 0x9e, 0x9f, 0xda, 0xdb, 0xde, 0xdf, + 0x0d0c0908, 0x4d4c4948, + 0x0f0e0b0a, 0x4f4e4b4a, + 0x1d1c1918, 0x5d5c5958, + 0x1f1e1b1a, 0x5f5e5b5a, + 0x8d8c8988, 0xcdccc9c8, + 0x8f8e8b8a, 0xcfcecbca, + 0x9d9c9998, 0xdddcd9d8, + 0x9f9e9b9a, 0xdfdedbda, }, { - 0x20, 0x21, 0x24, 0x25, 0x60, 0x61, 0x64, 0x65, - 0x22, 0x23, 0x26, 0x27, 0x62, 0x63, 0x66, 0x67, - 0x30, 0x31, 0x34, 0x35, 0x70, 0x71, 0x74, 0x75, - 0x32, 0x33, 0x36, 0x37, 0x72, 0x73, 0x76, 0x77, - 0xa0, 0xa1, 0xa4, 0xa5, 0xe0, 0xe1, 0xe4, 0xe5, - 0xa2, 0xa3, 0xa6, 0xa7, 0xe2, 0xe3, 0xe6, 0xe7, - 0xb0, 0xb1, 0xb4, 0xb5, 0xf0, 0xf1, 0xf4, 0xf5, - 0xb2, 0xb3, 0xb6, 0xb7, 0xf2, 0xf3, 0xf6, 0xf7, + 0x25242120, 0x65646160, + 0x27262322, 0x67666362, + 0x35343130, 0x75747170, + 0x37363332, 0x77767372, + 0xa5a4a1a0, 0xe5e4e1e0, + 0xa7a6a3a2, 0xe7e6e3e2, + 0xb5b4b1b0, 0xf5f4f1f0, + 0xb7b6b3b2, 0xf7f6f3f2, }, { - 0x28, 0x29, 0x2c, 0x2d, 0x68, 0x69, 0x6c, 0x6d, - 0x2a, 0x2b, 0x2e, 0x2f, 0x6a, 0x6b, 0x6e, 0x6f, - 0x38, 0x39, 0x3c, 0x3d, 0x78, 0x79, 0x7c, 0x7d, - 0x3a, 0x3b, 0x3e, 0x3f, 0x7a, 0x7b, 0x7e, 0x7f, - 0xa8, 0xa9, 0xac, 0xad, 0xe8, 0xe9, 0xec, 0xed, - 0xaa, 0xab, 0xae, 0xaf, 0xea, 0xeb, 0xee, 0xef, - 0xb8, 0xb9, 0xbc, 0xbd, 0xf8, 0xf9, 0xfc, 0xfd, - 0xba, 0xbb, 0xbe, 0xbf, 0xfa, 0xfb, 0xfe, 0xff, + 0x2d2c2928, 0x6d6c6968, + 0x2f2e2b2a, 0x6f6e6b6a, + 0x3d3c3938, 0x7d7c7978, + 0x3f3e3b3a, 0x7f7e7b7a, + 0xadaca9a8, 0xedece9e8, + 0xafaeabaa, 0xefeeebea, + 0xbdbcb9b8, 0xfdfcf9f8, + 0xbfbebbba, 0xfffefbfa, } }; @@ -77,7 +77,9 @@ void main() { uint col = bitfieldExtract(x, 0, 3); uint row = bitfieldExtract(y, 0, 3); uint lut = bitfieldExtract(z, 0, 2); - uint idx = lut_32bpp[lut][col + row * MICRO_TILE_DIM]; + uint idx_dw = lut_32bpp[lut][(col + row * MICRO_TILE_DIM) >> 2u]; + uint byte_ofs = gl_LocalInvocationID.x & 3u; + uint idx = bitfieldExtract(idx_dw >> (8 * byte_ofs), 0, 8); uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; uint tile_row = y / MICRO_TILE_DIM; diff --git a/src/video_core/host_shaders/detile_macro32x2.comp b/src/video_core/host_shaders/detile_macro32x2.comp index 296311c7a..d161484c1 100644 --- a/src/video_core/host_shaders/detile_macro32x2.comp +++ b/src/video_core/host_shaders/detile_macro32x2.comp @@ -20,46 +20,46 @@ layout(push_constant) uniform image_info { uint c1; } info; -const uint lut_64bpp[][64] = { +const uint lut_64bpp[][16] = { { - 0x00, 0x01, 0x08, 0x09, 0x40, 0x41, 0x48, 0x49, - 0x02, 0x03, 0x0a, 0x0b, 0x42, 0x43, 0x4a, 0x4b, - 0x10, 0x11, 0x18, 0x19, 0x50, 0x51, 0x58, 0x59, - 0x12, 0x13, 0x1a, 0x1b, 0x52, 0x53, 0x5a, 0x5b, - 0x80, 0x81, 0x88, 0x89, 0xc0, 0xc1, 0xc8, 0xc9, - 0x82, 0x83, 0x8a, 0x8b, 0xc2, 0xc3, 0xca, 0xcb, - 0x90, 0x91, 0x98, 0x99, 0xd0, 0xd1, 0xd8, 0xd9, - 0x92, 0x93, 0x9a, 0x9b, 0xd2, 0xd3, 0xda, 0xdb, + 0x09080100, 0x49484140, + 0x0b0a0302, 0x4a4b4342, + 0x19181110, 0x59585150, + 0x1b1a1312, 0x5a5b5352, + 0x89888180, 0xc9c8c1c0, + 0x8b8a8382, 0xcacbc3c2, + 0x99989190, 0xd9d8d1d0, + 0x9b9a9392, 0xdbdad3d2, }, { - 0x04, 0x05, 0x0c, 0x0d, 0x44, 0x45, 0x4c, 0x4d, - 0x06, 0x07, 0x0e, 0x0f, 0x46, 0x47, 0x4e, 0x4f, - 0x14, 0x15, 0x1c, 0x1d, 0x54, 0x55, 0x5c, 0x5d, - 0x16, 0x17, 0x1e, 0x1f, 0x56, 0x57, 0x5e, 0x5f, - 0x84, 0x85, 0x8c, 0x8d, 0xc4, 0xc5, 0xcc, 0xcd, - 0x86, 0x87, 0x8e, 0x8f, 0xc6, 0xc7, 0xce, 0xcf, - 0x94, 0x95, 0x9c, 0x9d, 0xd4, 0xd5, 0xdc, 0xdd, - 0x96, 0x97, 0x9e, 0x9f, 0xd6, 0xd7, 0xde, 0xdf, + 0x0d0c0504, 0x4d4c4544, + 0x0f0e0706, 0x4f4e4746, + 0x1d1c1514, 0x5d5c5554, + 0x1f1e1716, 0x5f5e5756, + 0x8d8c8584, 0xcdccc5c4, + 0x8f8e8786, 0xcfcec7c6, + 0x9d9c9594, 0xdddcd5d4, + 0x9f9e9796, 0xdfded7d6, }, { - 0x20, 0x21, 0x28, 0x29, 0x60, 0x61, 0x68, 0x69, - 0x22, 0x23, 0x2a, 0x2b, 0x62, 0x63, 0x6a, 0x6b, - 0x30, 0x31, 0x38, 0x39, 0x70, 0x71, 0x78, 0x79, - 0x32, 0x33, 0x3a, 0x3b, 0x72, 0x73, 0x7a, 0x7b, - 0xa0, 0xa1, 0xa8, 0xa9, 0xe0, 0xe1, 0xe8, 0xe9, - 0xa2, 0xa3, 0xaa, 0xab, 0xe2, 0xe3, 0xea, 0xeb, - 0xb0, 0xb1, 0xb8, 0xb9, 0xf0, 0xf1, 0xf8, 0xf9, - 0xb2, 0xb3, 0xba, 0xbb, 0xf2, 0xf3, 0xfa, 0xfb, + 0x29282120, 0x69686160, + 0x2b2a2322, 0x6b6a6362, + 0x39383130, 0x79787170, + 0x3b3a3332, 0x7b7a7372, + 0xa9a8a1a0, 0xe9e8e1e0, + 0xabaaa3a2, 0xebeae3e2, + 0xb9b8b1b0, 0xf9f8f1f0, + 0xbbbab3b2, 0xfbfaf3f2, }, { - 0x24, 0x25, 0x2c, 0x2d, 0x64, 0x65, 0x6c, 0x6d, - 0x26, 0x27, 0x2e, 0x2f, 0x66, 0x67, 0x6e, 0x6f, - 0x34, 0x35, 0x3c, 0x3d, 0x74, 0x75, 0x7c, 0x7d, - 0x36, 0x37, 0x3e, 0x3f, 0x76, 0x77, 0x7e, 0x7f, - 0xa4, 0xa5, 0xac, 0xad, 0xe4, 0xe5, 0xec, 0xed, - 0xa6, 0xa7, 0xae, 0xaf, 0xe6, 0xe7, 0xee, 0xef, - 0xb4, 0xb5, 0xbc, 0xbd, 0xf4, 0xf5, 0xfc, 0xfd, - 0xb6, 0xb7, 0xbe, 0xbf, 0xf6, 0xf7, 0xfe, 0xff, + 0x2d2c2524, 0x6d6c6564, + 0x2f2e2726, 0x6f6e6766, + 0x3d3c3534, 0x7d7c7574, + 0x3f3e3736, 0x7f7e7776, + 0xadaca5a4, 0xedece5e4, + 0xafaea7a6, 0xefeee7e6, + 0xbdbcb5b4, 0xfdfcf5f4, + 0xbfbeb7b6, 0xfffef7f6, }, }; @@ -76,7 +76,9 @@ void main() { uint col = bitfieldExtract(x, 0, 3); uint row = bitfieldExtract(y, 0, 3); uint lut = bitfieldExtract(z, 0, 2); - uint idx = lut_64bpp[lut][col + row * MICRO_TILE_DIM]; + uint idx_dw = lut_64bpp[lut][(col + row * MICRO_TILE_DIM) >> 2u]; + uint byte_ofs = gl_LocalInvocationID.x & 3u; + uint idx = bitfieldExtract(idx_dw >> (8 * byte_ofs), 0, 8); uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; uint tile_row = y / MICRO_TILE_DIM;