From 632ed99ee77de8e1c7513eed0bd2319fffca9ff8 Mon Sep 17 00:00:00 2001 From: MajorP93 Date: Sat, 26 Apr 2025 00:06:51 +0200 Subject: [PATCH 01/13] ci: Bump Clang to 19 for Linux builds, align LLVM repository with runner version (#2844) * ci: Bump Clang to 19 for Linux builds * PR #2434 was intended to bump Clang to 19. In reality it only made sure that clang-format-19 is being used and that the shadPS4 codebase can be compiled with Clang 19. This PR makes sure that Clang 19 is actually being used for Linux builds which makes sense since we use Clang 19 for Windows builds already (Since Visual Studio 17.13 Clang 19 is being shipped). * ci: Use noble variant of LLVM repository * shadPS4 has been using Ubuntu 24.04 runners for some time now. This commit makes sure the correct LLVM repository is being used. --- .github/workflows/build.yml | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 55549ab4c..787aba251 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,7 +30,7 @@ jobs: - name: Install run: | wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - - sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-19 main' + sudo add-apt-repository 'deb http://apt.llvm.org/noble/ llvm-toolchain-noble-19 main' sudo apt update sudo apt install clang-format-19 - name: Build @@ -281,8 +281,13 @@ jobs: with: submodules: recursive + - name: Add LLVM repository + run: | + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - + sudo add-apt-repository 'deb http://apt.llvm.org/noble/ llvm-toolchain-noble-19 main' + - name: Install dependencies - run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev + run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev - name: Cache CMake Configuration uses: actions/cache@v4 @@ -304,7 +309,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-19 -DCMAKE_CXX_COMPILER=clang++-19 -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) @@ -337,8 +342,13 @@ jobs: with: submodules: recursive + - name: Add LLVM repository + run: | + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - + sudo add-apt-repository 'deb http://apt.llvm.org/noble/ llvm-toolchain-noble-19 main' + - name: Install dependencies - run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang build-essential qt6-base-dev qt6-tools-dev qt6-multimedia-dev libasound2-dev libpulse-dev libopenal-dev libudev-dev + run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 build-essential qt6-base-dev qt6-tools-dev qt6-multimedia-dev libasound2-dev libpulse-dev libopenal-dev libudev-dev - name: Cache CMake Configuration uses: actions/cache@v4 @@ -360,7 +370,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-19 -DCMAKE_CXX_COMPILER=clang++-19 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) From e816bc4b99ec1859467f7e69fce68b46009d821b Mon Sep 17 00:00:00 2001 From: baggins183 Date: Fri, 25 Apr 2025 19:44:03 -0700 Subject: [PATCH 02/13] Use GetSrc in VALU insts instead of assuming vector reg (was vcc_lo) (#2845) * Use GetSrc in v_add_i32 instead of assuming vector reg (was vcc_lo) * some other cases --- src/shader_recompiler/frontend/translate/vector_alu.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 787cf6ad3..3ce86c131 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -513,13 +513,13 @@ void Translator::V_LSHLREV_B32(const GcnInst& inst) { void Translator::V_AND_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; + const IR::U32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], ir.BitwiseAnd(src0, src1)); } void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; + const IR::U32 src1{GetSrc(inst.src[1])}; SetDst(inst.dst[0], is_xor ? ir.BitwiseXor(src0, src1) : IR::U32(ir.BitwiseOr(src0, src1))); } @@ -579,7 +579,7 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { void Translator::V_ADD_I32(const GcnInst& inst) { // Signed or unsigned components const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; + const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 result{ir.IAdd(src0, src1)}; SetDst(inst.dst[0], result); From c09fff2da6f4a879711d915f51797eebe315a60a Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Sun, 27 Apr 2025 01:04:17 -0500 Subject: [PATCH 03/13] VideoOut event cleanup (#2849) * Readable VideoOutEvent data packing Inspired by the work of former shadPS4 devs and mostly based on red_prig's current code. * Apply DceData struct to sceVideoOutGetEventCount Makes the code easier to read * Update equeue.h * Update main.cpp * Update equeue.h * Proper struct names * Fix hint mask Thanks to red_prig for catching my mistake here. * Clang * Fix header discrepancy --- src/core/libraries/kernel/equeue.h | 33 +++++++++++++++-------- src/core/libraries/videoout/video_out.cpp | 5 ++-- src/core/libraries/videoout/video_out.h | 10 +++++-- src/main.cpp | 2 +- 4 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/core/libraries/kernel/equeue.h b/src/core/libraries/kernel/equeue.h index 11c09bb37..2bd7ef510 100644 --- a/src/core/libraries/kernel/equeue.h +++ b/src/core/libraries/kernel/equeue.h @@ -61,6 +61,18 @@ struct SceKernelEvent { void* udata = nullptr; /* opaque user data identifier */ }; +struct OrbisVideoOutEventHint { + u64 event_id : 8; + u64 video_id : 8; + u64 flip_arg : 48; +}; + +struct OrbisVideoOutEventData { + u64 time : 12; + u64 count : 4; + u64 flip_arg : 48; +}; + struct EqueueEvent { SceKernelEvent event; void* data = nullptr; @@ -84,19 +96,18 @@ struct EqueueEvent { void TriggerDisplay(void* data) { is_triggered = true; - auto hint = reinterpret_cast(data); - if (hint != 0) { - auto hint_h = static_cast(hint >> 8) & 0xFFFFFF; - auto ident_h = static_cast(event.ident >> 40); - if ((static_cast(hint) & 0xFF) == event.ident && event.ident != 0xFE && - ((hint_h ^ ident_h) & 0xFF) == 0) { + if (data != nullptr) { + auto event_data = static_cast(event.data); + auto event_hint_raw = reinterpret_cast(data); + auto event_hint = static_cast(event_hint_raw); + if (event_hint.event_id == event.ident && event.ident != 0xfe) { auto time = Common::FencedRDTSC(); - auto mask = 0xF000; - if ((static_cast(event.data) & 0xF000) != 0xF000) { - mask = (static_cast(event.data) + 0x1000) & 0xF000; + auto counter = event_data.count; + if (counter != 0xf) { + counter++; } - event.data = (mask | static_cast(static_cast(time) & 0xFFF) | - (hint & 0xFFFFFFFFFFFF0000)); + event.data = + (time & 0xfff) | (counter << 0xc) | (event_hint_raw & 0xffffffffffff0000); } } } diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 3c839dadd..c5208b6dd 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -220,7 +220,7 @@ s32 PS4_SYSV_ABI sceVideoOutGetEventData(const Kernel::SceKernelEvent* ev, s64* if (ev->ident != static_cast(OrbisVideoOutInternalEventId::Flip) || ev->data == 0) { *data = event_data; } else { - *data = event_data | 0xFFFF000000000000; + *data = event_data | 0xffff000000000000; } return ORBIS_OK; } @@ -233,7 +233,8 @@ s32 PS4_SYSV_ABI sceVideoOutGetEventCount(const Kernel::SceKernelEvent* ev) { return ORBIS_VIDEO_OUT_ERROR_INVALID_EVENT; } - return (ev->data >> 0xc) & 0xf; + auto event_data = static_cast(ev->data); + return event_data.count; } s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status) { diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index f3e661de4..7db09530b 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -111,6 +111,12 @@ struct SceVideoOutColorSettings { u32 reserved[3]; }; +struct OrbisVideoOutEventData { + u64 time : 12; + u64 count : 4; + u64 flip_arg : 48; +}; + void PS4_SYSV_ABI sceVideoOutSetBufferAttribute(BufferAttribute* attribute, PixelFormat pixelFormat, u32 tilingMode, u32 aspectRatio, u32 width, u32 height, u32 pitchInPixel); @@ -128,8 +134,8 @@ s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutio s32 PS4_SYSV_ABI sceVideoOutOpen(SceUserServiceUserId userId, s32 busType, s32 index, const void* param); s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle); -int PS4_SYSV_ABI sceVideoOutGetEventId(const Kernel::SceKernelEvent* ev); -int PS4_SYSV_ABI sceVideoOutGetEventData(const Kernel::SceKernelEvent* ev, int64_t* data); +s32 PS4_SYSV_ABI sceVideoOutGetEventId(const Kernel::SceKernelEvent* ev); +s32 PS4_SYSV_ABI sceVideoOutGetEventData(const Kernel::SceKernelEvent* ev, s64* data); s32 PS4_SYSV_ABI sceVideoOutColorSettingsSetGamma(SceVideoOutColorSettings* settings, float gamma); s32 PS4_SYSV_ABI sceVideoOutAdjustColor(s32 handle, const SceVideoOutColorSettings* settings); diff --git a/src/main.cpp b/src/main.cpp index 6b334e446..85581774b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -154,7 +154,7 @@ int main(int argc, char* argv[]) { // If no game directory is set and no command line argument, prompt for it if (Config::getGameInstallDirs().empty()) { std::cout << "Warning: No game folder set, please set it by calling shadps4" - " with the --add-game-folder argument"; + " with the --add-game-folder argument\n"; } if (!has_game_argument) { From 410313ca87840de8b8bd06a18e74863863b60db6 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Sun, 27 Apr 2025 01:32:01 -0500 Subject: [PATCH 04/13] Implement sceKernelGetModuleInfo, sceKernelGetModuleInfoInternal, and sceKernelGetModuleList (#2850) * Fix GetModule exception Simple mistake * Prevent OOB writes in add_segment Due to mistakes in our linker logic, OpenOrbis' libSceFios2 causes OOB writes here. While the ideal solution would be to fix the erroneous behavior, the best I'm capable of right now is just preventing the OOB writes. * Implement sceKernelGetModuleInfo, sceKernelGetModuleInfoInternal, sceKernelGetModuleList These are implemented based on hardware observations and a homebrew sample made by red_prig. I've yet to test what error cases can show up. * Clang * Accurate error returns If there are more modules than provided space, then return kernel ENOMEM. If either handles or out_count are null, return kernel EFAULT. * Accurate error checks in ModuleInfo functions * Clang --- src/core/libraries/kernel/process.cpp | 59 +++++++++++++++++++++++++++ src/core/linker.h | 2 +- src/core/module.cpp | 12 ++++-- 3 files changed, 68 insertions(+), 5 deletions(-) diff --git a/src/core/libraries/kernel/process.cpp b/src/core/libraries/kernel/process.cpp index 02f8a538d..8a37e78d5 100644 --- a/src/core/libraries/kernel/process.cpp +++ b/src/core/libraries/kernel/process.cpp @@ -127,6 +127,62 @@ int PS4_SYSV_ABI sceKernelGetModuleInfoFromAddr(VAddr addr, int flags, return ORBIS_OK; } +s32 PS4_SYSV_ABI sceKernelGetModuleInfo(s32 handle, Core::OrbisKernelModuleInfo* info) { + if (info == nullptr) { + return ORBIS_KERNEL_ERROR_EFAULT; + } + if (info->st_size != sizeof(Core::OrbisKernelModuleInfo)) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + auto* linker = Common::Singleton::Instance(); + auto* module = linker->GetModule(handle); + if (module == nullptr) { + return ORBIS_KERNEL_ERROR_ESRCH; + } + *info = module->GetModuleInfo(); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceKernelGetModuleInfoInternal(s32 handle, Core::OrbisKernelModuleInfoEx* info) { + if (info == nullptr) { + return ORBIS_KERNEL_ERROR_EFAULT; + } + if (info->st_size != sizeof(Core::OrbisKernelModuleInfoEx)) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + auto* linker = Common::Singleton::Instance(); + auto* module = linker->GetModule(handle); + if (module == nullptr) { + return ORBIS_KERNEL_ERROR_ESRCH; + } + *info = module->GetModuleInfoEx(); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceKernelGetModuleList(s32* handles, u64 num_array, u64* out_count) { + if (handles == nullptr || out_count == nullptr) { + return ORBIS_KERNEL_ERROR_EFAULT; + } + + auto* linker = Common::Singleton::Instance(); + u64 count = 0; + auto* module = linker->GetModule(count); + while (module != nullptr && count < num_array) { + handles[count] = count; + count++; + module = linker->GetModule(count); + } + + if (count == num_array && module != nullptr) { + return ORBIS_KERNEL_ERROR_ENOMEM; + } + + *out_count = count; + return ORBIS_OK; +} + s32 PS4_SYSV_ABI exit(s32 status) { UNREACHABLE_MSG("Exiting with status code {}", status); return 0; @@ -141,6 +197,9 @@ void RegisterProcess(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("LwG8g3niqwA", "libkernel", 1, "libkernel", 1, 1, sceKernelDlsym); LIB_FUNCTION("RpQJJVKTiFM", "libkernel", 1, "libkernel", 1, 1, sceKernelGetModuleInfoForUnwind); LIB_FUNCTION("f7KBOafysXo", "libkernel", 1, "libkernel", 1, 1, sceKernelGetModuleInfoFromAddr); + LIB_FUNCTION("kUpgrXIrz7Q", "libkernel", 1, "libkernel", 1, 1, sceKernelGetModuleInfo); + LIB_FUNCTION("HZO7xOos4xc", "libkernel", 1, "libkernel", 1, 1, sceKernelGetModuleInfoInternal); + LIB_FUNCTION("IuxnUuXk6Bg", "libkernel", 1, "libkernel", 1, 1, sceKernelGetModuleList); LIB_FUNCTION("6Z83sYWFlA8", "libkernel", 1, "libkernel", 1, 1, exit); } diff --git a/src/core/linker.h b/src/core/linker.h index 63dfc37e8..028e18ead 100644 --- a/src/core/linker.h +++ b/src/core/linker.h @@ -83,7 +83,7 @@ public: } Module* GetModule(s32 index) const { - if (index >= 0 || index < m_modules.size()) { + if (index >= 0 && index < m_modules.size()) { return m_modules.at(index).get(); } return nullptr; diff --git a/src/core/module.cpp b/src/core/module.cpp index 1004f4404..cbe44457c 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -135,10 +135,14 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { if (do_map) { elf.LoadSegment(segment_addr, phdr.p_offset, phdr.p_filesz); } - auto& segment = info.segments[info.num_segments++]; - segment.address = segment_addr; - segment.prot = phdr.p_flags; - segment.size = GetAlignedSize(phdr); + if (info.num_segments < 4) { + auto& segment = info.segments[info.num_segments++]; + segment.address = segment_addr; + segment.prot = phdr.p_flags; + segment.size = GetAlignedSize(phdr); + } else { + LOG_ERROR(Core_Linker, "Attempting to add too many segments!"); + } }; for (u16 i = 0; i < elf_header.e_phnum; i++) { From cef795b80b032fd623485bf41efb3309b3d7ee9d Mon Sep 17 00:00:00 2001 From: Vinicius Rangel Date: Sun, 27 Apr 2025 13:32:29 -0300 Subject: [PATCH 05/13] devtools: persist fsr configs (#2852) Saves FSR config to imgui.ini so it won't reset every startup --- src/core/devtools/layer.cpp | 2 +- src/core/devtools/options.cpp | 22 +++++++++++++++++++++- src/imgui/imgui_texture.h | 3 +++ src/imgui/renderer/imgui_core.cpp | 2 ++ 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/core/devtools/layer.cpp b/src/core/devtools/layer.cpp index 94b39e801..a93178de5 100644 --- a/src/core/devtools/layer.cpp +++ b/src/core/devtools/layer.cpp @@ -1,11 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "SDL3/SDL_log.h" #include "layer.h" #include +#include "SDL3/SDL_log.h" #include "common/config.h" #include "common/singleton.h" #include "common/types.h" diff --git a/src/core/devtools/options.cpp b/src/core/devtools/options.cpp index 2def42071..f4b0ceb9a 100644 --- a/src/core/devtools/options.cpp +++ b/src/core/devtools/options.cpp @@ -1,9 +1,14 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "options.h" + +#include #include -#include "options.h" +#include "video_core/renderer_vulkan/vk_presenter.h" + +extern std::unique_ptr presenter; namespace Core::Devtools { @@ -12,6 +17,7 @@ TOptions Options; void LoadOptionsConfig(const char* line) { char str[512]; int i; + float f; if (sscanf(line, "disassembler_cli_isa=%511[^\n]", str) == 1) { Options.disassembler_cli_isa = str; return; @@ -24,12 +30,26 @@ void LoadOptionsConfig(const char* line) { Options.frame_dump_render_on_collapse = i != 0; return; } + if (sscanf(line, "fsr_enabled=%d", &i) == 1) { + presenter->GetFsrSettingsRef().enable = i != 0; + return; + } + if (sscanf(line, "fsr_rcas_enabled=%d", &i) == 1) { + presenter->GetFsrSettingsRef().use_rcas = i != 0; + return; + } + if (sscanf(line, "fsr_rcas_attenuation=%f", &f) == 1) { + presenter->GetFsrSettingsRef().rcas_attenuation = f; + } } void SerializeOptionsConfig(ImGuiTextBuffer* buf) { buf->appendf("disassembler_cli_isa=%s\n", Options.disassembler_cli_isa.c_str()); buf->appendf("disassembler_cli_spv=%s\n", Options.disassembler_cli_spv.c_str()); buf->appendf("frame_dump_render_on_collapse=%d\n", Options.frame_dump_render_on_collapse); + buf->appendf("fsr_enabled=%d\n", presenter->GetFsrSettingsRef().enable); + buf->appendf("fsr_rcas_enabled=%d\n", presenter->GetFsrSettingsRef().use_rcas); + buf->appendf("fsr_rcas_attenuation=%f\n", presenter->GetFsrSettingsRef().rcas_attenuation); } } // namespace Core::Devtools diff --git a/src/imgui/imgui_texture.h b/src/imgui/imgui_texture.h index 1a38066d0..d84eda6b7 100644 --- a/src/imgui/imgui_texture.h +++ b/src/imgui/imgui_texture.h @@ -4,8 +4,11 @@ #pragma once #include +#include #include +#include "common/types.h" + namespace ImGui { namespace Core::TextureManager { diff --git a/src/imgui/renderer/imgui_core.cpp b/src/imgui/renderer/imgui_core.cpp index 50ce41ebf..d143232dc 100644 --- a/src/imgui/renderer/imgui_core.cpp +++ b/src/imgui/renderer/imgui_core.cpp @@ -112,6 +112,8 @@ void Initialize(const ::Vulkan::Instance& instance, const Frontend::WindowSDL& w if (const auto dpi = SDL_GetWindowDisplayScale(window.GetSDLWindow()); dpi > 0.0f) { GetIO().FontGlobalScale = dpi; } + + std::at_quick_exit([] { SaveIniSettingsToDisk(GetIO().IniFilename); }); } void OnResize() { From 254375ef0c2807f7c7a68ecfa3bb87fe82cbab1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valdis=20Bogd=C4=81ns?= Date: Sun, 27 Apr 2025 20:57:20 +0300 Subject: [PATCH 06/13] Update ime_dialog.h (#2853) Fix the incorrect ORBIS_IME_DIALOG_MAX_TEXT_LENGTH; a larger value is required for at least the game Undertale --- src/core/libraries/ime/ime_dialog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/libraries/ime/ime_dialog.h b/src/core/libraries/ime/ime_dialog.h index 33abc7ecd..526e5f022 100644 --- a/src/core/libraries/ime/ime_dialog.h +++ b/src/core/libraries/ime/ime_dialog.h @@ -13,7 +13,7 @@ class SymbolsResolver; namespace Libraries::ImeDialog { -constexpr u32 ORBIS_IME_DIALOG_MAX_TEXT_LENGTH = 0x78; +constexpr u32 ORBIS_IME_DIALOG_MAX_TEXT_LENGTH = 2048; enum class Error : u32 { OK = 0x0, From b505829e1603fa6c638572203dd846690cd2f080 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 27 Apr 2025 16:52:52 -0700 Subject: [PATCH 07/13] lower_buffer_format_to_raw: Fix handling of format remapping. (#2857) --- .../ir/passes/lower_buffer_format_to_raw.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp index 3fdc6f0cd..658a495bc 100644 --- a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp +++ b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp @@ -196,13 +196,18 @@ static void LowerBufferFormatInst(IR::Block& block, IR::Inst& inst, Info& info) const auto buffer{desc.GetSharp(info)}; const auto is_inst_typed = flags.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid; - const auto data_format = is_inst_typed ? flags.inst_data_fmt.Value() : buffer.GetDataFmt(); - const auto num_format = is_inst_typed ? flags.inst_num_fmt.Value() : buffer.GetNumberFmt(); + const auto data_format = + is_inst_typed ? AmdGpu::RemapDataFormat(flags.inst_data_fmt.Value()) : buffer.GetDataFmt(); const auto format_info = FormatInfo{ .data_format = data_format, - .num_format = num_format, - .swizzle = is_inst_typed ? AmdGpu::IdentityMapping : buffer.DstSelect(), - .num_conversion = AmdGpu::MapNumberConversion(num_format), + .num_format = is_inst_typed + ? AmdGpu::RemapNumberFormat(flags.inst_num_fmt.Value(), data_format) + : buffer.GetNumberFmt(), + .swizzle = is_inst_typed + ? AmdGpu::RemapSwizzle(flags.inst_data_fmt.Value(), AmdGpu::IdentityMapping) + : buffer.DstSelect(), + .num_conversion = is_inst_typed ? AmdGpu::MapNumberConversion(flags.inst_num_fmt.Value()) + : buffer.GetNumberConversion(), .num_components = AmdGpu::NumComponents(data_format), }; From ff984d3cde34ff0c725b6ce379540f48fc163b05 Mon Sep 17 00:00:00 2001 From: MajorP93 Date: Mon, 28 Apr 2025 05:34:59 +0200 Subject: [PATCH 08/13] ci: Use mold linker for Linux builds (#2847) * The default linker which happens to be BFD in Ubuntu 24.04 does not support Clang's ThinLTO which CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON tries to enable. * Using mold linker fixes this and reduces build time a bit. * For consistency reasons we enable mold linker for GCC builds aswell. --- .github/workflows/build.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 787aba251..ceb915f6a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -287,7 +287,7 @@ jobs: sudo add-apt-repository 'deb http://apt.llvm.org/noble/ llvm-toolchain-noble-19 main' - name: Install dependencies - run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev + run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 mold build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev - name: Cache CMake Configuration uses: actions/cache@v4 @@ -309,7 +309,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-19 -DCMAKE_CXX_COMPILER=clang++-19 -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-19 -DCMAKE_CXX_COMPILER=clang++-19 -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) @@ -348,7 +348,7 @@ jobs: sudo add-apt-repository 'deb http://apt.llvm.org/noble/ llvm-toolchain-noble-19 main' - name: Install dependencies - run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 build-essential qt6-base-dev qt6-tools-dev qt6-multimedia-dev libasound2-dev libpulse-dev libopenal-dev libudev-dev + run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 mold build-essential qt6-base-dev qt6-tools-dev qt6-multimedia-dev libasound2-dev libpulse-dev libopenal-dev libudev-dev - name: Cache CMake Configuration uses: actions/cache@v4 @@ -370,7 +370,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-19 -DCMAKE_CXX_COMPILER=clang++-19 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-19 -DCMAKE_CXX_COMPILER=clang++-19 -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=mold" -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) @@ -395,7 +395,7 @@ jobs: submodules: recursive - name: Install dependencies - run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev + run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 mold build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev - name: Cache CMake Configuration uses: actions/cache@v4 @@ -417,7 +417,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=gcc-14 -DCMAKE_CXX_COMPILER=g++-14 -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=gcc-14 -DCMAKE_CXX_COMPILER=g++-14 -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) @@ -431,7 +431,7 @@ jobs: submodules: recursive - name: Install dependencies - run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 build-essential qt6-base-dev qt6-tools-dev qt6-multimedia-dev libasound2-dev libpulse-dev libopenal-dev libudev-dev + run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 mold build-essential qt6-base-dev qt6-tools-dev qt6-multimedia-dev libasound2-dev libpulse-dev libopenal-dev libudev-dev - name: Cache CMake Configuration uses: actions/cache@v4 @@ -453,7 +453,7 @@ jobs: key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - name: Configure CMake - run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=gcc-14 -DCMAKE_CXX_COMPILER=g++-14 -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake --fresh -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=gcc-14 -DCMAKE_CXX_COMPILER=g++-14 -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=mold" -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) From 81ad31ce319e47fb94c9303145a123afbdaddfa1 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 27 Apr 2025 20:56:17 -0700 Subject: [PATCH 09/13] pp_pass: Use correct surface format. (#2860) --- src/video_core/renderer_vulkan/host_passes/pp_pass.cpp | 4 ++-- src/video_core/renderer_vulkan/host_passes/pp_pass.h | 2 +- src/video_core/renderer_vulkan/vk_presenter.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_vulkan/host_passes/pp_pass.cpp b/src/video_core/renderer_vulkan/host_passes/pp_pass.cpp index 0c40ffd7a..73dd3a7b5 100644 --- a/src/video_core/renderer_vulkan/host_passes/pp_pass.cpp +++ b/src/video_core/renderer_vulkan/host_passes/pp_pass.cpp @@ -14,7 +14,7 @@ namespace Vulkan::HostPasses { -void PostProcessingPass::Create(vk::Device device) { +void PostProcessingPass::Create(vk::Device device, const vk::Format surface_format) { static const std::array pp_shaders{ HostShaders::FS_TRI_VERT, HostShaders::POST_PROCESS_FRAG, @@ -76,7 +76,7 @@ void PostProcessingPass::Create(vk::Device device) { Check<"create pp pipeline layout">(device.createPipelineLayoutUnique(layout_info)); const std::array pp_color_formats{ - vk::Format::eB8G8R8A8Unorm, // swapchain.GetSurfaceFormat().format, + surface_format, }; const vk::PipelineRenderingCreateInfo pipeline_rendering_ci{ .colorAttachmentCount = pp_color_formats.size(), diff --git a/src/video_core/renderer_vulkan/host_passes/pp_pass.h b/src/video_core/renderer_vulkan/host_passes/pp_pass.h index 6127bb5c1..f95c02e8d 100644 --- a/src/video_core/renderer_vulkan/host_passes/pp_pass.h +++ b/src/video_core/renderer_vulkan/host_passes/pp_pass.h @@ -19,7 +19,7 @@ public: u32 hdr = 0; }; - void Create(vk::Device device); + void Create(vk::Device device, vk::Format surface_format); void Render(vk::CommandBuffer cmdbuf, vk::ImageView input, vk::Extent2D input_size, Frame& output, Settings settings); diff --git a/src/video_core/renderer_vulkan/vk_presenter.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp index 4a6a5c7c2..6bd4b26fa 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -130,7 +130,7 @@ Presenter::Presenter(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool_ } fsr_pass.Create(device, instance.GetAllocator(), num_images); - pp_pass.Create(device); + pp_pass.Create(device, swapchain.GetSurfaceFormat().format); ImGui::Layer::AddLayer(Common::Singleton::Instance()); } From 83fd0683fa71944a9af1150a541f117b9997ffa8 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 27 Apr 2025 20:57:04 -0700 Subject: [PATCH 10/13] fix: Properly enable depthBounds feature. --- src/video_core/renderer_vulkan/vk_instance.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index d33a1607b..14c72836e 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -331,6 +331,7 @@ bool Instance::CreateDevice() { .tessellationShader = features.tessellationShader, .logicOp = features.logicOp, .depthBiasClamp = features.depthBiasClamp, + .depthBounds = features.depthBounds, .fillModeNonSolid = features.fillModeNonSolid, .multiViewport = features.multiViewport, .samplerAnisotropy = features.samplerAnisotropy, From 59d060bc164581e98ce606eda5821ff3a27c76d8 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 27 Apr 2025 21:06:10 -0700 Subject: [PATCH 11/13] fix: gcc compile --- src/video_core/renderer_vulkan/vk_instance.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 14c72836e..072807124 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -331,8 +331,8 @@ bool Instance::CreateDevice() { .tessellationShader = features.tessellationShader, .logicOp = features.logicOp, .depthBiasClamp = features.depthBiasClamp, - .depthBounds = features.depthBounds, .fillModeNonSolid = features.fillModeNonSolid, + .depthBounds = features.depthBounds, .multiViewport = features.multiViewport, .samplerAnisotropy = features.samplerAnisotropy, .vertexPipelineStoresAndAtomics = features.vertexPipelineStoresAndAtomics, From 385c5a4507cca22891c6f4e267b7bf22fcb2e0b8 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sun, 27 Apr 2025 21:53:36 -0700 Subject: [PATCH 12/13] fix: Add missing OpSelectionMerge in bounds check. --- src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index c6ec65606..211899714 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -38,6 +38,7 @@ Id BufferAtomicU32BoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto e const Id ib_label = ctx.OpLabel(); const Id oob_label = ctx.OpLabel(); const Id end_label = ctx.OpLabel(); + ctx.OpSelectionMerge(end_label, spv::SelectionControlMask::MaskNone); ctx.OpBranchConditional(in_bounds, ib_label, oob_label); ctx.AddLabel(ib_label); const Id ib_result = emit_func(); From 81fa9b7fff603a188fc235373b2933962292ae9a Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Mon, 28 Apr 2025 00:04:16 -0700 Subject: [PATCH 13/13] shader_recompiler: Add lowering pass for when 64-bit float is unsupported. (#2858) * shader_recompiler: Add lowering pass for when 64-bit float is unsupported. * shader_recompiler: Fix PackDouble2x32/UnpackDouble2x32 type. * shader_recompiler: Remove extra bit cast implementations. --- CMakeLists.txt | 1 + .../spirv/emit_spirv_bitwise_conversion.cpp | 14 +- .../backend/spirv/emit_spirv_instructions.h | 5 +- .../frontend/translate/translate.cpp | 7 +- src/shader_recompiler/ir/ir_emitter.cpp | 18 +- src/shader_recompiler/ir/ir_emitter.h | 3 +- src/shader_recompiler/ir/opcodes.inc | 5 +- src/shader_recompiler/ir/passes/ir_passes.h | 1 + .../ir/passes/lower_fp64_to_fp32.cpp | 186 ++++++++++++++++++ .../ir/passes/shader_info_collection_pass.cpp | 3 +- src/shader_recompiler/profile.h | 1 + src/shader_recompiler/recompiler.cpp | 3 + src/video_core/renderer_vulkan/vk_instance.h | 5 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 14 files changed, 220 insertions(+), 33 deletions(-) create mode 100644 src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 96cce0b10..e36c1f280 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -840,6 +840,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/ir_passes.h src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp + src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/ring_access_elimination.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 56a6abc05..43655ba3f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -64,10 +64,6 @@ Id EmitBitCastU32F32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.U32[1], value); } -Id EmitBitCastU64F64(EmitContext& ctx, Id value) { - return ctx.OpBitcast(ctx.U64, value); -} - Id EmitBitCastF16U16(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.F16[1], value); } @@ -76,10 +72,6 @@ Id EmitBitCastF32U32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.F32[1], value); } -void EmitBitCastF64U64(EmitContext&) { - UNREACHABLE_MSG("SPIR-V Instruction"); -} - Id EmitPackUint2x32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.U64, value); } @@ -88,10 +80,14 @@ Id EmitUnpackUint2x32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.U32[2], value); } -Id EmitPackFloat2x32(EmitContext& ctx, Id value) { +Id EmitPackDouble2x32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.F64[1], value); } +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); +} + Id EmitPackUnorm2x16(EmitContext& ctx, Id value) { return ctx.OpPackUnorm2x16(ctx.U32[1], value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 9b7528be8..079f1005d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -202,13 +202,12 @@ Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitBitCastU16F16(EmitContext& ctx, Id value); Id EmitBitCastU32F32(EmitContext& ctx, Id value); -Id EmitBitCastU64F64(EmitContext& ctx, Id value); Id EmitBitCastF16U16(EmitContext& ctx, Id value); Id EmitBitCastF32U32(EmitContext& ctx, Id value); -void EmitBitCastF64U64(EmitContext& ctx); Id EmitPackUint2x32(EmitContext& ctx, Id value); Id EmitUnpackUint2x32(EmitContext& ctx, Id value); -Id EmitPackFloat2x32(EmitContext& ctx, Id value); +Id EmitPackDouble2x32(EmitContext& ctx, Id value); +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); Id EmitPackUnorm2x16(EmitContext& ctx, Id value); Id EmitUnpackUnorm2x16(EmitContext& ctx, Id value); Id EmitPackSnorm2x16(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 230f3917f..c5a5814a4 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -336,7 +336,7 @@ T Translator::GetSrc64(const InstOperand& operand) { const auto value_lo = ir.GetVectorReg(IR::VectorReg(operand.code)); const auto value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1)); if constexpr (is_float) { - value = ir.PackFloat2x32(ir.CompositeConstruct(value_lo, value_hi)); + value = ir.PackDouble2x32(ir.CompositeConstruct(value_lo, value_hi)); } else { value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi)); } @@ -444,10 +444,9 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra value_untyped = ir.FPSaturate(value_raw); } } - const IR::U64 value = - is_float ? ir.BitCast(IR::F64{value_untyped}) : IR::U64{value_untyped}; - const IR::Value unpacked{ir.UnpackUint2x32(value)}; + const IR::Value unpacked{is_float ? ir.UnpackDouble2x32(IR::F64{value_untyped}) + : ir.UnpackUint2x32(IR::U64{value_untyped})}; const IR::U32 lo{ir.CompositeExtract(unpacked, 0U)}; const IR::U32 hi{ir.CompositeExtract(unpacked, 1U)}; switch (operand.field) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index e8836bb4c..e1ebf2206 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -84,16 +84,6 @@ IR::F16 IREmitter::BitCast(const IR::U16& value) { return Inst(Opcode::BitCastF16U16, value); } -template <> -IR::U64 IREmitter::BitCast(const IR::F64& value) { - return Inst(Opcode::BitCastU64F64, value); -} - -template <> -IR::F64 IREmitter::BitCast(const IR::U64& value) { - return Inst(Opcode::BitCastF64U64, value); -} - U1 IREmitter::ConditionRef(const U1& value) { return Inst(Opcode::ConditionRef, value); } @@ -841,8 +831,12 @@ Value IREmitter::UnpackUint2x32(const U64& value) { return Inst(Opcode::UnpackUint2x32, value); } -F64 IREmitter::PackFloat2x32(const Value& vector) { - return Inst(Opcode::PackFloat2x32, vector); +F64 IREmitter::PackDouble2x32(const Value& vector) { + return Inst(Opcode::PackDouble2x32, vector); +} + +Value IREmitter::UnpackDouble2x32(const F64& value) { + return Inst(Opcode::UnpackDouble2x32, value); } U32 IREmitter::Pack2x16(const AmdGpu::NumberFormat number_format, const Value& vector) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 186d83a07..d978b3b4f 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -181,7 +181,8 @@ public: [[nodiscard]] U64 PackUint2x32(const Value& vector); [[nodiscard]] Value UnpackUint2x32(const U64& value); - [[nodiscard]] F64 PackFloat2x32(const Value& vector); + [[nodiscard]] F64 PackDouble2x32(const Value& vector); + [[nodiscard]] Value UnpackDouble2x32(const F64& value); [[nodiscard]] U32 Pack2x16(AmdGpu::NumberFormat number_format, const Value& vector); [[nodiscard]] Value Unpack2x16(AmdGpu::NumberFormat number_format, const U32& value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 4932ff9a0..6f186808c 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -191,14 +191,13 @@ OPCODE(SelectF64, F64, U1, // Bitwise conversions OPCODE(BitCastU16F16, U16, F16, ) OPCODE(BitCastU32F32, U32, F32, ) -OPCODE(BitCastU64F64, U64, F64, ) OPCODE(BitCastF16U16, F16, U16, ) OPCODE(BitCastF32U32, F32, U32, ) -OPCODE(BitCastF64U64, F64, U64, ) OPCODE(PackUint2x32, U64, U32x2, ) OPCODE(UnpackUint2x32, U32x2, U64, ) -OPCODE(PackFloat2x32, F64, F32x2, ) +OPCODE(PackDouble2x32, F64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, F64, ) OPCODE(PackUnorm2x16, U32, F32x2, ) OPCODE(UnpackUnorm2x16, F32x2, U32, ) diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 760dbb112..06e4ac850 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -21,6 +21,7 @@ void ReadLaneEliminationPass(IR::Program& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); void LowerBufferFormatToRaw(IR::Program& program); +void LowerFp64ToFp32(IR::Program& program); void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info); void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); diff --git a/src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp b/src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp new file mode 100644 index 000000000..3c30e75b4 --- /dev/null +++ b/src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp @@ -0,0 +1,186 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/info.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/program.h" + +namespace Shader::Optimization { + +constexpr s32 F64ToF32Exp = +1023 - 127; +constexpr s32 F32ToF64Exp = +127 - 1023; + +static IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) { + const IR::U32 lo{ir.CompositeExtract(packed, 0)}; + const IR::U32 hi{ir.CompositeExtract(packed, 1)}; + const IR::U32 sign{ir.BitFieldExtract(hi, ir.Imm32(31), ir.Imm32(1))}; + const IR::U32 exp{ir.BitFieldExtract(hi, ir.Imm32(20), ir.Imm32(11))}; + const IR::U32 mantissa_hi{ir.BitFieldExtract(hi, ir.Imm32(0), ir.Imm32(20))}; + const IR::U32 mantissa_lo{ir.BitFieldExtract(lo, ir.Imm32(29), ir.Imm32(3))}; + const IR::U32 mantissa{ + ir.BitwiseOr(ir.ShiftLeftLogical(mantissa_hi, ir.Imm32(3)), mantissa_lo)}; + const IR::U32 exp_if_subnorm{ + ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F64ToF32Exp)))}; + const IR::U32 exp_if_infnan{ + ir.Select(ir.IEqual(exp, ir.Imm32(0x7ff)), ir.Imm32(0xff), exp_if_subnorm)}; + const IR::U32 result{ + ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)), + ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(23)), mantissa))}; + return ir.BitCast(result); +} + +IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) { + const IR::U32 value{ir.BitCast(IR::F32(raw))}; + const IR::U32 sign{ir.BitFieldExtract(value, ir.Imm32(31), ir.Imm32(1))}; + const IR::U32 exp{ir.BitFieldExtract(value, ir.Imm32(23), ir.Imm32(8))}; + const IR::U32 mantissa{ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(23))}; + const IR::U32 mantissa_hi{ir.BitFieldExtract(mantissa, ir.Imm32(3), ir.Imm32(20))}; + const IR::U32 mantissa_lo{ir.BitFieldExtract(mantissa, ir.Imm32(0), ir.Imm32(3))}; + const IR::U32 exp_if_subnorm{ + ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F32ToF64Exp)))}; + const IR::U32 exp_if_infnan{ + ir.Select(ir.IEqual(exp, ir.Imm32(0xff)), ir.Imm32(0x7ff), exp_if_subnorm)}; + const IR::U32 lo{ir.ShiftLeftLogical(mantissa_lo, ir.Imm32(29))}; + const IR::U32 hi{ + ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)), + ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(20)), mantissa_hi))}; + return ir.CompositeConstruct(lo, hi); +} + +static IR::Opcode Replace(IR::Opcode op) { + switch (op) { + case IR::Opcode::CompositeConstructF64x2: + return IR::Opcode::CompositeConstructF32x2; + case IR::Opcode::CompositeConstructF64x3: + return IR::Opcode::CompositeConstructF32x3; + case IR::Opcode::CompositeConstructF64x4: + return IR::Opcode::CompositeConstructF32x4; + case IR::Opcode::CompositeExtractF64x2: + return IR::Opcode::CompositeExtractF32x2; + case IR::Opcode::CompositeExtractF64x3: + return IR::Opcode::CompositeExtractF32x3; + case IR::Opcode::CompositeExtractF64x4: + return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::CompositeInsertF64x2: + return IR::Opcode::CompositeInsertF32x2; + case IR::Opcode::CompositeInsertF64x3: + return IR::Opcode::CompositeInsertF32x3; + case IR::Opcode::CompositeInsertF64x4: + return IR::Opcode::CompositeInsertF32x4; + case IR::Opcode::CompositeShuffleF64x2: + return IR::Opcode::CompositeShuffleF32x2; + case IR::Opcode::CompositeShuffleF64x3: + return IR::Opcode::CompositeShuffleF32x3; + case IR::Opcode::CompositeShuffleF64x4: + return IR::Opcode::CompositeShuffleF32x4; + case IR::Opcode::SelectF64: + return IR::Opcode::SelectF64; + case IR::Opcode::FPAbs64: + return IR::Opcode::FPAbs32; + case IR::Opcode::FPAdd64: + return IR::Opcode::FPAdd32; + case IR::Opcode::FPFma64: + return IR::Opcode::FPFma32; + case IR::Opcode::FPMax64: + return IR::Opcode::FPMax32; + case IR::Opcode::FPMin64: + return IR::Opcode::FPMin32; + case IR::Opcode::FPMul64: + return IR::Opcode::FPMul32; + case IR::Opcode::FPDiv64: + return IR::Opcode::FPDiv32; + case IR::Opcode::FPNeg64: + return IR::Opcode::FPNeg32; + case IR::Opcode::FPRecip64: + return IR::Opcode::FPRecip32; + case IR::Opcode::FPRecipSqrt64: + return IR::Opcode::FPRecipSqrt32; + case IR::Opcode::FPSaturate64: + return IR::Opcode::FPSaturate32; + case IR::Opcode::FPClamp64: + return IR::Opcode::FPClamp32; + case IR::Opcode::FPRoundEven64: + return IR::Opcode::FPRoundEven32; + case IR::Opcode::FPFloor64: + return IR::Opcode::FPFloor32; + case IR::Opcode::FPCeil64: + return IR::Opcode::FPCeil32; + case IR::Opcode::FPTrunc64: + return IR::Opcode::FPTrunc32; + case IR::Opcode::FPFract64: + return IR::Opcode::FPFract32; + case IR::Opcode::FPFrexpSig64: + return IR::Opcode::FPFrexpSig32; + case IR::Opcode::FPFrexpExp64: + return IR::Opcode::FPFrexpExp32; + case IR::Opcode::FPOrdEqual64: + return IR::Opcode::FPOrdEqual32; + case IR::Opcode::FPUnordEqual64: + return IR::Opcode::FPUnordEqual32; + case IR::Opcode::FPOrdNotEqual64: + return IR::Opcode::FPOrdNotEqual32; + case IR::Opcode::FPUnordNotEqual64: + return IR::Opcode::FPUnordNotEqual32; + case IR::Opcode::FPOrdLessThan64: + return IR::Opcode::FPOrdLessThan32; + case IR::Opcode::FPUnordLessThan64: + return IR::Opcode::FPUnordLessThan32; + case IR::Opcode::FPOrdGreaterThan64: + return IR::Opcode::FPOrdGreaterThan32; + case IR::Opcode::FPUnordGreaterThan64: + return IR::Opcode::FPUnordGreaterThan32; + case IR::Opcode::FPOrdLessThanEqual64: + return IR::Opcode::FPOrdLessThanEqual32; + case IR::Opcode::FPUnordLessThanEqual64: + return IR::Opcode::FPUnordLessThanEqual32; + case IR::Opcode::FPOrdGreaterThanEqual64: + return IR::Opcode::FPOrdGreaterThanEqual32; + case IR::Opcode::FPUnordGreaterThanEqual64: + return IR::Opcode::FPUnordGreaterThanEqual32; + case IR::Opcode::FPIsNan64: + return IR::Opcode::FPIsNan32; + case IR::Opcode::FPIsInf64: + return IR::Opcode::FPIsInf32; + case IR::Opcode::ConvertS32F64: + return IR::Opcode::ConvertS32F32; + case IR::Opcode::ConvertF32F64: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF64F32: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF64S32: + return IR::Opcode::ConvertF32S32; + case IR::Opcode::ConvertF64U32: + return IR::Opcode::ConvertF32U32; + default: + return op; + } +} + +static void Lower(IR::Block& block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::PackDouble2x32: { + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0))); + break; + } + case IR::Opcode::UnpackDouble2x32: { + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0))); + break; + } + default: + inst.ReplaceOpcode(Replace(inst.GetOpcode())); + break; + } +} + +void LowerFp64ToFp32(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + Lower(*block, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 219378a6c..d739b2da5 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -44,7 +44,8 @@ void Visit(Info& info, const IR::Inst& inst) { case IR::Opcode::BitCastF16U16: info.uses_fp16 = true; break; - case IR::Opcode::BitCastU64F64: + case IR::Opcode::PackDouble2x32: + case IR::Opcode::UnpackDouble2x32: info.uses_fp64 = true; break; case IR::Opcode::ImageWrite: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3b2854d59..9aac6230a 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -15,6 +15,7 @@ struct Profile { bool support_int8{}; bool support_int16{}; bool support_int64{}; + bool support_float64{}; bool support_vertex_instance_id{}; bool support_float_controls{}; bool support_separate_denorm_behavior{}; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 5004e0beb..3e0bd98d2 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -60,6 +60,9 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); // Run optimization passes + if (!profile.support_float64) { + Shader::Optimization::LowerFp64ToFp32(program); + } Shader::Optimization::SsaRewritePass(program.post_order_blocks); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::IdentityRemovalPass(program.blocks); diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index b3f3e60b6..bf9af1f24 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -89,6 +89,11 @@ public: return features.depthBounds; } + /// Returns true if 64-bit floats are supported in shaders + bool IsShaderFloat64Supported() const { + return features.shaderFloat64; + } + /// Returns true when VK_EXT_custom_border_color is supported bool IsCustomBorderColorSupported() const { return custom_border_color; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index efb1966ba..0b991cda0 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -196,6 +196,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, profile = Shader::Profile{ .supported_spirv = SpirvVersion1_6, .subgroup_size = instance.SubgroupSize(), + .support_float64 = instance.IsShaderFloat64Supported(), .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32), .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32),