diff --git a/.github/ISSUE_TEMPLATE/app-bug-report.yaml b/.github/ISSUE_TEMPLATE/app-bug-report.yaml new file mode 100644 index 000000000..cd540e06e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/app-bug-report.yaml @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later +# Docs - https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema +name: Application Bug Report +description: Problem with the application itself (ie. bad file path handling, UX issue) +title: "[APP BUG]: " +body: + - type: markdown + attributes: + value: | + ## Important: Read First + + **Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only. + If you have a support request or are unsure about the nature of your issue please contact us on [discord](https://discord.gg/bFJxfftGW6).** + + Please make an effort to make sure your issue isn't already reported. + + Do not create issues involving software piracy, our rules specifically prohibit this. Otherwise your issue will be closed and you will be banned in this repository. + - type: checkboxes + id: checklist + attributes: + label: Checklist + options: + - label: I have searched for a similar issue in this repository and did not find one. + required: true + - label: I am using an official build obtained from [releases](https://github.com/shadps4-emu/shadPS4/releases) or updated one of those builds using its in-app updater. 
+ required: true + - type: textarea + id: desc + attributes: + label: Describe the Bug + description: "A clear and concise description of what the bug is" + validations: + required: true + - type: textarea + id: repro + attributes: + label: Reproduction Steps + description: "Detailed steps to reproduce the behavior" + validations: + required: true + - type: textarea + id: expected + attributes: + label: Expected Behavior + description: "A clear and concise description of what you expected to happen" + validations: + required: false + - type: input + id: os + attributes: + label: Specify OS Version + placeholder: "Example: Windows 11, Arch Linux, MacOS 15" + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..5adcf1437 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later +blank_issues_enabled: false +contact_links: + - name: Discord + url: https://discord.gg/bFJxfftGW6 + about: Get direct support and hang out with us + - name: Wiki + url: https://github.com/shadps4-emu/shadPS4/wiki + about: Information, guides, etc. 
diff --git a/.github/ISSUE_TEMPLATE/feature-request.yaml b/.github/ISSUE_TEMPLATE/feature-request.yaml new file mode 100644 index 000000000..a1b49362a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.yaml @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later +# Docs - https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema +name: Feature Request +description: Suggest a new feature or improve an existing one +title: "[Feature Request]: " +body: + - type: markdown + attributes: + value: | + ## Important: Read First + + Please make an effort to make sure your issue isn't already reported. + + Do not create issues involving software piracy, our rules specifically prohibit this. Otherwise your issue will be closed and you will be banned in this repository. + + - type: checkboxes + id: checklist + attributes: + label: Checklist + options: + - label: I have searched for a similar issue in this repository and did not find one. + required: true + - type: textarea + id: desc + attributes: + label: Description + description: | + A concise description of the feature you want + + Include step by step examples of how the feature should work under various circumstances + validations: + required: true + - type: textarea + id: reason + attributes: + label: Reason + description: | + Give a reason why you want this feature + - How will it make things easier for you? + - How does this feature help your enjoyment of the emulator? + - What does it provide that isn't being provided currently? 
+ validations: + required: true + - type: textarea + id: examples + attributes: + label: Examples + description: | + Provide examples of the feature as implemented by other software + + Include screenshots or video if you like to help demonstrate how you'd like this feature to work + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/game-bug-report.yaml b/.github/ISSUE_TEMPLATE/game-bug-report.yaml new file mode 100644 index 000000000..2d984b697 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/game-bug-report.yaml @@ -0,0 +1,95 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later +# Docs - https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema +name: Game Emulation Bug Report +description: Problem in a game (ie. graphical artifacts, crashes, etc.) +title: "[GAME BUG]: " +body: + - type: markdown + attributes: + value: | + ## Important: Read First + + **Please do not make support requests on GitHub. Our issue tracker is for tracking bugs and feature requests only. + If you have a support request or are unsure about the nature of your issue please contact us on [discord](https://discord.gg/bFJxfftGW6).** + + This repository does not provide support for modded games. You should perform and test a clean game installation before submitting an issue. + + This repository does not provide support for game patches. If you are having issues with patches please refer to [Cheats and Patches Repository](https://github.com/shadps4-emu/ps4_cheats). + + Before submitting an issue please check [Game Compatibility Repository](https://github.com/shadps4-emu/shadps4-game-compatibility) for the information about the status of the game. + + Please make an effort to make sure your issue isn't already reported. + + Do not create issues involving software piracy, our rules specifically prohibit this. 
Otherwise your issue will be closed and you will be banned in this repository. + - type: checkboxes + id: checklist + attributes: + label: Checklist (we expect you to perform these steps before opening the issue) + options: + - label: I have searched for a similar issue in this repository and did not find one. + required: true + - label: I am using an official build obtained from [releases](https://github.com/shadps4-emu/shadPS4/releases) or updated one of those builds using its in-app updater. + required: true + - label: I have re-dumped the game and performed a clean install without mods and the issue is still present. + required: true + - label: I have disabled all patches and cheats and the issue is still present. + required: true + - label: I have all the required [system modules](https://github.com/shadps4-emu/shadps4-game-compatibility?tab=readme-ov-file#informations) installed. + required: true + - type: textarea + id: desc + attributes: + label: Describe the Bug + description: "A clear and concise description of what the bug is" + validations: + required: true + - type: textarea + id: repro + attributes: + label: Reproduction Steps + description: "Detailed steps to reproduce the behavior" + validations: + required: true + - type: input + id: os + attributes: + label: Specify OS Version + placeholder: "Example: Windows 11, Arch Linux, MacOS 15" + validations: + required: true + - type: input + id: cpu + attributes: + label: CPU + placeholder: "Example: Intel Core i7-8700" + validations: + required: true + - type: input + id: gpu + attributes: + label: GPU + placeholder: "Example: nVidia GTX 1650" + validations: + required: true + - type: input + id: ram + attributes: + label: Amount of RAM in GB + placeholder: "Example: 16 GB" + validations: + required: true + - type: input + id: vram + attributes: + label: Amount of VRAM in GB + placeholder: "Example: 8 GB" + validations: + required: true + - type: textarea + id: logs + attributes: + label: "Logs" + 
description: Attach any logs here. Log can be found by right clicking on a game name -> Open Folder... -> Open Log Folder. Make sure that the log type is set to `sync`. + validations: + required: false diff --git a/.gitmodules b/.gitmodules index 1c05ba6f3..3d0d21c5b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -119,7 +119,3 @@ path = externals/MoltenVK/cereal url = https://github.com/USCiLab/cereal shallow = true -[submodule "externals/cubeb"] - path = externals/cubeb - url = https://github.com/mozilla/cubeb - shallow = true diff --git a/CMakeLists.txt b/CMakeLists.txt index cd3894719..c0f675266 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,7 +127,6 @@ find_package(xxHash 0.8.2 MODULE) find_package(ZLIB 1.3 MODULE) find_package(Zydis 5.0.0 CONFIG) find_package(pugixml 1.14 CONFIG) -find_package(cubeb CONFIG) if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR NOT MSVC) find_package(cryptopp 8.9.0 MODULE) @@ -189,6 +188,8 @@ set(AJM_LIB src/core/libraries/ajm/ajm.cpp src/core/libraries/ajm/ajm_context.cpp src/core/libraries/ajm/ajm_context.h src/core/libraries/ajm/ajm_error.h + src/core/libraries/ajm/ajm_instance_statistics.cpp + src/core/libraries/ajm/ajm_instance_statistics.h src/core/libraries/ajm/ajm_instance.cpp src/core/libraries/ajm/ajm_instance.h src/core/libraries/ajm/ajm_mp3.cpp @@ -201,7 +202,6 @@ set(AUDIO_LIB src/core/libraries/audio/audioin.cpp src/core/libraries/audio/audioout.h src/core/libraries/audio/audioout_backend.h src/core/libraries/audio/audioout_error.h - src/core/libraries/audio/cubeb_audio.cpp src/core/libraries/audio/sdl_audio.cpp src/core/libraries/ngs2/ngs2.cpp src/core/libraries/ngs2/ngs2.h @@ -497,7 +497,6 @@ set(COMMON src/common/logging/backend.cpp src/common/polyfill_thread.h src/common/rdtsc.cpp src/common/rdtsc.h - src/common/ringbuffer.h src/common/signal_context.h src/common/signal_context.cpp src/common/singleton.h @@ -699,6 +698,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h 
src/shader_recompiler/ir/post_order.h src/shader_recompiler/ir/program.cpp src/shader_recompiler/ir/program.h + src/shader_recompiler/ir/reinterpret.h src/shader_recompiler/ir/reg.h src/shader_recompiler/ir/type.cpp src/shader_recompiler/ir/type.h @@ -889,7 +889,7 @@ endif() create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) -target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers cubeb::cubeb) +target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") @@ -917,6 +917,7 @@ if (APPLE) DEPENDS ${MVK_DYLIB_SRC} COMMAND cmake -E copy ${MVK_DYLIB_SRC} ${MVK_DYLIB_DST}) add_custom_target(CopyMoltenVK DEPENDS ${MVK_DYLIB_DST}) + add_dependencies(CopyMoltenVK MoltenVK) add_dependencies(shadps4 CopyMoltenVK) set_property(TARGET shadps4 APPEND PROPERTY BUILD_RPATH "@executable_path/../Frameworks") else() diff --git a/LICENSES/ISC.txt b/LICENSES/ISC.txt deleted file mode 100644 index b9bcfa3a4..000000000 --- a/LICENSES/ISC.txt +++ /dev/null @@ -1,7 +0,0 @@ -ISC License - - - -Permission to use, copy, modify, and /or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. 
- -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 8bdf089f8..4350948b7 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -228,16 +228,6 @@ if (NOT TARGET stb::headers) add_library(stb::headers ALIAS stb) endif() -# cubeb -if (NOT TARGET cubeb::cubeb) - option(BUILD_TESTS "" OFF) - option(BUILD_TOOLS "" OFF) - option(BUNDLE_SPEEX "" ON) - option(USE_SANITIZERS "" OFF) - add_subdirectory(cubeb) - add_library(cubeb::cubeb ALIAS cubeb) -endif() - # Apple-only dependencies if (APPLE) # date diff --git a/externals/MoltenVK/MoltenVK b/externals/MoltenVK/MoltenVK index 5ad3ee5d2..9f0b616d9 160000 --- a/externals/MoltenVK/MoltenVK +++ b/externals/MoltenVK/MoltenVK @@ -1 +1 @@ -Subproject commit 5ad3ee5d2f84342950c3fe93dec97719574d1932 +Subproject commit 9f0b616d9e2c39464d2a859b79dbc655c4a30e7e diff --git a/externals/cubeb b/externals/cubeb deleted file mode 160000 index 9a9d034c5..000000000 --- a/externals/cubeb +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9a9d034c51859a045a34f201334f612c51e6c19d diff --git a/src/common/config.cpp b/src/common/config.cpp index 088cfa853..9d95030d9 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -68,7 +68,7 @@ static int cursorHideTimeout = 5; // 5 seconds (default) static bool separateupdatefolder = false; static bool compatibilityData = false; static bool checkCompatibilityOnStartup = false; -static std::string audioBackend = "cubeb"; +static std::string trophyKey; // Gui std::vector 
settings_install_dirs = {}; @@ -93,6 +93,14 @@ std::string emulator_language = "en"; // Language u32 m_language = 1; // english +std::string getTrophyKey() { + return trophyKey; +} + +void setTrophyKey(std::string key) { + trophyKey = key; +} + bool isNeoMode() { return isNeo; } @@ -245,10 +253,6 @@ bool getCheckCompatibilityOnStartup() { return checkCompatibilityOnStartup; } -std::string getAudioBackend() { - return audioBackend; -} - void setGpuId(s32 selectedGpuId) { gpuId = selectedGpuId; } @@ -385,10 +389,6 @@ void setCheckCompatibilityOnStartup(bool use) { checkCompatibilityOnStartup = use; } -void setAudioBackend(std::string backend) { - audioBackend = backend; -} - void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h) { main_window_geometry_x = x; main_window_geometry_y = y; @@ -629,12 +629,6 @@ void load(const std::filesystem::path& path) { vkCrashDiagnostic = toml::find_or(vk, "crashDiagnostic", false); } - if (data.contains("Audio")) { - const toml::value& audio = data.at("Audio"); - - audioBackend = toml::find_or(audio, "backend", "cubeb"); - } - if (data.contains("Debug")) { const toml::value& debug = data.at("Debug"); @@ -677,6 +671,11 @@ void load(const std::filesystem::path& path) { m_language = toml::find_or(settings, "consoleLanguage", 1); } + + if (data.contains("Keys")) { + const toml::value& keys = data.at("Keys"); + trophyKey = toml::find_or(keys, "TrophyKey", ""); + } } void save(const std::filesystem::path& path) { @@ -734,10 +733,11 @@ void save(const std::filesystem::path& path) { data["Vulkan"]["rdocEnable"] = rdocEnable; data["Vulkan"]["rdocMarkersEnable"] = vkMarkers; data["Vulkan"]["crashDiagnostic"] = vkCrashDiagnostic; - data["Audio"]["backend"] = audioBackend; data["Debug"]["DebugDump"] = isDebugDump; data["Debug"]["CollectShader"] = isShaderDebug; + data["Keys"]["TrophyKey"] = trophyKey; + std::vector install_dirs; for (const auto& dirString : settings_install_dirs) { 
install_dirs.emplace_back(std::string{fmt::UTF(dirString.u8string()).data}); @@ -840,7 +840,6 @@ void setDefaultValues() { separateupdatefolder = false; compatibilityData = false; checkCompatibilityOnStartup = false; - audioBackend = "cubeb"; } } // namespace Config diff --git a/src/common/config.h b/src/common/config.h index 07871ae53..c8bfc8f28 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -15,6 +15,9 @@ void load(const std::filesystem::path& path); void save(const std::filesystem::path& path); void saveMainWindow(const std::filesystem::path& path); +std::string getTrophyKey(); +void setTrophyKey(std::string key); + bool isNeoMode(); bool isFullscreenMode(); bool getPlayBGM(); @@ -24,7 +27,6 @@ bool getEnableDiscordRPC(); bool getSeparateUpdateEnabled(); bool getCompatibilityEnabled(); bool getCheckCompatibilityOnStartup(); -std::string getAudioBackend(); std::string getLogFilter(); std::string getLogType(); @@ -77,7 +79,6 @@ void setSeparateUpdateEnabled(bool use); void setGameInstallDirs(const std::vector& settings_install_dirs_config); void setCompatibilityEnabled(bool use); void setCheckCompatibilityOnStartup(bool use); -void setAudioBackend(std::string backend); void setCursorState(s16 cursorState); void setCursorHideTimeout(int newcursorHideTimeout); diff --git a/src/common/elf_info.h b/src/common/elf_info.h index 5a2c914e0..6eb144e9a 100644 --- a/src/common/elf_info.h +++ b/src/common/elf_info.h @@ -34,6 +34,7 @@ public: static constexpr u32 FW_20 = 0x2000000; static constexpr u32 FW_25 = 0x2500000; static constexpr u32 FW_30 = 0x3000000; + static constexpr u32 FW_35 = 0x3500000; static constexpr u32 FW_40 = 0x4000000; static constexpr u32 FW_45 = 0x4500000; static constexpr u32 FW_50 = 0x5000000; diff --git a/src/common/native_clock.cpp b/src/common/native_clock.cpp index c3fa637aa..0c05dbe84 100644 --- a/src/common/native_clock.cpp +++ b/src/common/native_clock.cpp @@ -4,11 +4,6 @@ #include "common/native_clock.h" #include 
"common/rdtsc.h" #include "common/uint128.h" -#ifdef _WIN64 -#include -#else -#include -#endif namespace Common { @@ -34,10 +29,4 @@ u64 NativeClock::GetUptime() const { return FencedRDTSC(); } -u64 NativeClock::GetProcessTimeUS() const { - timespec ret; - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ret); - return ret.tv_nsec / 1000 + ret.tv_sec * 1000000; -} - } // namespace Common diff --git a/src/common/native_clock.h b/src/common/native_clock.h index b5e389452..1542c2f3a 100644 --- a/src/common/native_clock.h +++ b/src/common/native_clock.h @@ -20,7 +20,6 @@ public: u64 GetTimeUS(u64 base_ptc = 0) const; u64 GetTimeMS(u64 base_ptc = 0) const; u64 GetUptime() const; - u64 GetProcessTimeUS() const; private: u64 rdtsc_frequency; diff --git a/src/common/ringbuffer.h b/src/common/ringbuffer.h deleted file mode 100644 index 6a71c2888..000000000 --- a/src/common/ringbuffer.h +++ /dev/null @@ -1,374 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2016 Mozilla Foundation -// SPDX-License-Identifier: ISC - -#pragma once - -#include -#include -#include -#include -#include -#include "common/assert.h" - -/** - * Single producer single consumer lock-free and wait-free ring buffer. - * - * This data structure allows producing data from one thread, and consuming it - * on another thread, safely and without explicit synchronization. If used on - * two threads, this data structure uses atomics for thread safety. It is - * possible to disable the use of atomics at compile time and only use this data - * structure on one thread. - * - * The role for the producer and the consumer must be constant, i.e., the - * producer should always be on one thread and the consumer should always be on - * another thread. - * - * Some words about the inner workings of this class: - * - Capacity is fixed. Only one allocation is performed, in the constructor. - * When reading and writing, the return value of the method allows checking if - * the ring buffer is empty or full. 
- * - We always keep the read index at least one element ahead of the write - * index, so we can distinguish between an empty and a full ring buffer: an - * empty ring buffer is when the write index is at the same position as the - * read index. A full buffer is when the write index is exactly one position - * before the read index. - * - We synchronize updates to the read index after having read the data, and - * the write index after having written the data. This means that the each - * thread can only touch a portion of the buffer that is not touched by the - * other thread. - * - Callers are expected to provide buffers. When writing to the queue, - * elements are copied into the internal storage from the buffer passed in. - * When reading from the queue, the user is expected to provide a buffer. - * Because this is a ring buffer, data might not be contiguous in memory, - * providing an external buffer to copy into is an easy way to have linear - * data for further processing. - */ -template -class RingBuffer { -public: - /** - * Constructor for a ring buffer. - * - * This performs an allocation, but is the only allocation that will happen - * for the life time of a `RingBuffer`. - * - * @param capacity The maximum number of element this ring buffer will hold. - */ - RingBuffer(int capacity) - /* One more element to distinguish from empty and full buffer. */ - : capacity_(capacity + 1) { - ASSERT(storage_capacity() < std::numeric_limits::max() / 2 && - "buffer too large for the type of index used."); - ASSERT(capacity_ > 0); - - data_.reset(new T[storage_capacity()]); - /* If this queue is using atomics, initializing those members as the last - * action in the constructor acts as a full barrier, and allow capacity() to - * be thread-safe. */ - write_index_ = 0; - read_index_ = 0; - } - /** - * Push `count` zero or default constructed elements in the array. - * - * Only safely called on the producer thread. - * - * @param count The number of elements to enqueue. 
- * @return The number of element enqueued. - */ - int enqueue_default(int count) { - return enqueue(nullptr, count); - } - /** - * @brief Put an element in the queue - * - * Only safely called on the producer thread. - * - * @param element The element to put in the queue. - * - * @return 1 if the element was inserted, 0 otherwise. - */ - int enqueue(T& element) { - return enqueue(&element, 1); - } - /** - * Push `count` elements in the ring buffer. - * - * Only safely called on the producer thread. - * - * @param elements a pointer to a buffer containing at least `count` elements. - * If `elements` is nullptr, zero or default constructed elements are - * enqueued. - * @param count The number of elements to read from `elements` - * @return The number of elements successfully coped from `elements` and - * inserted into the ring buffer. - */ - int enqueue(T* elements, int count) { -#ifndef NDEBUG - assert_correct_thread(producer_id); -#endif - - int wr_idx = write_index_.load(std::memory_order_relaxed); - int rd_idx = read_index_.load(std::memory_order_acquire); - - if (full_internal(rd_idx, wr_idx)) { - return 0; - } - - int to_write = std::min(available_write_internal(rd_idx, wr_idx), count); - - /* First part, from the write index to the end of the array. */ - int first_part = std::min(storage_capacity() - wr_idx, to_write); - /* Second part, from the beginning of the array */ - int second_part = to_write - first_part; - - if (elements) { - Copy(data_.get() + wr_idx, elements, first_part); - Copy(data_.get(), elements + first_part, second_part); - } else { - ConstructDefault(data_.get() + wr_idx, first_part); - ConstructDefault(data_.get(), second_part); - } - - write_index_.store(increment_index(wr_idx, to_write), std::memory_order_release); - - return to_write; - } - /** - * Retrieve at most `count` elements from the ring buffer, and copy them to - * `elements`, if non-null. - * - * Only safely called on the consumer side. 
- * - * @param elements A pointer to a buffer with space for at least `count` - * elements. If `elements` is `nullptr`, `count` element will be discarded. - * @param count The maximum number of elements to dequeue. - * @return The number of elements written to `elements`. - */ - int dequeue(T* elements, int count) { -#ifndef NDEBUG - assert_correct_thread(consumer_id); -#endif - - int rd_idx = read_index_.load(std::memory_order_relaxed); - int wr_idx = write_index_.load(std::memory_order_acquire); - - if (empty_internal(rd_idx, wr_idx)) { - return 0; - } - - int to_read = std::min(available_read_internal(rd_idx, wr_idx), count); - - int first_part = std::min(storage_capacity() - rd_idx, to_read); - int second_part = to_read - first_part; - - if (elements) { - Copy(elements, data_.get() + rd_idx, first_part); - Copy(elements + first_part, data_.get(), second_part); - } - - read_index_.store(increment_index(rd_idx, to_read), std::memory_order_release); - - return to_read; - } - /** - * Get the number of available element for consuming. - * - * Only safely called on the consumer thread. - * - * @return The number of available elements for reading. - */ - int available_read() const { -#ifndef NDEBUG - assert_correct_thread(consumer_id); -#endif - return available_read_internal(read_index_.load(std::memory_order_relaxed), - write_index_.load(std::memory_order_acquire)); - } - /** - * Get the number of available elements for consuming. - * - * Only safely called on the producer thread. - * - * @return The number of empty slots in the buffer, available for writing. - */ - int available_write() const { -#ifndef NDEBUG - assert_correct_thread(producer_id); -#endif - return available_write_internal(read_index_.load(std::memory_order_acquire), - write_index_.load(std::memory_order_relaxed)); - } - /** - * Get the total capacity, for this ring buffer. - * - * Can be called safely on any thread. - * - * @return The maximum capacity of this ring buffer. 
- */ - int capacity() const { - return storage_capacity() - 1; - } - /** - * Reset the consumer and producer thread identifier, in case the thread are - * being changed. This has to be externally synchronized. This is no-op when - * asserts are disabled. - */ - void reset_thread_ids() { -#ifndef NDEBUG - consumer_id = producer_id = std::thread::id(); -#endif - } - -private: - /** Return true if the ring buffer is empty. - * - * @param read_index the read index to consider - * @param write_index the write index to consider - * @return true if the ring buffer is empty, false otherwise. - **/ - bool empty_internal(int read_index, int write_index) const { - return write_index == read_index; - } - /** Return true if the ring buffer is full. - * - * This happens if the write index is exactly one element behind the read - * index. - * - * @param read_index the read index to consider - * @param write_index the write index to consider - * @return true if the ring buffer is full, false otherwise. - **/ - bool full_internal(int read_index, int write_index) const { - return (write_index + 1) % storage_capacity() == read_index; - } - /** - * Return the size of the storage. It is one more than the number of elements - * that can be stored in the buffer. - * - * @return the number of elements that can be stored in the buffer. - */ - int storage_capacity() const { - return capacity_; - } - /** - * Returns the number of elements available for reading. - * - * @return the number of available elements for reading. - */ - int available_read_internal(int read_index, int write_index) const { - if (write_index >= read_index) { - return write_index - read_index; - } else { - return write_index + storage_capacity() - read_index; - } - } - /** - * Returns the number of empty elements, available for writing. - * - * @return the number of elements that can be written into the array. 
- */ - int available_write_internal(int read_index, int write_index) const { - /* We substract one element here to always keep at least one sample - * free in the buffer, to distinguish between full and empty array. */ - int rv = read_index - write_index - 1; - if (write_index >= read_index) { - rv += storage_capacity(); - } - return rv; - } - /** - * Increments an index, wrapping it around the storage. - * - * @param index a reference to the index to increment. - * @param increment the number by which `index` is incremented. - * @return the new index. - */ - int increment_index(int index, int increment) const { - ASSERT(increment >= 0); - return (index + increment) % storage_capacity(); - } - /** - * @brief This allows checking that enqueue (resp. dequeue) are always called - * by the right thread. - * - * @param id the id of the thread that has called the calling method first. - */ -#ifndef NDEBUG - static void assert_correct_thread(std::thread::id& id) { - if (id == std::thread::id()) { - id = std::this_thread::get_id(); - return; - } - ASSERT(id == std::this_thread::get_id()); - } -#endif - /** Similar to memcpy, but accounts for the size of an element. */ - template - void PodCopy(CopyT* destination, const CopyT* source, size_t count) { - static_assert(std::is_trivial::value, "Requires trivial type"); - ASSERT(destination && source); - memcpy(destination, source, count * sizeof(CopyT)); - } - /** Similar to a memset to zero, but accounts for the size of an element. 
*/ - template - void PodZero(ZeroT* destination, size_t count) { - static_assert(std::is_trivial::value, "Requires trivial type"); - ASSERT(destination); - memset(destination, 0, count * sizeof(ZeroT)); - } - template - void Copy(CopyT* destination, const CopyT* source, size_t count, Trait) { - for (size_t i = 0; i < count; i++) { - destination[i] = source[i]; - } - } - template - void Copy(CopyT* destination, const CopyT* source, size_t count, std::true_type) { - PodCopy(destination, source, count); - } - /** - * This allows copying a number of elements from a `source` pointer to a - * `destination` pointer, using `memcpy` if it is safe to do so, or a loop that - * calls the constructors and destructors otherwise. - */ - template - void Copy(CopyT* destination, const T* source, size_t count) { - ASSERT(destination && source); - Copy(destination, source, count, typename std::is_trivial::type()); - } - template - void ConstructDefault(ConstructT* destination, size_t count, Trait) { - for (size_t i = 0; i < count; i++) { - destination[i] = ConstructT(); - } - } - template - void ConstructDefault(ConstructT* destination, size_t count, std::true_type) { - PodZero(destination, count); - } - /** - * This allows zeroing (using memset) or default-constructing a number of - * elements calling the constructors and destructors if necessary. - */ - template - void ConstructDefault(ConstructT* destination, size_t count) { - ASSERT(destination); - ConstructDefault(destination, count, typename std::is_arithmetic::type()); - } - /** Index at which the oldest element is at, in samples. */ - std::atomic read_index_; - /** Index at which to write new elements. `write_index` is always at - * least one element ahead of `read_index_`. */ - std::atomic write_index_; - /** Maximum number of elements that can be stored in the ring buffer. 
*/ - const int capacity_; - /** Data storage */ - std::unique_ptr data_; -#ifndef NDEBUG - /** The id of the only thread that is allowed to read from the queue. */ - mutable std::thread::id consumer_id; - /** The id of the only thread that is allowed to write from the queue. */ - mutable std::thread::id producer_id; -#endif -}; diff --git a/src/core/crypto/crypto.cpp b/src/core/crypto/crypto.cpp index 00f1dea46..4020edfd8 100644 --- a/src/core/crypto/crypto.cpp +++ b/src/core/crypto/crypto.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include + #include "crypto.h" CryptoPP::RSA::PrivateKey Crypto::key_pkg_derived_key3_keyset_init() { @@ -137,17 +138,20 @@ void Crypto::aesCbcCfb128DecryptEntry(std::span ivkey, } } -void Crypto::decryptEFSM(std::span NPcommID, +void Crypto::decryptEFSM(std::span trophyKey, + std::span NPcommID, std::span efsmIv, std::span ciphertext, std::span decrypted) { - std::vector TrophyIV = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // step 1: Encrypt NPcommID CryptoPP::CBC_Mode::Encryption encrypt; + std::vector trophyIv(16, 0); std::vector trpKey(16); + encrypt.SetKeyWithIV(trophyKey.data(), trophyKey.size(), trophyIv.data()); encrypt.ProcessData(trpKey.data(), NPcommID.data(), 16); + // step 2: decrypt efsm. 
CryptoPP::CBC_Mode::Decryption decrypt; decrypt.SetKeyWithIV(trpKey.data(), trpKey.size(), efsmIv.data()); diff --git a/src/core/crypto/crypto.h b/src/core/crypto/crypto.h index 83249bd7d..b5d8104b5 100644 --- a/src/core/crypto/crypto.h +++ b/src/core/crypto/crypto.h @@ -32,7 +32,8 @@ public: void aesCbcCfb128DecryptEntry(std::span ivkey, std::span ciphertext, std::span decrypted); - void decryptEFSM(std::span, std::span efsmIv, + void decryptEFSM(std::span trophyKey, + std::span NPcommID, std::span efsmIv, std::span ciphertext, std::span decrypted); void PfsGenCryptoKey(std::span ekpfs, std::span seed, diff --git a/src/core/devtools/widget/reg_popup.cpp b/src/core/devtools/widget/reg_popup.cpp index 2727e1745..fae620901 100644 --- a/src/core/devtools/widget/reg_popup.cpp +++ b/src/core/devtools/widget/reg_popup.cpp @@ -66,7 +66,7 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) { "GetColorSliceSize()", buffer.GetColorSliceSize(), "GetTilingMode()", buffer.GetTilingMode(), "IsTiled()", buffer.IsTiled(), - "NumFormat()", buffer.NumFormat() + "NumFormat()", buffer.GetNumberFmt() ); // clang-format on diff --git a/src/core/file_format/trp.cpp b/src/core/file_format/trp.cpp index 2ca88c778..d25c93c3f 100644 --- a/src/core/file_format/trp.cpp +++ b/src/core/file_format/trp.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/config.h" #include "common/logging/log.h" #include "common/path_util.h" #include "trp.h" @@ -33,12 +34,29 @@ static void removePadding(std::vector& vec) { } } +static void hexToBytes(const char* hex, unsigned char* dst) { + for (size_t i = 0; hex[i] != 0; i++) { + const unsigned char value = (hex[i] < 0x3A) ? (hex[i] - 0x30) : (hex[i] - 0x37); + dst[i / 2] |= ((i % 2) == 0) ? 
(value << 4) : (value); + } +} + bool TRP::Extract(const std::filesystem::path& trophyPath, const std::string titleId) { std::filesystem::path gameSysDir = trophyPath / "sce_sys/trophy/"; if (!std::filesystem::exists(gameSysDir)) { LOG_CRITICAL(Common_Filesystem, "Game sce_sys directory doesn't exist"); return false; } + + const auto user_key_str = Config::getTrophyKey(); + if (user_key_str.size() != 32) { + LOG_CRITICAL(Common_Filesystem, "Trophy decryption key is not specified"); + return false; + } + + std::array user_key{}; + hexToBytes(user_key_str.c_str(), user_key.data()); + for (int index = 0; const auto& it : std::filesystem::directory_iterator(gameSysDir)) { if (it.is_regular_file()) { GetNPcommID(trophyPath, index); @@ -97,7 +115,7 @@ bool TRP::Extract(const std::filesystem::path& trophyPath, const std::string tit return false; } file.Read(ESFM); - crypto.decryptEFSM(np_comm_id, esfmIv, ESFM, XML); // decrypt + crypto.decryptEFSM(user_key, np_comm_id, esfmIv, ESFM, XML); // decrypt removePadding(XML); std::string xml_name = entry.entry_name; size_t pos = xml_name.find("ESFM"); diff --git a/src/core/libraries/ajm/ajm.cpp b/src/core/libraries/ajm/ajm.cpp index 3184fa64f..5c55d2c06 100644 --- a/src/core/libraries/ajm/ajm.cpp +++ b/src/core/libraries/ajm/ajm.cpp @@ -183,13 +183,15 @@ int PS4_SYSV_ABI sceAjmInstanceSwitch() { return ORBIS_OK; } -int PS4_SYSV_ABI sceAjmMemoryRegister() { - LOG_ERROR(Lib_Ajm, "(STUBBED) called"); +int PS4_SYSV_ABI sceAjmMemoryRegister(u32 context_id, void* ptr, size_t num_pages) { + // All memory is already shared with our implementation since we do not use any hardware. + LOG_TRACE(Lib_Ajm, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceAjmMemoryUnregister() { - LOG_ERROR(Lib_Ajm, "(STUBBED) called"); +int PS4_SYSV_ABI sceAjmMemoryUnregister(u32 context_id, void* ptr) { + // All memory is already shared with our implementation since we do not use any hardware. 
+ LOG_TRACE(Lib_Ajm, "(STUBBED) called"); return ORBIS_OK; } diff --git a/src/core/libraries/ajm/ajm.h b/src/core/libraries/ajm/ajm.h index 1ac7c7629..34aeb9aa4 100644 --- a/src/core/libraries/ajm/ajm.h +++ b/src/core/libraries/ajm/ajm.h @@ -74,6 +74,26 @@ union AjmJobFlags { }; }; +enum class AjmStatisticsFlags : u64 { + Memory = 1 << 0, + EnginePerCodec = 1 << 15, + Engine = 1 << 16, +}; +DECLARE_ENUM_FLAG_OPERATORS(AjmStatisticsFlags) + +union AjmStatisticsJobFlags { + AjmStatisticsJobFlags(AjmJobFlags job_flags) : raw(job_flags.raw) {} + + u64 raw; + struct { + u64 version : 3; + u64 : 12; + AjmStatisticsFlags statistics_flags : 17; + u64 : 32; + }; +}; +static_assert(sizeof(AjmStatisticsJobFlags) == 8); + struct AjmSidebandResult { s32 result; s32 internal_result; @@ -126,6 +146,31 @@ union AjmSidebandInitParameters { u8 reserved[8]; }; +struct AjmSidebandStatisticsEngine { + float usage_batch; + float usage_interval[3]; +}; + +struct AjmSidebandStatisticsEnginePerCodec { + u8 codec_count; + u8 codec_id[3]; + float codec_percentage[3]; +}; + +struct AjmSidebandStatisticsMemory { + u32 instance_free; + u32 buffer_free; + u32 batch_size; + u32 input_size; + u32 output_size; + u32 small_size; +}; + +struct AjmSidebandStatisticsEngineParameters { + u32 interval_count; + float interval[3]; +}; + union AjmInstanceFlags { u64 raw; struct { @@ -178,8 +223,8 @@ int PS4_SYSV_ABI sceAjmInstanceCreate(u32 context, AjmCodecType codec_type, AjmI int PS4_SYSV_ABI sceAjmInstanceDestroy(u32 context, u32 instance); int PS4_SYSV_ABI sceAjmInstanceExtend(); int PS4_SYSV_ABI sceAjmInstanceSwitch(); -int PS4_SYSV_ABI sceAjmMemoryRegister(); -int PS4_SYSV_ABI sceAjmMemoryUnregister(); +int PS4_SYSV_ABI sceAjmMemoryRegister(u32 context_id, void* ptr, size_t num_pages); +int PS4_SYSV_ABI sceAjmMemoryUnregister(u32 context_id, void* ptr); int PS4_SYSV_ABI sceAjmModuleRegister(u32 context, AjmCodecType codec_type, s64 reserved); int PS4_SYSV_ABI sceAjmModuleUnregister(); int 
PS4_SYSV_ABI sceAjmStrError(); diff --git a/src/core/libraries/ajm/ajm_batch.cpp b/src/core/libraries/ajm/ajm_batch.cpp index b1cec88b3..30e1deb71 100644 --- a/src/core/libraries/ajm/ajm_batch.cpp +++ b/src/core/libraries/ajm/ajm_batch.cpp @@ -54,6 +54,8 @@ public: : m_p_begin(begin), m_p_current(m_p_begin), m_size(size) {} AjmBatchBuffer(std::span data) : m_p_begin(data.data()), m_p_current(m_p_begin), m_size(data.size()) {} + AjmBatchBuffer(AjmChunkBuffer& buffer) + : AjmBatchBuffer(reinterpret_cast(buffer.p_address), buffer.size) {} AjmBatchBuffer SubBuffer(size_t size = s_dynamic_extent) { auto current = m_p_current; @@ -113,6 +115,88 @@ private: size_t m_size{}; }; +AjmJob AjmStatisticsJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { + std::optional job_flags = {}; + std::optional input_control_buffer = {}; + std::optional output_control_buffer = {}; + + AjmJob job; + job.instance_id = instance_id; + + while (!batch_buffer.IsEmpty()) { + auto& header = batch_buffer.Peek(); + switch (header.ident) { + case Identifier::AjmIdentInputControlBuf: { + ASSERT_MSG(!input_control_buffer.has_value(), + "Only one instance of input control buffer is allowed per job"); + const auto& buffer = batch_buffer.Consume(); + if (buffer.p_address != nullptr && buffer.size != 0) { + input_control_buffer = buffer; + } + break; + } + case Identifier::AjmIdentControlFlags: { + ASSERT_MSG(!job_flags.has_value(), "Only one instance of job flags is allowed per job"); + auto& chunk = batch_buffer.Consume(); + job_flags = AjmJobFlags{ + .raw = (u64(chunk.header.payload) << 32) + chunk.flags_low, + }; + break; + } + case Identifier::AjmIdentReturnAddressBuf: { + // Ignore return address buffers. 
+ batch_buffer.Skip(); + break; + } + case Identifier::AjmIdentOutputControlBuf: { + ASSERT_MSG(!output_control_buffer.has_value(), + "Only one instance of output control buffer is allowed per job"); + const auto& buffer = batch_buffer.Consume(); + if (buffer.p_address != nullptr && buffer.size != 0) { + output_control_buffer = buffer; + } + break; + } + default: + UNREACHABLE_MSG("Unknown chunk: {}", header.ident); + } + } + + ASSERT(job_flags.has_value()); + job.flags = job_flags.value(); + + AjmStatisticsJobFlags flags(job.flags); + if (input_control_buffer.has_value()) { + AjmBatchBuffer input_batch(input_control_buffer.value()); + if (True(flags.statistics_flags & AjmStatisticsFlags::Engine)) { + job.input.statistics_engine_parameters = + input_batch.Consume(); + } + } + + if (output_control_buffer.has_value()) { + AjmBatchBuffer output_batch(output_control_buffer.value()); + job.output.p_result = &output_batch.Consume(); + *job.output.p_result = AjmSidebandResult{}; + + if (True(flags.statistics_flags & AjmStatisticsFlags::Engine)) { + job.output.p_engine = &output_batch.Consume(); + *job.output.p_engine = AjmSidebandStatisticsEngine{}; + } + if (True(flags.statistics_flags & AjmStatisticsFlags::EnginePerCodec)) { + job.output.p_engine_per_codec = + &output_batch.Consume(); + *job.output.p_engine_per_codec = AjmSidebandStatisticsEnginePerCodec{}; + } + if (True(flags.statistics_flags & AjmStatisticsFlags::Memory)) { + job.output.p_memory = &output_batch.Consume(); + *job.output.p_memory = AjmSidebandStatisticsMemory{}; + } + } + + return job; +} + AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { std::optional job_flags = {}; std::optional input_control_buffer = {}; @@ -155,15 +239,6 @@ AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { batch_buffer.Skip(); break; } - case Identifier::AjmIdentInlineBuf: { - ASSERT_MSG(!output_control_buffer.has_value(), - "Only one instance of inline buffer is allowed per 
job"); - const auto& buffer = batch_buffer.Consume(); - if (buffer.p_address != nullptr && buffer.size != 0) { - inline_buffer = buffer; - } - break; - } case Identifier::AjmIdentOutputRunBuf: { auto& buffer = batch_buffer.Consume(); u8* p_begin = reinterpret_cast(buffer.p_address); @@ -186,13 +261,12 @@ AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { } } + ASSERT(job_flags.has_value()); job.flags = job_flags.value(); // Initialize sideband input parameters if (input_control_buffer.has_value()) { - AjmBatchBuffer input_batch(reinterpret_cast(input_control_buffer->p_address), - input_control_buffer->size); - + AjmBatchBuffer input_batch(input_control_buffer.value()); const auto sideband_flags = job_flags->sideband_flags; if (True(sideband_flags & AjmJobSidebandFlags::Format) && !input_batch.IsEmpty()) { job.input.format = input_batch.Consume(); @@ -202,6 +276,9 @@ AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { } const auto control_flags = job_flags.value().control_flags; + if (True(control_flags & AjmJobControlFlags::Resample)) { + job.input.resample_parameters = input_batch.Consume(); + } if (True(control_flags & AjmJobControlFlags::Initialize)) { job.input.init_params = AjmDecAt9InitializeParameters{}; std::memcpy(&job.input.init_params.value(), input_batch.GetCurrent(), @@ -209,21 +286,9 @@ AjmJob AjmJobFromBatchBuffer(u32 instance_id, AjmBatchBuffer batch_buffer) { } } - if (inline_buffer.has_value()) { - AjmBatchBuffer inline_batch(reinterpret_cast(inline_buffer->p_address), - inline_buffer->size); - - const auto control_flags = job_flags.value().control_flags; - if (True(control_flags & AjmJobControlFlags::Resample)) { - job.input.resample_parameters = inline_batch.Consume(); - } - } - // Initialize sideband output parameters if (output_control_buffer.has_value()) { - AjmBatchBuffer output_batch(reinterpret_cast(output_control_buffer->p_address), - output_control_buffer->size); - + AjmBatchBuffer 
output_batch(output_control_buffer.value()); job.output.p_result = &output_batch.Consume(); *job.output.p_result = AjmSidebandResult{}; @@ -260,9 +325,21 @@ std::shared_ptr AjmBatch::FromBatchBuffer(std::span data) { AjmBatchBuffer buffer(data); while (!buffer.IsEmpty()) { auto& job_chunk = buffer.Consume(); + if (job_chunk.header.ident == AjmIdentInlineBuf) { + // Inline buffers are used to store sideband input data. + // We should just skip them as they do not require any special handling. + buffer.Advance(job_chunk.size); + continue; + } ASSERT(job_chunk.header.ident == AjmIdentJob); auto instance_id = job_chunk.header.payload; - batch->jobs.push_back(AjmJobFromBatchBuffer(instance_id, buffer.SubBuffer(job_chunk.size))); + if (instance_id == AJM_INSTANCE_STATISTICS) { + batch->jobs.push_back( + AjmStatisticsJobFromBatchBuffer(instance_id, buffer.SubBuffer(job_chunk.size))); + } else { + batch->jobs.push_back( + AjmJobFromBatchBuffer(instance_id, buffer.SubBuffer(job_chunk.size))); + } } return batch; diff --git a/src/core/libraries/ajm/ajm_batch.h b/src/core/libraries/ajm/ajm_batch.h index 3c586b773..09daa630d 100644 --- a/src/core/libraries/ajm/ajm_batch.h +++ b/src/core/libraries/ajm/ajm_batch.h @@ -23,6 +23,7 @@ struct AjmJob { struct Input { std::optional init_params; std::optional resample_parameters; + std::optional statistics_engine_parameters; std::optional format; std::optional gapless_decode; std::vector buffer; @@ -33,6 +34,9 @@ struct AjmJob { AjmSidebandResult* p_result = nullptr; AjmSidebandStream* p_stream = nullptr; AjmSidebandFormat* p_format = nullptr; + AjmSidebandStatisticsMemory* p_memory = nullptr; + AjmSidebandStatisticsEnginePerCodec* p_engine_per_codec = nullptr; + AjmSidebandStatisticsEngine* p_engine = nullptr; AjmSidebandGaplessDecode* p_gapless_decode = nullptr; AjmSidebandMFrame* p_mframe = nullptr; u8* p_codec_info = nullptr; diff --git a/src/core/libraries/ajm/ajm_context.cpp b/src/core/libraries/ajm/ajm_context.cpp index 
09255110c..8992dd83b 100644 --- a/src/core/libraries/ajm/ajm_context.cpp +++ b/src/core/libraries/ajm/ajm_context.cpp @@ -9,6 +9,7 @@ #include "core/libraries/ajm/ajm_context.h" #include "core/libraries/ajm/ajm_error.h" #include "core/libraries/ajm/ajm_instance.h" +#include "core/libraries/ajm/ajm_instance_statistics.h" #include "core/libraries/ajm/ajm_mp3.h" #include "core/libraries/error_codes.h" @@ -70,15 +71,19 @@ void AjmContext::ProcessBatch(u32 id, std::span jobs) { LOG_TRACE(Lib_Ajm, "Processing job {} for instance {}. flags = {:#x}", id, job.instance_id, job.flags.raw); - std::shared_ptr instance; - { - std::shared_lock lock(instances_mutex); - auto* p_instance = instances.Get(job.instance_id); - ASSERT_MSG(p_instance != nullptr, "Attempting to execute job on null instance"); - instance = *p_instance; - } + if (job.instance_id == AJM_INSTANCE_STATISTICS) { + AjmInstanceStatistics::Getinstance().ExecuteJob(job); + } else { + std::shared_ptr instance; + { + std::shared_lock lock(instances_mutex); + auto* p_instance = instances.Get(job.instance_id); + ASSERT_MSG(p_instance != nullptr, "Attempting to execute job on null instance"); + instance = *p_instance; + } - instance->ExecuteJob(job); + instance->ExecuteJob(job); + } } } diff --git a/src/core/libraries/ajm/ajm_instance.cpp b/src/core/libraries/ajm/ajm_instance.cpp index ea7fd5617..8af105c77 100644 --- a/src/core/libraries/ajm/ajm_instance.cpp +++ b/src/core/libraries/ajm/ajm_instance.cpp @@ -68,11 +68,11 @@ void AjmInstance::ExecuteJob(AjmJob& job) { m_codec->Initialize(¶ms, sizeof(params)); } if (job.input.resample_parameters.has_value()) { - UNREACHABLE_MSG("Unimplemented: resample parameters"); + LOG_ERROR(Lib_Ajm, "Unimplemented: resample parameters"); m_resample_parameters = job.input.resample_parameters.value(); } if (job.input.format.has_value()) { - UNREACHABLE_MSG("Unimplemented: format parameters"); + LOG_ERROR(Lib_Ajm, "Unimplemented: format parameters"); m_format = job.input.format.value(); } 
if (job.input.gapless_decode.has_value()) { diff --git a/src/core/libraries/ajm/ajm_instance_statistics.cpp b/src/core/libraries/ajm/ajm_instance_statistics.cpp new file mode 100644 index 000000000..c0c1af8bb --- /dev/null +++ b/src/core/libraries/ajm/ajm_instance_statistics.cpp @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "core/libraries/ajm/ajm.h" +#include "core/libraries/ajm/ajm_instance_statistics.h" + +namespace Libraries::Ajm { + +void AjmInstanceStatistics::ExecuteJob(AjmJob& job) { + if (job.output.p_engine) { + job.output.p_engine->usage_batch = 0.01; + const auto ic = job.input.statistics_engine_parameters->interval_count; + for (u32 idx = 0; idx < ic; ++idx) { + job.output.p_engine->usage_interval[idx] = 0.01; + } + } + if (job.output.p_engine_per_codec) { + job.output.p_engine_per_codec->codec_count = 1; + job.output.p_engine_per_codec->codec_id[0] = static_cast(AjmCodecType::At9Dec); + job.output.p_engine_per_codec->codec_percentage[0] = 0.01; + } + if (job.output.p_memory) { + job.output.p_memory->instance_free = 0x400000; + job.output.p_memory->buffer_free = 0x400000; + job.output.p_memory->batch_size = 0x4200; + job.output.p_memory->input_size = 0x2000; + job.output.p_memory->output_size = 0x2000; + job.output.p_memory->small_size = 0x200; + } +} + +AjmInstanceStatistics& AjmInstanceStatistics::Getinstance() { + static AjmInstanceStatistics instance; + return instance; +} + +} // namespace Libraries::Ajm diff --git a/src/core/libraries/ajm/ajm_instance_statistics.h b/src/core/libraries/ajm/ajm_instance_statistics.h new file mode 100644 index 000000000..ea70c9d56 --- /dev/null +++ b/src/core/libraries/ajm/ajm_instance_statistics.h @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "core/libraries/ajm/ajm_batch.h" + +namespace Libraries::Ajm { + 
+class AjmInstanceStatistics { +public: + void ExecuteJob(AjmJob& job); + + static AjmInstanceStatistics& Getinstance(); +}; + +} // namespace Libraries::Ajm diff --git a/src/core/libraries/audio/audioout.cpp b/src/core/libraries/audio/audioout.cpp index 89ea1d3f5..f0ad59c3b 100644 --- a/src/core/libraries/audio/audioout.cpp +++ b/src/core/libraries/audio/audioout.cpp @@ -3,12 +3,14 @@ #include #include -#include +#include +#include #include #include "common/assert.h" #include "common/config.h" #include "common/logging/log.h" +#include "common/thread.h" #include "core/libraries/audio/audioout.h" #include "core/libraries/audio/audioout_backend.h" #include "core/libraries/audio/audioout_error.h" @@ -16,116 +18,33 @@ namespace Libraries::AudioOut { -std::shared_mutex ports_mutex; +std::mutex port_open_mutex{}; std::array ports_out{}; static std::unique_ptr audio; -static std::string_view GetAudioOutPort(OrbisAudioOutPort port) { - switch (port) { - case OrbisAudioOutPort::Main: - return "MAIN"; - case OrbisAudioOutPort::Bgm: - return "BGM"; - case OrbisAudioOutPort::Voice: - return "VOICE"; - case OrbisAudioOutPort::Personal: - return "PERSONAL"; - case OrbisAudioOutPort::Padspk: - return "PADSPK"; - case OrbisAudioOutPort::Aux: - return "AUX"; - default: - return "INVALID"; - } -} - -static std::string_view GetAudioOutParamFormat(OrbisAudioOutParamFormat param) { - switch (param) { - case OrbisAudioOutParamFormat::S16Mono: - return "S16_MONO"; - case OrbisAudioOutParamFormat::S16Stereo: - return "S16_STEREO"; - case OrbisAudioOutParamFormat::S16_8CH: - return "S16_8CH"; - case OrbisAudioOutParamFormat::FloatMono: - return "FLOAT_MONO"; - case OrbisAudioOutParamFormat::FloatStereo: - return "FLOAT_STEREO"; - case OrbisAudioOutParamFormat::Float_8CH: - return "FLOAT_8CH"; - case OrbisAudioOutParamFormat::S16_8CH_Std: - return "S16_8CH_STD"; - case OrbisAudioOutParamFormat::Float_8CH_Std: - return "FLOAT_8CH_STD"; - default: - return "INVALID"; - } -} - -static 
std::string_view GetAudioOutParamAttr(OrbisAudioOutParamAttr attr) { - switch (attr) { - case OrbisAudioOutParamAttr::None: - return "NONE"; - case OrbisAudioOutParamAttr::Restricted: - return "RESTRICTED"; - case OrbisAudioOutParamAttr::MixToMain: - return "MIX_TO_MAIN"; - default: - return "INVALID"; - } -} - -static bool IsFormatFloat(const OrbisAudioOutParamFormat format) { - switch (format) { - case OrbisAudioOutParamFormat::S16Mono: - case OrbisAudioOutParamFormat::S16Stereo: - case OrbisAudioOutParamFormat::S16_8CH: - case OrbisAudioOutParamFormat::S16_8CH_Std: - return false; - case OrbisAudioOutParamFormat::FloatMono: - case OrbisAudioOutParamFormat::FloatStereo: - case OrbisAudioOutParamFormat::Float_8CH: - case OrbisAudioOutParamFormat::Float_8CH_Std: - return true; - default: - UNREACHABLE_MSG("Unknown format"); - } -} - -static u8 GetFormatNumChannels(const OrbisAudioOutParamFormat format) { - switch (format) { - case OrbisAudioOutParamFormat::S16Mono: - case OrbisAudioOutParamFormat::FloatMono: - return 1; - case OrbisAudioOutParamFormat::S16Stereo: - case OrbisAudioOutParamFormat::FloatStereo: - return 2; - case OrbisAudioOutParamFormat::S16_8CH: - case OrbisAudioOutParamFormat::Float_8CH: - case OrbisAudioOutParamFormat::S16_8CH_Std: - case OrbisAudioOutParamFormat::Float_8CH_Std: - return 8; - default: - UNREACHABLE_MSG("Unknown format"); - } -} - -static u8 GetFormatSampleSize(const OrbisAudioOutParamFormat format) { - switch (format) { - case OrbisAudioOutParamFormat::S16Mono: - case OrbisAudioOutParamFormat::S16Stereo: - case OrbisAudioOutParamFormat::S16_8CH: - case OrbisAudioOutParamFormat::S16_8CH_Std: - return 2; - case OrbisAudioOutParamFormat::FloatMono: - case OrbisAudioOutParamFormat::FloatStereo: - case OrbisAudioOutParamFormat::Float_8CH: - case OrbisAudioOutParamFormat::Float_8CH_Std: - return 4; - default: - UNREACHABLE_MSG("Unknown format"); - } +static AudioFormatInfo GetFormatInfo(const OrbisAudioOutParamFormat format) { + static 
constexpr std::array format_infos = {{ + // S16Mono + {false, 2, 1, {0}}, + // S16Stereo + {false, 2, 2, {0, 1}}, + // S16_8CH + {false, 2, 8, {0, 1, 2, 3, 4, 5, 6, 7}}, + // FloatMono + {true, 4, 1, {0}}, + // FloatStereo + {true, 4, 2, {0, 1}}, + // Float_8CH + {true, 4, 8, {0, 1, 2, 3, 4, 5, 6, 7}}, + // S16_8CH_Std + {false, 2, 8, {0, 1, 2, 3, 6, 7, 4, 5}}, + // Float_8CH_Std + {true, 4, 8, {0, 1, 2, 3, 6, 7, 4, 5}}, + }}; + const auto index = static_cast(format); + ASSERT_MSG(index < format_infos.size(), "Unknown audio format {}", index); + return format_infos[index]; } int PS4_SYSV_ABI sceAudioOutDeviceIdOpen() { @@ -174,13 +93,20 @@ int PS4_SYSV_ABI sceAudioOutClose(s32 handle) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - std::scoped_lock lock(ports_mutex); + std::unique_lock open_lock{port_open_mutex}; auto& port = ports_out.at(handle - 1); - if (!port.impl) { - return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + { + std::unique_lock lock{port.mutex}; + if (!port.IsOpen()) { + return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + } + std::free(port.output_buffer); + port.output_buffer = nullptr; + port.output_ready = false; + port.impl = nullptr; } - - port.impl = nullptr; + // Stop outside of port lock scope to prevent deadlocks. + port.output_thread.Stop(); return ORBIS_OK; } @@ -249,35 +175,34 @@ int PS4_SYSV_ABI sceAudioOutGetPortState(s32 handle, OrbisAudioOutPortState* sta return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - std::scoped_lock lock(ports_mutex); - const auto& port = ports_out.at(handle - 1); - if (!port.impl) { - return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + auto& port = ports_out.at(handle - 1); + { + std::unique_lock lock{port.mutex}; + if (!port.IsOpen()) { + return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + } + switch (port.type) { + case OrbisAudioOutPort::Main: + case OrbisAudioOutPort::Bgm: + case OrbisAudioOutPort::Voice: + state->output = 1; + state->channel = port.format_info.num_channels > 2 ? 
2 : port.format_info.num_channels; + break; + case OrbisAudioOutPort::Personal: + case OrbisAudioOutPort::Padspk: + state->output = 4; + state->channel = 1; + break; + case OrbisAudioOutPort::Aux: + state->output = 0; + state->channel = 0; + break; + default: + UNREACHABLE(); + } + state->rerouteCounter = 0; + state->volume = 127; } - - state->rerouteCounter = 0; - state->volume = 127; - - switch (port.type) { - case OrbisAudioOutPort::Main: - case OrbisAudioOutPort::Bgm: - case OrbisAudioOutPort::Voice: - state->output = 1; - state->channel = port.channels_num > 2 ? 2 : port.channels_num; - break; - case OrbisAudioOutPort::Personal: - case OrbisAudioOutPort::Padspk: - state->output = 4; - state->channel = 1; - break; - case OrbisAudioOutPort::Aux: - state->output = 0; - state->channel = 0; - break; - default: - UNREACHABLE(); - } - return ORBIS_OK; } @@ -311,16 +236,7 @@ int PS4_SYSV_ABI sceAudioOutInit() { if (audio != nullptr) { return ORBIS_AUDIO_OUT_ERROR_ALREADY_INIT; } - const auto backend = Config::getAudioBackend(); - if (backend == "cubeb") { - audio = std::make_unique(); - } else if (backend == "sdl") { - audio = std::make_unique(); - } else { - // Cubeb as a default fallback. 
- LOG_ERROR(Lib_AudioOut, "Invalid audio backend '{}', defaulting to cubeb.", backend); - audio = std::make_unique(); - } + audio = std::make_unique(); return ORBIS_OK; } @@ -354,6 +270,31 @@ int PS4_SYSV_ABI sceAudioOutMbusInit() { return ORBIS_OK; } +static void AudioOutputThread(PortOut* port, const std::stop_token& stop) { + { + const auto thread_name = fmt::format("shadPS4:AudioOutputThread:{}", fmt::ptr(port)); + Common::SetCurrentThreadName(thread_name.c_str()); + } + + Common::AccurateTimer timer( + std::chrono::nanoseconds(1000000000ULL * port->buffer_frames / port->sample_rate)); + while (true) { + timer.Start(); + { + std::unique_lock lock{port->mutex}; + if (port->output_cv.wait(lock, stop, [&] { return port->output_ready; })) { + port->impl->Output(port->output_buffer); + port->output_ready = false; + } + } + port->output_cv.notify_one(); + if (stop.stop_requested()) { + break; + } + timer.End(); + } +} + s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, OrbisAudioOutPort port_type, s32 index, u32 length, u32 sample_rate, @@ -361,9 +302,9 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, LOG_INFO(Lib_AudioOut, "id = {} port_type = {} index = {} length = {} sample_rate = {} " "param_type = {} attr = {}", - user_id, GetAudioOutPort(port_type), index, length, sample_rate, - GetAudioOutParamFormat(param_type.data_format), - GetAudioOutParamAttr(param_type.attributes)); + user_id, magic_enum::enum_name(port_type), index, length, sample_rate, + magic_enum::enum_name(param_type.data_format.Value()), + magic_enum::enum_name(param_type.attributes.Value())); if ((port_type < OrbisAudioOutPort::Main || port_type > OrbisAudioOutPort::Padspk) && (port_type != OrbisAudioOutPort::Aux)) { LOG_ERROR(Lib_AudioOut, "Invalid port type"); @@ -394,26 +335,30 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT; } - std::scoped_lock 
lock{ports_mutex}; + std::unique_lock open_lock{port_open_mutex}; const auto port = - std::ranges::find_if(ports_out, [&](const PortOut& p) { return p.impl == nullptr; }); + std::ranges::find_if(ports_out, [&](const PortOut& p) { return !p.IsOpen(); }); if (port == ports_out.end()) { LOG_ERROR(Lib_AudioOut, "Audio ports are full"); return ORBIS_AUDIO_OUT_ERROR_PORT_FULL; } - port->type = port_type; - port->format = format; - port->is_float = IsFormatFloat(format); - port->sample_size = GetFormatSampleSize(format); - port->channels_num = GetFormatNumChannels(format); - port->samples_num = length; - port->frame_size = port->sample_size * port->channels_num; - port->buffer_size = port->frame_size * port->samples_num; - port->freq = sample_rate; - port->volume.fill(SCE_AUDIO_OUT_VOLUME_0DB); - port->impl = audio->Open(*port); + { + std::unique_lock port_lock(port->mutex); + port->type = port_type; + port->format_info = GetFormatInfo(format); + port->sample_rate = sample_rate; + port->buffer_frames = length; + port->volume.fill(SCE_AUDIO_OUT_VOLUME_0DB); + + port->impl = audio->Open(*port); + + port->output_buffer = std::malloc(port->BufferSize()); + port->output_ready = false; + port->output_thread.Run( + [port](const std::stop_token& stop) { AudioOutputThread(&*port, stop); }); + } return std::distance(ports_out.begin(), port) + 1; } @@ -426,24 +371,29 @@ s32 PS4_SYSV_ABI sceAudioOutOutput(s32 handle, void* ptr) { if (handle < 1 || handle > SCE_AUDIO_OUT_NUM_PORTS) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - if (ptr == nullptr) { - // Nothing to output - return ORBIS_OK; - } auto& port = ports_out.at(handle - 1); - if (!port.impl) { - return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + { + std::unique_lock lock{port.mutex}; + if (!port.IsOpen()) { + return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; + } + port.output_cv.wait(lock, [&] { return !port.output_ready; }); + if (ptr != nullptr && port.IsOpen()) { + std::memcpy(port.output_buffer, ptr, port.BufferSize()); + 
port.output_ready = true; + } } - - port.impl->Output(ptr, port.buffer_size); + port.output_cv.notify_one(); return ORBIS_OK; } int PS4_SYSV_ABI sceAudioOutOutputs(OrbisAudioOutOutputParam* param, u32 num) { for (u32 i = 0; i < num; i++) { - if (const auto err = sceAudioOutOutput(param[i].handle, param[i].ptr); err != 0) - return err; + const auto [handle, ptr] = param[i]; + if (const auto ret = sceAudioOutOutput(handle, ptr); ret != ORBIS_OK) { + return ret; + } } return ORBIS_OK; } @@ -543,40 +493,19 @@ s32 PS4_SYSV_ABI sceAudioOutSetVolume(s32 handle, s32 flag, s32* vol) { return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } - std::scoped_lock lock(ports_mutex); auto& port = ports_out.at(handle - 1); - if (!port.impl) { - return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; - } - - for (int i = 0; i < port.channels_num; i++, flag >>= 1u) { - auto bit = flag & 0x1u; - if (bit == 1) { - int src_index = i; - if (port.format == OrbisAudioOutParamFormat::Float_8CH_Std || - port.format == OrbisAudioOutParamFormat::S16_8CH_Std) { - switch (i) { - case 4: - src_index = 6; - break; - case 5: - src_index = 7; - break; - case 6: - src_index = 4; - break; - case 7: - src_index = 5; - break; - default: - break; - } - } - port.volume[i] = vol[src_index]; + { + std::unique_lock lock{port.mutex}; + if (!port.IsOpen()) { + return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; } + for (int i = 0; i < port.format_info.num_channels; i++, flag >>= 1u) { + if (flag & 0x1u) { + port.volume[i] = vol[i]; + } + } + port.impl->SetVolume(port.volume); } - - port.impl->SetVolume(port.volume); return ORBIS_OK; } diff --git a/src/core/libraries/audio/audioout.h b/src/core/libraries/audio/audioout.h index 58c77db99..5eafb43a1 100644 --- a/src/core/libraries/audio/audioout.h +++ b/src/core/libraries/audio/audioout.h @@ -3,9 +3,12 @@ #pragma once +#include #include +#include #include "common/bit_field.h" +#include "core/libraries/kernel/threads.h" #include "core/libraries/system/userservice.h" namespace Libraries::AudioOut 
{ @@ -14,12 +17,12 @@ class PortBackend; // Main up to 8 ports, BGM 1 port, voice up to 4 ports, // personal up to 4 ports, padspk up to 5 ports, aux 1 port -constexpr int SCE_AUDIO_OUT_NUM_PORTS = 22; -constexpr int SCE_AUDIO_OUT_VOLUME_0DB = 32768; // max volume value +constexpr s32 SCE_AUDIO_OUT_NUM_PORTS = 22; +constexpr s32 SCE_AUDIO_OUT_VOLUME_0DB = 32768; // max volume value enum class OrbisAudioOutPort { Main = 0, Bgm = 1, Voice = 2, Personal = 3, Padspk = 4, Aux = 127 }; -enum class OrbisAudioOutParamFormat { +enum class OrbisAudioOutParamFormat : u32 { S16Mono = 0, S16Stereo = 1, S16_8CH = 2, @@ -30,7 +33,7 @@ enum class OrbisAudioOutParamFormat { Float_8CH_Std = 7 }; -enum class OrbisAudioOutParamAttr { +enum class OrbisAudioOutParamAttr : u32 { None = 0, Restricted = 1, MixToMain = 2, @@ -59,19 +62,41 @@ struct OrbisAudioOutPortState { u64 reserved64[2]; }; -struct PortOut { - std::unique_ptr impl{}; - - OrbisAudioOutPort type; - OrbisAudioOutParamFormat format; +struct AudioFormatInfo { bool is_float; u8 sample_size; - u8 channels_num; - u32 samples_num; - u32 frame_size; - u32 buffer_size; - u32 freq; - std::array volume; + u8 num_channels; + /// Layout array remapping channel indices, specified in this order: + /// FL, FR, FC, LFE, BL, BR, SL, SR + std::array channel_layout; + + [[nodiscard]] u16 FrameSize() const { + return sample_size * num_channels; + } +}; + +struct PortOut { + std::mutex mutex; + std::unique_ptr impl{}; + + void* output_buffer; + std::condition_variable_any output_cv; + bool output_ready; + Kernel::Thread output_thread{}; + + OrbisAudioOutPort type; + AudioFormatInfo format_info; + u32 sample_rate; + u32 buffer_frames; + std::array volume; + + [[nodiscard]] bool IsOpen() const { + return impl != nullptr; + } + + [[nodiscard]] u32 BufferSize() const { + return buffer_frames * format_info.FrameSize(); + } }; int PS4_SYSV_ABI sceAudioOutDeviceIdOpen(); diff --git a/src/core/libraries/audio/audioout_backend.h 
b/src/core/libraries/audio/audioout_backend.h index ecc4cf7c6..0f36f19c8 100644 --- a/src/core/libraries/audio/audioout_backend.h +++ b/src/core/libraries/audio/audioout_backend.h @@ -3,8 +3,6 @@ #pragma once -typedef struct cubeb cubeb; - namespace Libraries::AudioOut { struct PortOut; @@ -13,7 +11,10 @@ class PortBackend { public: virtual ~PortBackend() = default; - virtual void Output(void* ptr, size_t size) = 0; + /// Guaranteed to be called in intervals of at least port buffer time, + /// with size equal to port buffer size. + virtual void Output(void* ptr) = 0; + virtual void SetVolume(const std::array& ch_volumes) = 0; }; @@ -25,17 +26,6 @@ public: virtual std::unique_ptr Open(PortOut& port) = 0; }; -class CubebAudioOut final : public AudioOutBackend { -public: - CubebAudioOut(); - ~CubebAudioOut() override; - - std::unique_ptr Open(PortOut& port) override; - -private: - cubeb* ctx = nullptr; -}; - class SDLAudioOut final : public AudioOutBackend { public: std::unique_ptr Open(PortOut& port) override; diff --git a/src/core/libraries/audio/cubeb_audio.cpp b/src/core/libraries/audio/cubeb_audio.cpp deleted file mode 100644 index e1195558a..000000000 --- a/src/core/libraries/audio/cubeb_audio.cpp +++ /dev/null @@ -1,163 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include - -#include "common/logging/log.h" -#include "common/ringbuffer.h" -#include "core/libraries/audio/audioout.h" -#include "core/libraries/audio/audioout_backend.h" - -namespace Libraries::AudioOut { - -constexpr int AUDIO_STREAM_BUFFER_THRESHOLD = 65536; // Define constant for buffer threshold - -class CubebPortBackend : public PortBackend { -public: - CubebPortBackend(cubeb* ctx, const PortOut& port) - : frame_size(port.frame_size), buffer(static_cast(port.buffer_size) * 4) { - if (!ctx) { - return; - } - const auto get_channel_layout = [&port] -> cubeb_channel_layout { - switch 
(port.channels_num) { - case 1: - return CUBEB_LAYOUT_MONO; - case 2: - return CUBEB_LAYOUT_STEREO; - case 8: - return CUBEB_LAYOUT_3F4_LFE; - default: - UNREACHABLE(); - } - }; - cubeb_stream_params stream_params = { - .format = port.is_float ? CUBEB_SAMPLE_FLOAT32LE : CUBEB_SAMPLE_S16LE, - .rate = port.freq, - .channels = port.channels_num, - .layout = get_channel_layout(), - .prefs = CUBEB_STREAM_PREF_NONE, - }; - u32 latency_frames = 512; - if (const auto ret = cubeb_get_min_latency(ctx, &stream_params, &latency_frames); - ret != CUBEB_OK) { - LOG_WARNING(Lib_AudioOut, - "Could not get minimum cubeb audio latency, falling back to default: {}", - ret); - } - char stream_name[64]; - snprintf(stream_name, sizeof(stream_name), "shadPS4 stream %p", this); - if (const auto ret = cubeb_stream_init(ctx, &stream, stream_name, nullptr, nullptr, nullptr, - &stream_params, latency_frames, &DataCallback, - &StateCallback, this); - ret != CUBEB_OK) { - LOG_ERROR(Lib_AudioOut, "Failed to create cubeb stream: {}", ret); - return; - } - if (const auto ret = cubeb_stream_start(stream); ret != CUBEB_OK) { - LOG_ERROR(Lib_AudioOut, "Failed to start cubeb stream: {}", ret); - cubeb_stream_destroy(stream); - stream = nullptr; - return; - } - } - - ~CubebPortBackend() override { - if (!stream) { - return; - } - if (const auto ret = cubeb_stream_stop(stream); ret != CUBEB_OK) { - LOG_WARNING(Lib_AudioOut, "Failed to stop cubeb stream: {}", ret); - } - cubeb_stream_destroy(stream); - stream = nullptr; - } - - void Output(void* ptr, size_t size) override { - if (!stream) { - return; - } - auto* data = static_cast(ptr); - - std::unique_lock lock{buffer_mutex}; - buffer_cv.wait(lock, [&] { return buffer.available_write() >= size; }); - buffer.enqueue(data, static_cast(size)); - } - - void SetVolume(const std::array& ch_volumes) override { - if (!stream) { - return; - } - // Cubeb does not have per-channel volumes, for now just take the maximum of the channels. 
- const auto vol = *std::ranges::max_element(ch_volumes); - if (const auto ret = - cubeb_stream_set_volume(stream, static_cast(vol) / SCE_AUDIO_OUT_VOLUME_0DB); - ret != CUBEB_OK) { - LOG_WARNING(Lib_AudioOut, "Failed to change cubeb stream volume: {}", ret); - } - } - -private: - static long DataCallback(cubeb_stream* stream, void* user_data, const void* in, void* out, - long num_frames) { - auto* stream_data = static_cast(user_data); - const auto out_data = static_cast(out); - const auto requested_size = static_cast(num_frames * stream_data->frame_size); - - std::unique_lock lock{stream_data->buffer_mutex}; - const auto dequeued_size = stream_data->buffer.dequeue(out_data, requested_size); - lock.unlock(); - stream_data->buffer_cv.notify_one(); - - if (dequeued_size < requested_size) { - // Need to fill remaining space with silence. - std::memset(out_data + dequeued_size, 0, requested_size - dequeued_size); - } - return num_frames; - } - - static void StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) { - switch (state) { - case CUBEB_STATE_STARTED: - LOG_INFO(Lib_AudioOut, "Cubeb stream started"); - break; - case CUBEB_STATE_STOPPED: - LOG_INFO(Lib_AudioOut, "Cubeb stream stopped"); - break; - case CUBEB_STATE_DRAINED: - LOG_INFO(Lib_AudioOut, "Cubeb stream drained"); - break; - case CUBEB_STATE_ERROR: - LOG_ERROR(Lib_AudioOut, "Cubeb stream encountered an error"); - break; - } - } - - size_t frame_size; - RingBuffer buffer; - std::mutex buffer_mutex; - std::condition_variable buffer_cv; - cubeb_stream* stream{}; -}; - -CubebAudioOut::CubebAudioOut() { - if (const auto ret = cubeb_init(&ctx, "shadPS4", nullptr); ret != CUBEB_OK) { - LOG_CRITICAL(Lib_AudioOut, "Failed to create cubeb context: {}", ret); - } -} - -CubebAudioOut::~CubebAudioOut() { - if (!ctx) { - return; - } - cubeb_destroy(ctx); - ctx = nullptr; -} - -std::unique_ptr CubebAudioOut::Open(PortOut& port) { - return std::make_unique(ctx, port); -} - -} // namespace 
Libraries::AudioOut diff --git a/src/core/libraries/audio/sdl_audio.cpp b/src/core/libraries/audio/sdl_audio.cpp index 598941ba7..762a9f682 100644 --- a/src/core/libraries/audio/sdl_audio.cpp +++ b/src/core/libraries/audio/sdl_audio.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "common/logging/log.h" #include "core/libraries/audio/audioout.h" @@ -10,15 +11,21 @@ namespace Libraries::AudioOut { -constexpr int AUDIO_STREAM_BUFFER_THRESHOLD = 65536; // Define constant for buffer threshold - class SDLPortBackend : public PortBackend { public: - explicit SDLPortBackend(const PortOut& port) { + explicit SDLPortBackend(const PortOut& port) + : frame_size(port.format_info.FrameSize()), guest_buffer_size(port.BufferSize()) { + // We want the latency for delivering frames out to be as small as possible, + // so set the sample frames hint to the number of frames per buffer. + const auto samples_num_str = std::to_string(port.buffer_frames); + if (!SDL_SetHint(SDL_HINT_AUDIO_DEVICE_SAMPLE_FRAMES, samples_num_str.c_str())) { + LOG_WARNING(Lib_AudioOut, "Failed to set SDL audio sample frames hint to {}: {}", + samples_num_str, SDL_GetError()); + } const SDL_AudioSpec fmt = { - .format = port.is_float ? SDL_AUDIO_F32 : SDL_AUDIO_S16, - .channels = port.channels_num, - .freq = static_cast(port.freq), + .format = port.format_info.is_float ? 
SDL_AUDIO_F32LE : SDL_AUDIO_S16LE, + .channels = port.format_info.num_channels, + .freq = static_cast(port.sample_rate), }; stream = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &fmt, nullptr, nullptr); @@ -26,6 +33,15 @@ public: LOG_ERROR(Lib_AudioOut, "Failed to create SDL audio stream: {}", SDL_GetError()); return; } + CalculateQueueThreshold(); + if (!SDL_SetAudioStreamInputChannelMap(stream, port.format_info.channel_layout.data(), + port.format_info.num_channels)) { + LOG_ERROR(Lib_AudioOut, "Failed to configure SDL audio stream channel map: {}", + SDL_GetError()); + SDL_DestroyAudioStream(stream); + stream = nullptr; + return; + } if (!SDL_ResumeAudioStreamDevice(stream)) { LOG_ERROR(Lib_AudioOut, "Failed to resume SDL audio stream: {}", SDL_GetError()); SDL_DestroyAudioStream(stream); @@ -42,14 +58,23 @@ public: stream = nullptr; } - void Output(void* ptr, size_t size) override { + void Output(void* ptr) override { if (!stream) { return; } - SDL_PutAudioStreamData(stream, ptr, static_cast(size)); - while (SDL_GetAudioStreamAvailable(stream) > AUDIO_STREAM_BUFFER_THRESHOLD) { - // Yield to allow the stream to drain. - std::this_thread::yield(); + // AudioOut library manages timing, but we still need to guard against the SDL + // audio queue stalling, which may happen during device changes, for example. + // Otherwise, latency may grow over time unbounded. + if (const auto queued = SDL_GetAudioStreamQueued(stream); queued >= queue_threshold) { + LOG_WARNING(Lib_AudioOut, + "SDL audio queue backed up ({} queued, {} threshold), clearing.", queued, + queue_threshold); + SDL_ClearAudioStream(stream); + // Recalculate the threshold in case this happened because of a device change. 
+ CalculateQueueThreshold(); + } + if (!SDL_PutAudioStreamData(stream, ptr, static_cast(guest_buffer_size))) { + LOG_ERROR(Lib_AudioOut, "Failed to output to SDL audio stream: {}", SDL_GetError()); } } @@ -66,7 +91,31 @@ public: } private: - SDL_AudioStream* stream; + void CalculateQueueThreshold() { + SDL_AudioSpec discard; + int sdl_buffer_frames; + if (!SDL_GetAudioDeviceFormat(SDL_GetAudioStreamDevice(stream), &discard, + &sdl_buffer_frames)) { + LOG_WARNING(Lib_AudioOut, "Failed to get SDL audio stream buffer size: {}", + SDL_GetError()); + sdl_buffer_frames = 0; + } + const auto sdl_buffer_size = sdl_buffer_frames * frame_size; + const auto new_threshold = std::max(guest_buffer_size, sdl_buffer_size) * 4; + if (host_buffer_size != sdl_buffer_size || queue_threshold != new_threshold) { + host_buffer_size = sdl_buffer_size; + queue_threshold = new_threshold; + LOG_INFO(Lib_AudioOut, + "SDL audio buffers: guest = {} bytes, host = {} bytes, threshold = {} bytes", + guest_buffer_size, host_buffer_size, queue_threshold); + } + } + + u32 frame_size; + u32 guest_buffer_size; + u32 host_buffer_size{}; + u32 queue_threshold{}; + SDL_AudioStream* stream{}; }; std::unique_ptr SDLAudioOut::Open(PortOut& port) { diff --git a/src/core/libraries/fiber/fiber.cpp b/src/core/libraries/fiber/fiber.cpp index 7bb81b61e..b77b5b5b6 100644 --- a/src/core/libraries/fiber/fiber.cpp +++ b/src/core/libraries/fiber/fiber.cpp @@ -3,6 +3,7 @@ #include "fiber.h" +#include "common/elf_info.h" #include "common/logging/log.h" #include "core/libraries/fiber/fiber_error.h" #include "core/libraries/libs.h" @@ -41,6 +42,41 @@ void PS4_SYSV_ABI _sceFiberCheckStackOverflow(OrbisFiberContext* ctx) { } } +s32 PS4_SYSV_ABI _sceFiberAttachContext(OrbisFiber* fiber, void* addr_context, u64 size_context) { + if (size_context && size_context < ORBIS_FIBER_CONTEXT_MINIMUM_SIZE) { + return ORBIS_FIBER_ERROR_RANGE; + } + if (size_context & 15) { + return ORBIS_FIBER_ERROR_INVALID; + } + if (!addr_context || 
!size_context) { + return ORBIS_FIBER_ERROR_INVALID; + } + if (fiber->addr_context) { + return ORBIS_FIBER_ERROR_INVALID; + } + + fiber->addr_context = addr_context; + fiber->size_context = size_context; + fiber->context_start = addr_context; + fiber->context_end = reinterpret_cast(addr_context) + size_context; + + /* Apply signature to start of stack */ + *(u64*)addr_context = kFiberStackSignature; + + if (fiber->flags & FiberFlags::ContextSizeCheck) { + u64* stack_start = reinterpret_cast(fiber->context_start); + u64* stack_end = reinterpret_cast(fiber->context_end); + + u64* stack_ptr = stack_start + 1; + while (stack_ptr < stack_end) { + *stack_ptr++ = kFiberStackSizeCheck; + } + } + + return ORBIS_OK; +} + void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to, OrbisFiberContext* ctx) { OrbisFiberContext* fiber_ctx = fiber->context; @@ -62,8 +98,7 @@ void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to, data.entry = fiber->entry; data.arg_on_initialize = fiber->arg_on_initialize; data.arg_on_run_to = arg_on_run_to; - data.stack_addr = - reinterpret_cast(reinterpret_cast(fiber->addr_context) + fiber->size_context); + data.stack_addr = reinterpret_cast(fiber->addr_context) + fiber->size_context; if (fiber->flags & FiberFlags::SetFpuRegs) { data.fpucw = 0x037f; data.mxcsr = 0x9fc0; @@ -111,9 +146,10 @@ void PS4_SYSV_ABI _sceFiberTerminate(OrbisFiber* fiber, u64 arg_on_return, Orbis __builtin_trap(); } -s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, - u64 arg_on_initialize, void* addr_context, u64 size_context, - const OrbisFiberOptParam* opt_param, u32 build_ver) { +s32 PS4_SYSV_ABI sceFiberInitializeImpl(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, + u64 arg_on_initialize, void* addr_context, u64 size_context, + const OrbisFiberOptParam* opt_param, u32 flags, + u32 build_ver) { if (!fiber || !name || !entry) { return ORBIS_FIBER_ERROR_NULL; } @@ 
-139,12 +175,12 @@ s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFi return ORBIS_FIBER_ERROR_INVALID; } - u32 flags = FiberFlags::None; - if (build_ver >= 0x3500000) { - flags |= FiberFlags::SetFpuRegs; + u32 user_flags = flags; + if (build_ver >= Common::ElfInfo::FW_35) { + user_flags |= FiberFlags::SetFpuRegs; } if (context_size_check) { - flags |= FiberFlags::ContextSizeCheck; + user_flags |= FiberFlags::ContextSizeCheck; } strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH); @@ -154,7 +190,7 @@ s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFi fiber->addr_context = addr_context; fiber->size_context = size_context; fiber->context = nullptr; - fiber->flags = flags; + fiber->flags = user_flags; /* A low stack area is problematic, as we can easily @@ -169,8 +205,7 @@ s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFi if (addr_context != nullptr) { fiber->context_start = addr_context; - fiber->context_end = - reinterpret_cast(reinterpret_cast(addr_context) + size_context); + fiber->context_end = reinterpret_cast(addr_context) + size_context; /* Apply signature to start of stack */ *(u64*)addr_context = kFiberStackSignature; @@ -221,11 +256,12 @@ s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber) { return ORBIS_OK; } -s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) { +s32 PS4_SYSV_ABI sceFiberRunImpl(OrbisFiber* fiber, void* addr_context, u64 size_context, + u64 arg_on_run_to, u64* arg_on_return) { if (!fiber) { return ORBIS_FIBER_ERROR_NULL; } - if ((u64)fiber & 7) { + if ((u64)fiber & 7 || (u64)addr_context & 15) { return ORBIS_FIBER_ERROR_ALIGNMENT; } if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { @@ -237,6 +273,14 @@ s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_r return ORBIS_FIBER_ERROR_PERMISSION; } + /* Caller wants to attach context and run. 
*/ + if (addr_context != nullptr || size_context != 0) { + s32 res = _sceFiberAttachContext(fiber, addr_context, size_context); + if (res < 0) { + return res; + } + } + FiberState expected = FiberState::Idle; if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) { return ORBIS_FIBER_ERROR_STATE; @@ -288,11 +332,12 @@ s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_r return ORBIS_OK; } -s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run) { +s32 PS4_SYSV_ABI sceFiberSwitchImpl(OrbisFiber* fiber, void* addr_context, u64 size_context, + u64 arg_on_run_to, u64* arg_on_run) { if (!fiber) { return ORBIS_FIBER_ERROR_NULL; } - if ((u64)fiber & 7) { + if ((u64)fiber & 7 || (u64)addr_context & 15) { return ORBIS_FIBER_ERROR_ALIGNMENT; } if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) { @@ -304,6 +349,14 @@ s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_o return ORBIS_FIBER_ERROR_PERMISSION; } + /* Caller wants to attach context and switch. 
*/ + if (addr_context != nullptr || size_context != 0) { + s32 res = _sceFiberAttachContext(fiber, addr_context, size_context); + if (res < 0) { + return res; + } + } + FiberState expected = FiberState::Idle; if (!fiber->state.compare_exchange_strong(expected, FiberState::Run)) { return ORBIS_FIBER_ERROR_STATE; @@ -462,9 +515,39 @@ s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name) { return ORBIS_OK; } +s32 PS4_SYSV_ABI sceFiberGetThreadFramePointerAddress(u64* addr_frame_pointer) { + if (!addr_frame_pointer) { + return ORBIS_FIBER_ERROR_NULL; + } + + OrbisFiberContext* g_ctx = GetFiberContext(); + if (!g_ctx) { + return ORBIS_FIBER_ERROR_PERMISSION; + } + + *addr_frame_pointer = g_ctx->rbp; + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry, + u64 arg_on_initialize, void* addr_context, u64 size_context, + const OrbisFiberOptParam* opt_param, u32 build_ver) { + return sceFiberInitializeImpl(fiber, name, entry, arg_on_initialize, addr_context, size_context, + opt_param, 0, build_ver); +} + +s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) { + return sceFiberRunImpl(fiber, nullptr, 0, arg_on_run_to, arg_on_return); +} + +s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run) { + return sceFiberSwitchImpl(fiber, nullptr, 0, arg_on_run_to, arg_on_run); +} + void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("hVYD7Ou2pCQ", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize); - LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize); + LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberInitializeImpl); // _sceFiberInitializeWithInternalOptionImpl LIB_FUNCTION("asjUJJ+aa8s", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberOptParamInitialize); LIB_FUNCTION("JeNX5F-NzQU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberFinalize); @@ 
-473,12 +556,20 @@ void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("p+zLIOg27zU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetSelf); LIB_FUNCTION("B0ZX2hx9DMw", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberReturnToThread); + LIB_FUNCTION("avfGJ94g36Q", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberRunImpl); // _sceFiberAttachContextAndRun + LIB_FUNCTION("ZqhZFuzKT6U", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberSwitchImpl); // _sceFiberAttachContextAndSwitch + LIB_FUNCTION("uq2Y5BFz0PE", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetInfo); LIB_FUNCTION("Lcqty+QNWFc", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberStartContextSizeCheck); LIB_FUNCTION("Kj4nXMpnM8Y", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberStopContextSizeCheck); LIB_FUNCTION("JzyT91ucGDc", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRename); + + LIB_FUNCTION("0dy4JtMUcMQ", "libSceFiber", 1, "libSceFiber", 1, 1, + sceFiberGetThreadFramePointerAddress); } } // namespace Libraries::Fiber diff --git a/src/core/libraries/fiber/fiber.h b/src/core/libraries/fiber/fiber.h index 3c4e3b70e..edcd9afe8 100644 --- a/src/core/libraries/fiber/fiber.h +++ b/src/core/libraries/fiber/fiber.h @@ -114,5 +114,7 @@ s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck(void); s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name); +s32 PS4_SYSV_ABI sceFiberGetThreadFramePointerAddress(u64* addr_frame_pointer); + void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::Fiber \ No newline at end of file diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 91a1329e5..805c9124e 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1015,11 +1015,7 @@ int PS4_SYSV_ABI sceGnmGetDebugTimestamp() { int PS4_SYSV_ABI sceGnmGetEqEventType(const SceKernelEvent* ev) { LOG_TRACE(Lib_GnmDriver, "called"); - - auto data = 
sceKernelGetEventData(ev); - ASSERT(static_cast(data) == GnmEventType::GfxEop); - - return data; + return sceKernelGetEventData(ev); } int PS4_SYSV_ABI sceGnmGetEqTimeStamp() { diff --git a/src/core/libraries/kernel/equeue.cpp b/src/core/libraries/kernel/equeue.cpp index 3ae77e46b..64d4966c0 100644 --- a/src/core/libraries/kernel/equeue.cpp +++ b/src/core/libraries/kernel/equeue.cpp @@ -283,6 +283,19 @@ s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* return ORBIS_OK; } +int PS4_SYSV_ABI sceKernelDeleteHRTimerEvent(SceKernelEqueue eq, int id) { + if (eq == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + if (eq->HasSmallTimer()) { + return eq->RemoveSmallTimer(id) ? ORBIS_OK : ORBIS_KERNEL_ERROR_ENOENT; + } else { + return eq->RemoveEvent(id, SceKernelEvent::Filter::HrTimer) ? ORBIS_OK + : ORBIS_KERNEL_ERROR_ENOENT; + } +} + int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id) { if (eq == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; @@ -346,7 +359,7 @@ int PS4_SYSV_ABI sceKernelDeleteUserEvent(SceKernelEqueue eq, int id) { return ORBIS_OK; } -s16 PS4_SYSV_ABI sceKernelGetEventFilter(const SceKernelEvent* ev) { +int PS4_SYSV_ABI sceKernelGetEventFilter(const SceKernelEvent* ev) { return ev->filter; } @@ -362,6 +375,7 @@ void RegisterEventQueue(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("4R6-OvI2cEA", "libkernel", 1, "libkernel", 1, 1, sceKernelAddUserEvent); LIB_FUNCTION("WDszmSbWuDk", "libkernel", 1, "libkernel", 1, 1, sceKernelAddUserEventEdge); LIB_FUNCTION("R74tt43xP6k", "libkernel", 1, "libkernel", 1, 1, sceKernelAddHRTimerEvent); + LIB_FUNCTION("J+LF6LwObXU", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteHRTimerEvent); LIB_FUNCTION("F6e0kwo4cnk", "libkernel", 1, "libkernel", 1, 1, sceKernelTriggerUserEvent); LIB_FUNCTION("LJDwdSNTnDg", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteUserEvent); LIB_FUNCTION("mJ7aghmgvfc", "libkernel", 1, "libkernel", 1, 1, sceKernelGetEventId); diff --git 
a/src/core/libraries/kernel/equeue.h b/src/core/libraries/kernel/equeue.h index f8759137c..2db5e6ca7 100644 --- a/src/core/libraries/kernel/equeue.h +++ b/src/core/libraries/kernel/equeue.h @@ -111,6 +111,13 @@ public: bool HasSmallTimer() const { return small_timer_event.event.data != 0; } + bool RemoveSmallTimer(u64 id) { + if (HasSmallTimer() && small_timer_event.event.ident == id) { + small_timer_event = {}; + return true; + } + return false; + } int WaitForSmallTimer(SceKernelEvent* ev, int num, u32 micros); diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 7d326cbbf..8deefb496 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -505,6 +505,41 @@ int PS4_SYSV_ABI posix_munmap(void* addr, size_t len) { return result; } +static constexpr int MAX_PRT_APERTURES = 3; +static constexpr VAddr PRT_AREA_START_ADDR = 0x1000000000; +static constexpr size_t PRT_AREA_SIZE = 0xec00000000; +static std::array, MAX_PRT_APERTURES> PrtApertures{}; + +int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) { + if (id < 0 || id >= MAX_PRT_APERTURES) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + if (address < PRT_AREA_START_ADDR || address + size > PRT_AREA_START_ADDR + PRT_AREA_SIZE) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + if (address % 4096 != 0) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + LOG_WARNING(Kernel_Vmm, + "PRT aperture id = {}, address = {:#x}, size = {:#x} is set but not used", id, + address, size); + + PrtApertures[id] = {address, size}; + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceKernelGetPrtAperture(int id, VAddr* address, size_t* size) { + if (id < 0 || id >= MAX_PRT_APERTURES) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + std::tie(*address, *size) = PrtApertures[id]; + return ORBIS_OK; +} + void RegisterMemory(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("rTXw65xmLIA", "libkernel", 1, "libkernel", 1, 1, sceKernelAllocateDirectMemory); 
LIB_FUNCTION("B+vc2AO2Zrc", "libkernel", 1, "libkernel", 1, 1, @@ -551,6 +586,10 @@ void RegisterMemory(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("BPE9s9vQQXo", "libScePosix", 1, "libkernel", 1, 1, posix_mmap); LIB_FUNCTION("UqDGjXA5yUM", "libkernel", 1, "libkernel", 1, 1, posix_munmap); LIB_FUNCTION("UqDGjXA5yUM", "libScePosix", 1, "libkernel", 1, 1, posix_munmap); + + // PRT memory management + LIB_FUNCTION("BohYr-F7-is", "libkernel", 1, "libkernel", 1, 1, sceKernelSetPrtAperture); + LIB_FUNCTION("L0v2Go5jOuM", "libkernel", 1, "libkernel", 1, 1, sceKernelGetPrtAperture); } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/threads.h b/src/core/libraries/kernel/threads.h index ad1393599..409136968 100644 --- a/src/core/libraries/kernel/threads.h +++ b/src/core/libraries/kernel/threads.h @@ -55,6 +55,9 @@ public: stop.request_stop(); Join(); } + thread = nullptr; + func = nullptr; + stop = std::stop_source{}; } static void* PS4_SYSV_ABI RunWrapper(void* arg) { diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 761d13346..e81207a0d 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -244,10 +244,9 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt new_thread->tid = ++TidCounter; if (new_thread->attr.stackaddr_attr == 0) { - /* Enforce minimum stack size of 128 KB */ - static constexpr size_t MinimumStack = 128_KB; - auto& stacksize = new_thread->attr.stacksize_attr; - stacksize = std::max(stacksize, MinimumStack); + /* Add additional stack space for HLE */ + static constexpr size_t AdditionalStack = 128_KB; + new_thread->attr.stacksize_attr += AdditionalStack; } if (thread_state->CreateStack(&new_thread->attr) != 0) { diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp index c30c2d7c3..49cd54a5b 100644 --- a/src/core/libraries/libs.cpp +++ 
b/src/core/libraries/libs.cpp @@ -47,6 +47,8 @@ #include "core/libraries/videodec/videodec.h" #include "core/libraries/videodec/videodec2.h" #include "core/libraries/videoout/video_out.h" +#include "fiber/fiber.h" +#include "jpeg/jpegenc.h" namespace Libraries { @@ -93,6 +95,8 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) { Libraries::Videodec::RegisterlibSceVideodec(sym); Libraries::RazorCpu::RegisterlibSceRazorCpu(sym); Libraries::Move::RegisterlibSceMove(sym); + Libraries::Fiber::RegisterlibSceFiber(sym); + Libraries::JpegEnc::RegisterlibSceJpegEnc(sym); } } // namespace Libraries diff --git a/src/core/libraries/np_manager/np_manager.cpp b/src/core/libraries/np_manager/np_manager.cpp index ec9cc6bf5..87d752c69 100644 --- a/src/core/libraries/np_manager/np_manager.cpp +++ b/src/core/libraries/np_manager/np_manager.cpp @@ -972,11 +972,8 @@ int PS4_SYSV_ABI sceNpGetGamePresenceStatusA() { } int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId user_id, OrbisNpId* np_id) { - LOG_INFO(Lib_NpManager, "user_id {}", user_id); - const auto name = Config::getUserName(); - std::memset(np_id, 0, sizeof(OrbisNpId)); - name.copy(np_id->handle.data, sizeof(np_id->handle.data)); - return ORBIS_OK; + LOG_DEBUG(Lib_NpManager, "user_id {}", user_id); + return ORBIS_NP_ERROR_SIGNED_OUT; } int PS4_SYSV_ABI sceNpGetNpReachabilityState() { @@ -986,10 +983,7 @@ int PS4_SYSV_ABI sceNpGetNpReachabilityState() { int PS4_SYSV_ABI sceNpGetOnlineId(s32 user_id, OrbisNpOnlineId* online_id) { LOG_DEBUG(Lib_NpManager, "user_id {}", user_id); - const auto name = Config::getUserName(); - std::memset(online_id, 0, sizeof(OrbisNpOnlineId)); - name.copy(online_id->data, sizeof(online_id->data)); - return ORBIS_OK; + return ORBIS_NP_ERROR_SIGNED_OUT; } int PS4_SYSV_ABI sceNpGetParentalControlInfo() { diff --git a/src/core/libraries/pad/pad.cpp b/src/core/libraries/pad/pad.cpp index 98f086dd9..7eb628a90 100644 --- a/src/core/libraries/pad/pad.cpp +++ b/src/core/libraries/pad/pad.cpp @@ -286,6 
+286,7 @@ int PS4_SYSV_ABI scePadOutputReport() { } int PS4_SYSV_ABI scePadRead(s32 handle, OrbisPadData* pData, s32 num) { + LOG_TRACE(Lib_Pad, "called"); int connected_count = 0; bool connected = false; Input::State states[64]; @@ -304,16 +305,15 @@ int PS4_SYSV_ABI scePadRead(s32 handle, OrbisPadData* pData, s32 num) { pData[i].rightStick.y = states[i].axes[static_cast(Input::Axis::RightY)]; pData[i].analogButtons.l2 = states[i].axes[static_cast(Input::Axis::TriggerLeft)]; pData[i].analogButtons.r2 = states[i].axes[static_cast(Input::Axis::TriggerRight)]; - pData[i].orientation.x = 0.0f; - pData[i].orientation.y = 0.0f; - pData[i].orientation.z = 0.0f; - pData[i].orientation.w = 1.0f; - pData[i].acceleration.x = 0.0f; - pData[i].acceleration.y = 0.0f; - pData[i].acceleration.z = 0.0f; - pData[i].angularVelocity.x = 0.0f; - pData[i].angularVelocity.y = 0.0f; - pData[i].angularVelocity.z = 0.0f; + pData[i].acceleration.x = states[i].acceleration.x; + pData[i].acceleration.y = states[i].acceleration.y; + pData[i].acceleration.z = states[i].acceleration.z; + pData[i].angularVelocity.x = states[i].angularVelocity.x; + pData[i].angularVelocity.y = states[i].angularVelocity.y; + pData[i].angularVelocity.z = states[i].angularVelocity.z; + Input::GameController::CalculateOrientation(pData[i].acceleration, pData[i].angularVelocity, + 1.0f / controller->accel_poll_rate, + pData[i].orientation); pData[i].touchData.touchNum = (states[i].touchpad[0].state ? 1 : 0) + (states[i].touchpad[1].state ? 
1 : 0); pData[i].touchData.touch[0].x = states[i].touchpad[0].x; @@ -352,6 +352,7 @@ int PS4_SYSV_ABI scePadReadHistory() { } int PS4_SYSV_ABI scePadReadState(s32 handle, OrbisPadData* pData) { + LOG_TRACE(Lib_Pad, "called"); if (handle == ORBIS_PAD_ERROR_DEVICE_NO_HANDLE) { return ORBIS_PAD_ERROR_INVALID_HANDLE; } @@ -367,16 +368,15 @@ int PS4_SYSV_ABI scePadReadState(s32 handle, OrbisPadData* pData) { pData->rightStick.y = state.axes[static_cast(Input::Axis::RightY)]; pData->analogButtons.l2 = state.axes[static_cast(Input::Axis::TriggerLeft)]; pData->analogButtons.r2 = state.axes[static_cast(Input::Axis::TriggerRight)]; - pData->orientation.x = 0; - pData->orientation.y = 0; - pData->orientation.z = 0; - pData->orientation.w = 1; - pData->acceleration.x = 0.0f; - pData->acceleration.y = 0.0f; - pData->acceleration.z = 0.0f; - pData->angularVelocity.x = 0.0f; - pData->angularVelocity.y = 0.0f; - pData->angularVelocity.z = 0.0f; + pData->acceleration.x = state.acceleration.x; + pData->acceleration.y = state.acceleration.y; + pData->acceleration.z = state.acceleration.z; + pData->angularVelocity.x = state.angularVelocity.x; + pData->angularVelocity.y = state.angularVelocity.y; + pData->angularVelocity.z = state.angularVelocity.z; + Input::GameController::CalculateOrientation(pData->acceleration, pData->angularVelocity, + 1.0f / controller->accel_poll_rate, + pData->orientation); pData->touchData.touchNum = (state.touchpad[0].state ? 1 : 0) + (state.touchpad[1].state ? 
1 : 0); pData->touchData.touch[0].x = state.touchpad[0].x; @@ -498,6 +498,8 @@ int PS4_SYSV_ABI scePadSetLoginUserNumber() { int PS4_SYSV_ABI scePadSetMotionSensorState(s32 handle, bool bEnable) { LOG_ERROR(Lib_Pad, "(STUBBED) called"); return ORBIS_OK; + // it's already handled by the SDL backend and will be on no matter what + // (assuming the controller supports it) } int PS4_SYSV_ABI scePadSetProcessFocus() { diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp index 66899fb34..b573ded1e 100644 --- a/src/core/libraries/save_data/savedata.cpp +++ b/src/core/libraries/save_data/savedata.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include @@ -1139,10 +1140,6 @@ Error PS4_SYSV_ABI sceSaveDataGetSaveDataMemory2(OrbisSaveDataMemoryGet2* getPar LOG_INFO(Lib_SaveData, "called without save memory initialized"); return Error::MEMORY_NOT_READY; } - if (SaveMemory::IsSaving()) { - LOG_TRACE(Lib_SaveData, "called while saving"); - return Error::BUSY_FOR_SAVING; - } LOG_DEBUG(Lib_SaveData, "called"); auto data = getParam->data; if (data != nullptr) { @@ -1502,8 +1499,14 @@ Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2* return Error::MEMORY_NOT_READY; } if (SaveMemory::IsSaving()) { - LOG_TRACE(Lib_SaveData, "called while saving"); - return Error::BUSY_FOR_SAVING; + int count = 0; + while (++count < 100 && SaveMemory::IsSaving()) { // try for more 10 seconds + std::this_thread::sleep_for(chrono::milliseconds(100)); + } + if (SaveMemory::IsSaving()) { + LOG_TRACE(Lib_SaveData, "called while saving"); + return Error::BUSY_FOR_SAVING; + } } LOG_DEBUG(Lib_SaveData, "called"); auto data = setParam->data; @@ -1584,8 +1587,8 @@ Error PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory2(const OrbisSaveDataMemorySetu } else { SaveMemory::SetIcon(nullptr, 0); } + SaveMemory::TriggerSaveWithoutEvent(); } - SaveMemory::TriggerSaveWithoutEvent(); if (g_fw_ver >= 
ElfInfo::FW_45 && result != nullptr) { result->existedMemorySize = existed_size; } diff --git a/src/core/libraries/usbd/usbd.cpp b/src/core/libraries/usbd/usbd.cpp index c0e1b7ea8..fdfa50b23 100644 --- a/src/core/libraries/usbd/usbd.cpp +++ b/src/core/libraries/usbd/usbd.cpp @@ -10,327 +10,327 @@ namespace Libraries::Usbd { int PS4_SYSV_ABI sceUsbdAllocTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdAttachKernelDriver() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdBulkTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdCancelTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdCheckConnected() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdClaimInterface() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdClearHalt() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdClose() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdControlTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdControlTransferGetData() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdControlTransferGetSetup() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdDetachKernelDriver() { - 
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdEventHandlerActive() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdEventHandlingOk() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdExit() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFillBulkTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFillControlSetup() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFillControlTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFillInterruptTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFillIsoTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFreeConfigDescriptor() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFreeDeviceList() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdFreeTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetActiveConfigDescriptor() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetBusNumber() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, 
"(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetConfigDescriptor() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetConfigDescriptorByValue() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetConfiguration() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetDescriptor() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetDevice() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetDeviceAddress() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetDeviceDescriptor() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetDeviceList() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetDeviceSpeed() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetIsoPacketBuffer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetMaxIsoPacketSize() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetMaxPacketSize() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdGetStringDescriptor() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int 
PS4_SYSV_ABI sceUsbdGetStringDescriptorAscii() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdHandleEvents() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdHandleEventsLocked() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdHandleEventsTimeout() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_DEBUG(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdInit() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return 0x80240005; // Skip } int PS4_SYSV_ABI sceUsbdInterruptTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdKernelDriverActive() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdLockEvents() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdLockEventWaiters() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdOpen() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdOpenDeviceWithVidPid() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdRefDevice() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdReleaseInterface() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdResetDevice() { - LOG_ERROR(Lib_Usbd, 
"(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdSetConfiguration() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdSetInterfaceAltSetting() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdSetIsoPacketLengths() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdSubmitTransfer() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdTryLockEvents() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdUnlockEvents() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdUnlockEventWaiters() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdUnrefDevice() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI sceUsbdWaitForEvent() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI Func_65F6EF33E38FFF50() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI Func_97F056BAD90AADE7() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI Func_C55104A33B35B264() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return ORBIS_OK; } int PS4_SYSV_ABI Func_D56B43060720B1E0() { - LOG_ERROR(Lib_Usbd, "(STUBBED)called"); + LOG_ERROR(Lib_Usbd, "(STUBBED) called"); return 
ORBIS_OK; } diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index f36de6ade..78a2b11a4 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -52,8 +52,7 @@ s32 PS4_SYSV_ABI sceVideoOutAddFlipEvent(Kernel::SceKernelEqueue eq, s32 handle, Kernel::EqueueEvent event{}; event.event.ident = u64(OrbisVideoOutEventId::Flip); event.event.filter = Kernel::SceKernelEvent::Filter::VideoOut; - // The library only sets EV_ADD but kernel driver forces EV_CLEAR - event.event.flags = Kernel::SceKernelEvent::Flags::Clear; + event.event.flags = Kernel::SceKernelEvent::Flags::Add; event.event.udata = udata; event.event.fflags = 0; event.event.data = 0; @@ -79,8 +78,7 @@ s32 PS4_SYSV_ABI sceVideoOutAddVblankEvent(Kernel::SceKernelEqueue eq, s32 handl Kernel::EqueueEvent event{}; event.event.ident = u64(OrbisVideoOutEventId::Vblank); event.event.filter = Kernel::SceKernelEvent::Filter::VideoOut; - // The library only sets EV_ADD but kernel driver forces EV_CLEAR - event.event.flags = Kernel::SceKernelEvent::Flags::Clear; + event.event.flags = Kernel::SceKernelEvent::Flags::Add; event.event.udata = udata; event.event.fflags = 0; event.event.data = 0; diff --git a/src/emulator.cpp b/src/emulator.cpp index 10d17a2db..4f0c61236 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -45,10 +45,6 @@ Frontend::WindowSDL* g_window = nullptr; namespace Core { Emulator::Emulator() { - // Read configuration file. 
- const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - Config::load(config_dir / "config.toml"); - // Initialize NT API functions and set high priority #ifdef _WIN32 Common::NtApi::Initialize(); @@ -286,16 +282,14 @@ void Emulator::Run(const std::filesystem::path& file) { } void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) { - constexpr std::array ModulesToLoad{ + constexpr std::array ModulesToLoad{ {{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2}, - {"libSceFiber.sprx", &Libraries::Fiber::RegisterlibSceFiber}, {"libSceUlt.sprx", nullptr}, {"libSceJson.sprx", nullptr}, {"libSceJson2.sprx", nullptr}, {"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal}, {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap}, {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc}, - {"libSceJpegEnc.sprx", &Libraries::JpegEnc::RegisterlibSceJpegEnc}, {"libSceCesCs.sprx", nullptr}, {"libSceFont.sprx", nullptr}, {"libSceFontFt.sprx", nullptr}, diff --git a/src/input/controller.cpp b/src/input/controller.cpp index 3927b096f..daef9c940 100644 --- a/src/input/controller.cpp +++ b/src/input/controller.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include "common/logging/log.h" #include "core/libraries/kernel/time.h" #include "core/libraries/pad/pad.h" #include "input/controller.h" @@ -116,6 +117,103 @@ void GameController::Axis(int id, Input::Axis axis, int value) { AddState(state); } +void GameController::Gyro(int id, const float gyro[3]) { + std::scoped_lock lock{m_mutex}; + auto state = GetLastState(); + state.time = Libraries::Kernel::sceKernelGetProcessTime(); + + // Update the angular velocity (gyro data) + state.angularVelocity.x = gyro[0]; // X-axis + state.angularVelocity.y = gyro[1]; // Y-axis + state.angularVelocity.z = gyro[2]; // Z-axis + + AddState(state); +} +void GameController::Acceleration(int id, const float 
acceleration[3]) { + std::scoped_lock lock{m_mutex}; + auto state = GetLastState(); + state.time = Libraries::Kernel::sceKernelGetProcessTime(); + + // Update the acceleration values + state.acceleration.x = acceleration[0]; // X-axis + state.acceleration.y = acceleration[1]; // Y-axis + state.acceleration.z = acceleration[2]; // Z-axis + + AddState(state); +} + +// Stolen from +// https://github.com/xioTechnologies/Open-Source-AHRS-With-x-IMU/blob/master/x-IMU%20IMU%20and%20AHRS%20Algorithms/x-IMU%20IMU%20and%20AHRS%20Algorithms/AHRS/MahonyAHRS.cs +float eInt[3] = {0.0f, 0.0f, 0.0f}; // Integral error terms +const float Kp = 50.0f; // Proportional gain +const float Ki = 1.0f; // Integral gain +Libraries::Pad::OrbisFQuaternion o = {1, 0, 0, 0}; +void GameController::CalculateOrientation(Libraries::Pad::OrbisFVector3& acceleration, + Libraries::Pad::OrbisFVector3& angularVelocity, + float deltaTime, + Libraries::Pad::OrbisFQuaternion& orientation) { + float ax = acceleration.x, ay = acceleration.y, az = acceleration.z; + float gx = angularVelocity.x, gy = angularVelocity.y, gz = angularVelocity.z; + + float q1 = o.w, q2 = o.x, q3 = o.y, q4 = o.z; + + // Normalize accelerometer measurement + float norm = std::sqrt(ax * ax + ay * ay + az * az); + if (norm == 0.0f) + return; // Handle NaN + norm = 1.0f / norm; + ax *= norm; + ay *= norm; + az *= norm; + + // Estimated direction of gravity + float vx = 2.0f * (q2 * q4 - q1 * q3); + float vy = 2.0f * (q1 * q2 + q3 * q4); + float vz = q1 * q1 - q2 * q2 - q3 * q3 + q4 * q4; + + // Error is cross product between estimated direction and measured direction of gravity + float ex = (ay * vz - az * vy); + float ey = (az * vx - ax * vz); + float ez = (ax * vy - ay * vx); + if (Ki > 0.0f) { + eInt[0] += ex * deltaTime; // Accumulate integral error + eInt[1] += ey * deltaTime; + eInt[2] += ez * deltaTime; + } else { + eInt[0] = eInt[1] = eInt[2] = 0.0f; // Prevent integral wind-up + } + + // Apply feedback terms + gx += Kp * ex + 
Ki * eInt[0]; + gy += Kp * ey + Ki * eInt[1]; + gz += Kp * ez + Ki * eInt[2]; + + //// Integrate rate of change of quaternion + // float pa = q2, pb = q3, pc = q4; + // q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime); + // q2 += (pa * gx + pb * gz - pc * gy) * (0.5f * deltaTime); + // q3 += (pb * gy - pa * gz + pc * gx) * (0.5f * deltaTime); + // q4 += (pc * gz + pa * gy - pb * gx) * (0.5f * deltaTime); + q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime); + q2 += (q1 * gx + q3 * gz - q4 * gy) * (0.5f * deltaTime); + q3 += (q1 * gy - q2 * gz + q4 * gx) * (0.5f * deltaTime); + q4 += (q1 * gz + q2 * gy - q3 * gx) * (0.5f * deltaTime); + + // Normalize quaternion + norm = std::sqrt(q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4); + norm = 1.0f / norm; + orientation.w = q1 * norm; + orientation.x = q2 * norm; + orientation.y = q3 * norm; + orientation.z = q4 * norm; + o.w = q1 * norm; + o.x = q2 * norm; + o.y = q3 * norm; + o.z = q4 * norm; + LOG_DEBUG(Lib_Pad, "Calculated orientation: {:.2f} {:.2f} {:.2f} {:.2f}", orientation.x, + orientation.y, orientation.z, orientation.w); +} + void GameController::SetLightBarRGB(u8 r, u8 g, u8 b) { if (m_sdl_gamepad != nullptr) { SDL_SetGamepadLED(m_sdl_gamepad, r, g, b); @@ -149,6 +247,18 @@ void GameController::TryOpenSDLController() { int gamepad_count; SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count); m_sdl_gamepad = gamepad_count > 0 ? 
SDL_OpenGamepad(gamepads[0]) : nullptr; + if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_GYRO, true)) { + gyro_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_GYRO); + LOG_INFO(Input, "Gyro initialized, poll rate: {}", gyro_poll_rate); + } else { + LOG_ERROR(Input, "Failed to initialize gyro controls for gamepad"); + } + if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_ACCEL, true)) { + accel_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_ACCEL); + LOG_INFO(Input, "Accel initialized, poll rate: {}", accel_poll_rate); + } else { + LOG_ERROR(Input, "Failed to initialize accel controls for gamepad"); + } SDL_free(gamepads); SetLightBarRGB(0, 0, 255); diff --git a/src/input/controller.h b/src/input/controller.h index d425fb46c..c6fc02c24 100644 --- a/src/input/controller.h +++ b/src/input/controller.h @@ -33,6 +33,9 @@ struct State { u64 time = 0; int axes[static_cast(Axis::AxisMax)] = {128, 128, 128, 128, 0, 0}; TouchpadEntry touchpad[2] = {{false, 0, 0}, {false, 0, 0}}; + Libraries::Pad::OrbisFVector3 acceleration = {0.0f, 0.0f, 0.0f}; + Libraries::Pad::OrbisFVector3 angularVelocity = {0.0f, 0.0f, 0.0f}; + Libraries::Pad::OrbisFQuaternion orientation = {0.0f, 0.0f, 0.0f, 1.0f}; }; inline int GetAxis(int min, int max, int value) { @@ -53,12 +56,21 @@ public: void CheckButton(int id, Libraries::Pad::OrbisPadButtonDataOffset button, bool isPressed); void AddState(const State& state); void Axis(int id, Input::Axis axis, int value); + void Gyro(int id, const float gyro[3]); + void Acceleration(int id, const float acceleration[3]); void SetLightBarRGB(u8 r, u8 g, u8 b); bool SetVibration(u8 smallMotor, u8 largeMotor); void SetTouchpadState(int touchIndex, bool touchDown, float x, float y); void TryOpenSDLController(); u32 Poll(); + float gyro_poll_rate; + float accel_poll_rate; + static void CalculateOrientation(Libraries::Pad::OrbisFVector3& acceleration, + Libraries::Pad::OrbisFVector3& angularVelocity, + float 
deltaTime, + Libraries::Pad::OrbisFQuaternion& orientation); + private: struct StateInternal { bool obtained = false; diff --git a/src/qt_gui/check_update.cpp b/src/qt_gui/check_update.cpp index edd55b804..e3e019144 100644 --- a/src/qt_gui/check_update.cpp +++ b/src/qt_gui/check_update.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -24,11 +25,9 @@ #include #include #include -#include #include "check_update.h" using namespace Common::FS; -namespace fs = std::filesystem; CheckUpdate::CheckUpdate(const bool showMessage, QWidget* parent) : QDialog(parent), networkManager(new QNetworkAccessManager(this)) { @@ -254,7 +253,11 @@ void CheckUpdate::setupUI(const QString& downloadUrl, const QString& latestDate, connect(noButton, &QPushButton::clicked, this, [this]() { close(); }); autoUpdateCheckBox->setChecked(Config::autoUpdate()); +#if (QT_VERSION < QT_VERSION_CHECK(6, 7, 0)) connect(autoUpdateCheckBox, &QCheckBox::stateChanged, this, [](int state) { +#else + connect(autoUpdateCheckBox, &QCheckBox::checkStateChanged, this, [](Qt::CheckState state) { +#endif const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); Config::setAutoUpdate(state == Qt::Checked); Config::save(user_dir / "config.toml"); diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index b82f41bcf..06ad81c39 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -3,18 +3,22 @@ #include #include +#include #include +#include -#include #include "common/config.h" +#include "common/version.h" #include "qt_gui/compatibility_info.h" #ifdef ENABLE_DISCORD_RPC #include "common/discord_rpc_handler.h" +#include "common/singleton.h" #endif #ifdef ENABLE_UPDATER #include "check_update.h" #endif #include +#include "background_music_player.h" #include "common/logging/backend.h" #include "common/logging/filter.h" #include "common/logging/formatter.h" @@ -131,8 +135,13 @@ 
SettingsDialog::SettingsDialog(std::span physical_devices, // GENERAL TAB { #ifdef ENABLE_UPDATER +#if (QT_VERSION < QT_VERSION_CHECK(6, 7, 0)) connect(ui->updateCheckBox, &QCheckBox::stateChanged, this, [](int state) { Config::setAutoUpdate(state == Qt::Checked); }); +#else + connect(ui->updateCheckBox, &QCheckBox::checkStateChanged, this, + [](Qt::CheckState state) { Config::setAutoUpdate(state == Qt::Checked); }); +#endif connect(ui->updateComboBox, &QComboBox::currentTextChanged, this, [](const QString& channel) { Config::setUpdateChannel(channel.toStdString()); }); @@ -151,7 +160,12 @@ SettingsDialog::SettingsDialog(std::span physical_devices, emit CompatibilityChanged(); }); +#if (QT_VERSION < QT_VERSION_CHECK(6, 7, 0)) connect(ui->enableCompatibilityCheckBox, &QCheckBox::stateChanged, this, [this](int state) { +#else + connect(ui->enableCompatibilityCheckBox, &QCheckBox::checkStateChanged, this, + [this](Qt::CheckState state) { +#endif Config::setCompatibilityEnabled(state); emit CompatibilityChanged(); }); @@ -202,6 +216,8 @@ SettingsDialog::SettingsDialog(std::span physical_devices, ui->showSplashCheckBox->installEventFilter(this); ui->discordRPCCheckbox->installEventFilter(this); ui->userName->installEventFilter(this); + ui->label_Trophy->installEventFilter(this); + ui->trophyKeyLineEdit->installEventFilter(this); ui->logTypeGroupBox->installEventFilter(this); ui->logFilter->installEventFilter(this); #ifdef ENABLE_UPDATER @@ -213,7 +229,6 @@ SettingsDialog::SettingsDialog(std::span physical_devices, ui->enableCompatibilityCheckBox->installEventFilter(this); ui->checkCompatibilityOnStartupCheckBox->installEventFilter(this); ui->updateCompatibilityButton->installEventFilter(this); - ui->audioBackendComboBox->installEventFilter(this); // Input ui->hideCursorGroupBox->installEventFilter(this); @@ -302,6 +317,9 @@ void SettingsDialog::LoadValuesFromConfig() { QString::fromStdString(toml::find_or(data, "General", "logFilter", ""))); 
ui->userNameLineEdit->setText( QString::fromStdString(toml::find_or(data, "General", "userName", "shadPS4"))); + ui->trophyKeyLineEdit->setText( + QString::fromStdString(toml::find_or(data, "Keys", "TrophyKey", ""))); + ui->trophyKeyLineEdit->setEchoMode(QLineEdit::Password); ui->debugDump->setChecked(toml::find_or(data, "Debug", "DebugDump", false)); ui->vkValidationCheckBox->setChecked(toml::find_or(data, "Vulkan", "validation", false)); ui->vkSyncValidationCheckBox->setChecked( @@ -311,8 +329,6 @@ void SettingsDialog::LoadValuesFromConfig() { toml::find_or(data, "General", "compatibilityEnabled", false)); ui->checkCompatibilityOnStartupCheckBox->setChecked( toml::find_or(data, "General", "checkCompatibilityOnStartup", false)); - ui->audioBackendComboBox->setCurrentText( - QString::fromStdString(toml::find_or(data, "Audio", "backend", "cubeb"))); #ifdef ENABLE_UPDATER ui->updateCheckBox->setChecked(toml::find_or(data, "General", "autoUpdate", false)); @@ -367,7 +383,7 @@ void SettingsDialog::InitializeEmulatorLanguages() { idx++; } - connect(ui->emulatorLanguageComboBox, qOverload(&QComboBox::currentIndexChanged), this, + connect(ui->emulatorLanguageComboBox, &QComboBox::currentIndexChanged, this, &SettingsDialog::OnLanguageChanged); } @@ -416,6 +432,10 @@ void SettingsDialog::updateNoteTextEdit(const QString& elementName) { text = tr("discordRPCCheckbox"); } else if (elementName == "userName") { text = tr("userName"); + } else if (elementName == "label_Trophy") { + text = tr("TrophyKey"); + } else if (elementName == "trophyKeyLineEdit") { + text = tr("TrophyKey"); } else if (elementName == "logTypeGroupBox") { text = tr("logTypeGroupBox"); } else if (elementName == "logFilter") { @@ -436,8 +456,6 @@ void SettingsDialog::updateNoteTextEdit(const QString& elementName) { text = tr("checkCompatibilityOnStartupCheckBox"); } else if (elementName == "updateCompatibilityButton") { text = tr("updateCompatibilityButton"); - } else if (elementName == 
"audioBackendGroupBox") { - text = tr("audioBackendGroupBox"); } // Input @@ -530,6 +548,7 @@ void SettingsDialog::UpdateSettings() { Config::setLogType(ui->logTypeComboBox->currentText().toStdString()); Config::setLogFilter(ui->logFilterLineEdit->text().toStdString()); Config::setUserName(ui->userNameLineEdit->text().toStdString()); + Config::setTrophyKey(ui->trophyKeyLineEdit->text().toStdString()); Config::setCursorState(ui->hideCursorComboBox->currentIndex()); Config::setCursorHideTimeout(ui->idleTimeoutSpinBox->value()); Config::setGpuId(ui->graphicsAdapterBox->currentIndex() - 1); @@ -552,7 +571,6 @@ void SettingsDialog::UpdateSettings() { Config::setUpdateChannel(ui->updateComboBox->currentText().toStdString()); Config::setCompatibilityEnabled(ui->enableCompatibilityCheckBox->isChecked()); Config::setCheckCompatibilityOnStartup(ui->checkCompatibilityOnStartupCheckBox->isChecked()); - Config::setAudioBackend(ui->audioBackendComboBox->currentText().toStdString()); #ifdef ENABLE_DISCORD_RPC auto* rpc = Common::Singleton::Instance(); @@ -603,4 +621,4 @@ void SettingsDialog::ResetInstallFolders() { } Config::setGameInstallDirs(settings_install_dirs_config); } -} \ No newline at end of file +} diff --git a/src/qt_gui/settings_dialog.ui b/src/qt_gui/settings_dialog.ui index b7931215a..f253c57d9 100644 --- a/src/qt_gui/settings_dialog.ui +++ b/src/qt_gui/settings_dialog.ui @@ -77,87 +77,7 @@ 0 - - - - - - Logger - - - - - - - 0 - - - 0 - - - 0 - - - 0 - - - - - Log Type - - - - - - - async - - - - - sync - - - - - - - - - - - - - - 6 - - - 0 - - - - - - - Log Filter - - - - - - - - - - - - - - - - - - + @@ -194,7 +114,7 @@ - + @@ -263,35 +183,12 @@ - - - - Audio Backend - - - - - - - cubeb - - - - - sdl - - - - - - - - + 6 @@ -459,9 +356,9 @@ - - - + + + 0 @@ -475,9 +372,12 @@ - GUI Settings + Game Compatibility - + + + 10 + 1 @@ -553,12 +453,75 @@ - + - Disable Trophy Pop-ups + Display Compatibility Data + + + + Update Compatibility Database On Startup + + + + + + 
+ + 0 + 0 + + + + + 0 + 0 + + + + + 16777215 + 16777215 + + + + Update Compatibility Database + + + + + + + + + + + + + + + 0 + 0 + + + + + 0 + 0 + + + + GUI Settings + + + + 1 + + + 11 + @@ -581,23 +544,7 @@ 0 - - - Qt::Orientation::Vertical - - - QSizePolicy::Policy::Fixed - - - - 20 - 13 - - - - - - + 0 @@ -649,6 +596,53 @@ + + + + 6 + + + 0 + + + + + + + Trophy + + + + + + Disable Trophy Pop-ups + + + + + + + Trophy Key + + + + + + + + 0 + 0 + + + + + + + + + + + @@ -656,79 +650,6 @@ - - - - - - - 0 - 0 - - - - - 0 - 0 - - - - Game Compatibility - - - - 10 - - - 1 - - - 11 - - - - - Display Compatibility Data - - - - - - - Update Compatibility Database On Startup - - - - - - - - 0 - 0 - - - - - 0 - 0 - - - - - 16777215 - 16777215 - - - - Update Compatibility Database - - - - - - - - @@ -1302,18 +1223,25 @@ - - - Remove + + + 0 - - - - - - Add... - - + + + + Add... + + + + + + + Remove + + + + @@ -1344,65 +1272,139 @@ - + + + 0 + + + 0 + - - - true - - - General - - - Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignTop - - - - - - Enable Debug Dumping - - - - - - - Qt::Orientation::Vertical - - - QSizePolicy::Policy::MinimumExpanding - - - - 0 - 0 - - - - - - - - Enable Vulkan Validation Layers - - - - - - - Enable Vulkan Synchronization Validation - - - - - - - Enable RenderDoc Debugging - - - - - + + + + + true + + + General + + + Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignTop + + + + + + Enable Debug Dumping + + + + + + + Enable Vulkan Validation Layers + + + + + + + Enable Vulkan Synchronization Validation + + + + + + + Enable RenderDoc Debugging + + + + + + + + + + + + + + Logger + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Log Type + + + + + + + async + + + + + sync + + + + + + + + + + + + + + 6 + + + 0 + + + + + + + Log Filter + + + + + + + + + + + + + + + + diff --git a/src/qt_gui/translations/ar.ts b/src/qt_gui/translations/ar.ts index 1f65db04a..e851f59a7 100644 --- 
a/src/qt_gui/translations/ar.ts +++ b/src/qt_gui/translations/ar.ts @@ -537,6 +537,16 @@ Username اسم المستخدم + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName اسم المستخدم:\nيضبط اسم حساب PS4، الذي قد يتم عرضه في بعض الألعاب. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/da_DK.ts b/src/qt_gui/translations/da_DK.ts index 943e2d092..41319c7ff 100644 --- a/src/qt_gui/translations/da_DK.ts +++ b/src/qt_gui/translations/da_DK.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Brugernavn:\nIndstiller PS4-kontoens navn, som kan blive vist i nogle spil. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/de.ts b/src/qt_gui/translations/de.ts index cbbef8215..62897fe24 100644 --- a/src/qt_gui/translations/de.ts +++ b/src/qt_gui/translations/de.ts @@ -537,6 +537,16 @@ Username Benutzername + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Benutzername:\nLegt den Namen des PS4-Kontos fest, der in einigen Spielen angezeigt werden kann. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/el.ts b/src/qt_gui/translations/el.ts index 8737f5216..43ed81c33 100644 --- a/src/qt_gui/translations/el.ts +++ b/src/qt_gui/translations/el.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Όνομα Χρήστη:\nΟρίζει το όνομα του λογαριασμού PS4, το οποίο μπορεί να εμφανιστεί σε ορισμένα παιχνίδια. 
+ + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/en.ts b/src/qt_gui/translations/en.ts index b0d49591c..f05a3c4ee 100644 --- a/src/qt_gui/translations/en.ts +++ b/src/qt_gui/translations/en.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Username:\nSets the PS4's account username, which may be displayed by some games. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/es_ES.ts b/src/qt_gui/translations/es_ES.ts index 70be2253d..096e104e3 100644 --- a/src/qt_gui/translations/es_ES.ts +++ b/src/qt_gui/translations/es_ES.ts @@ -537,6 +537,16 @@ Username Nombre de usuario + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Nombre de Usuario:\nEstablece el nombre de usuario de la cuenta de PS4, que puede ser mostrado por algunos juegos. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/fa_IR.ts b/src/qt_gui/translations/fa_IR.ts index 54187cf9b..7b93c6769 100644 --- a/src/qt_gui/translations/fa_IR.ts +++ b/src/qt_gui/translations/fa_IR.ts @@ -537,6 +537,16 @@ Username نام کاربری + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName نام کاربری:\nنام کاربری حساب PS4 را تنظیم می‌کند که ممکن است توسط برخی بازی‌ها نمایش داده شود. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. 
+ logTypeGroupBox diff --git a/src/qt_gui/translations/fi.ts b/src/qt_gui/translations/fi.ts index bdc1eb703..cdf331796 100644 --- a/src/qt_gui/translations/fi.ts +++ b/src/qt_gui/translations/fi.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Käyttäjänimi:\nAsettaa PS4-tilin käyttäjänimen, joka voi näkyä joissakin peleissä. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/fr.ts b/src/qt_gui/translations/fr.ts index 19b0f9358..441eaddb1 100644 --- a/src/qt_gui/translations/fr.ts +++ b/src/qt_gui/translations/fr.ts @@ -537,6 +537,16 @@ Username Nom d'utilisateur + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Nom d'utilisateur:\nDéfinit le nom d'utilisateur du compte PS4, qui peut être affiché par certains jeux. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/hu_HU.ts b/src/qt_gui/translations/hu_HU.ts index bc337f2cd..f6b853e4b 100644 --- a/src/qt_gui/translations/hu_HU.ts +++ b/src/qt_gui/translations/hu_HU.ts @@ -537,6 +537,16 @@ Username Felhasználónév + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Felhasználónév:\nBeállítja a PS4 fiók felhasználónevét, amelyet egyes játékok megjeleníthetnek. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. 
+ logTypeGroupBox diff --git a/src/qt_gui/translations/id.ts b/src/qt_gui/translations/id.ts index 7a0bf5d05..bee61083c 100644 --- a/src/qt_gui/translations/id.ts +++ b/src/qt_gui/translations/id.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Nama Pengguna:\nMenetapkan nama pengguna akun PS4, yang mungkin ditampilkan oleh beberapa permainan. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/it.ts b/src/qt_gui/translations/it.ts index 1391fbc55..9e375a45e 100644 --- a/src/qt_gui/translations/it.ts +++ b/src/qt_gui/translations/it.ts @@ -537,6 +537,16 @@ Username Nome Utente + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Nome Utente:\nImposta il nome utente dell'account PS4, che potrebbe essere visualizzato da alcuni giochi. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/ja_JP.ts b/src/qt_gui/translations/ja_JP.ts index 58f213e03..409900ade 100644 --- a/src/qt_gui/translations/ja_JP.ts +++ b/src/qt_gui/translations/ja_JP.ts @@ -537,6 +537,16 @@ Username ユーザー名 + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName ユーザー名:\nPS4のアカウントユーザー名を設定します。これは、一部のゲームで表示される場合があります。 + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. 
+ logTypeGroupBox diff --git a/src/qt_gui/translations/ko_KR.ts b/src/qt_gui/translations/ko_KR.ts index 75a1b53cf..ab6404a7e 100644 --- a/src/qt_gui/translations/ko_KR.ts +++ b/src/qt_gui/translations/ko_KR.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Username:\nSets the PS4's account username, which may be displayed by some games. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/lt_LT.ts b/src/qt_gui/translations/lt_LT.ts index 092521fdf..0b9c5b542 100644 --- a/src/qt_gui/translations/lt_LT.ts +++ b/src/qt_gui/translations/lt_LT.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Vartotojo vardas:\nNustato PS4 paskyros vartotojo vardą, kuris gali būti rodomas kai kuriuose žaidimuose. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/nb.ts b/src/qt_gui/translations/nb.ts index cc41573db..4d3c4f5af 100644 --- a/src/qt_gui/translations/nb.ts +++ b/src/qt_gui/translations/nb.ts @@ -537,6 +537,16 @@ Username Brukernavn + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Brukernavn:\nAngir brukernavnet for PS4-kontoen, som kan vises av enkelte spill. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. 
+ logTypeGroupBox diff --git a/src/qt_gui/translations/nl.ts b/src/qt_gui/translations/nl.ts index 5cd4a4224..0cb890186 100644 --- a/src/qt_gui/translations/nl.ts +++ b/src/qt_gui/translations/nl.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Gebruikersnaam:\nStelt de gebruikersnaam van het PS4-account in, die door sommige games kan worden weergegeven. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/pl_PL.ts b/src/qt_gui/translations/pl_PL.ts index b85393bb0..1aed08394 100644 --- a/src/qt_gui/translations/pl_PL.ts +++ b/src/qt_gui/translations/pl_PL.ts @@ -537,6 +537,16 @@ Username Nazwa użytkownika + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Nazwa użytkownika:\nUstala nazwę użytkownika konta PS4, która może być wyświetlana w niektórych grach. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/pt_BR.ts b/src/qt_gui/translations/pt_BR.ts index 8ab8db093..cce66c105 100644 --- a/src/qt_gui/translations/pt_BR.ts +++ b/src/qt_gui/translations/pt_BR.ts @@ -537,6 +537,16 @@ Username Nome de usuário + + + Trophy Key + Trophy Key + + + + Trophy + Troféus + Logger @@ -1236,6 +1246,11 @@ userName Nome de usuário:\nDefine o nome de usuário da conta PS4 que pode ser exibido por alguns jogos. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. 
+ logTypeGroupBox diff --git a/src/qt_gui/translations/ro_RO.ts b/src/qt_gui/translations/ro_RO.ts index 00547d6ba..63df2ff80 100644 --- a/src/qt_gui/translations/ro_RO.ts +++ b/src/qt_gui/translations/ro_RO.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Nume utilizator:\nSetează numele de utilizator al contului PS4, care poate fi afișat de unele jocuri. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/ru_RU.ts b/src/qt_gui/translations/ru_RU.ts index 505a05a3e..88eff1aeb 100644 --- a/src/qt_gui/translations/ru_RU.ts +++ b/src/qt_gui/translations/ru_RU.ts @@ -537,6 +537,16 @@ Username Имя пользователя + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Имя пользователя:\nУстановите имя пользователя аккаунта PS4. Это может отображаться в некоторых играх. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/sq.ts b/src/qt_gui/translations/sq.ts index 0c318f4f7..1df2a40e2 100644 --- a/src/qt_gui/translations/sq.ts +++ b/src/qt_gui/translations/sq.ts @@ -537,6 +537,16 @@ Username Përdoruesi + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Përdoruesi:\nPërcakton emrin e përdoruesit të llogarisë PS4, i cili mund të shfaqet nga disa lojra. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. 
+ logTypeGroupBox diff --git a/src/qt_gui/translations/tr_TR.ts b/src/qt_gui/translations/tr_TR.ts index 2845af462..a03a48660 100644 --- a/src/qt_gui/translations/tr_TR.ts +++ b/src/qt_gui/translations/tr_TR.ts @@ -537,6 +537,16 @@ Username Kullanıcı Adı + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Kullanıcı Adı:\nBazı oyunlar tarafından gösterilebilen PS4 hesabının kullanıcı adını ayarlar. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/uk_UA.ts b/src/qt_gui/translations/uk_UA.ts index 8abfca435..7e0a58ffb 100644 --- a/src/qt_gui/translations/uk_UA.ts +++ b/src/qt_gui/translations/uk_UA.ts @@ -537,6 +537,16 @@ Username Ім'я користувача + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Ім'я користувача:\nВстановіть ім'я користувача акаунта PS4. Це може відображатися в деяких іграх. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/vi_VN.ts b/src/qt_gui/translations/vi_VN.ts index 7d0e9a2cd..997c3d3f9 100644 --- a/src/qt_gui/translations/vi_VN.ts +++ b/src/qt_gui/translations/vi_VN.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName Tên người dùng:\nChọn tên người dùng của tài khoản PS4, có thể được một số trò chơi hiển thị. + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. 
+ logTypeGroupBox diff --git a/src/qt_gui/translations/zh_CN.ts b/src/qt_gui/translations/zh_CN.ts index 32b838fac..fecb8857f 100644 --- a/src/qt_gui/translations/zh_CN.ts +++ b/src/qt_gui/translations/zh_CN.ts @@ -537,6 +537,16 @@ Username 用户名 + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName 用户名:\n设置 PS4 帐户的用户名,某些游戏中可能会显示此名称。 + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. + logTypeGroupBox diff --git a/src/qt_gui/translations/zh_TW.ts b/src/qt_gui/translations/zh_TW.ts index 3d27267b6..293ed81a6 100644 --- a/src/qt_gui/translations/zh_TW.ts +++ b/src/qt_gui/translations/zh_TW.ts @@ -537,6 +537,16 @@ Username Username + + + Trophy Key + Trophy Key + + + + Trophy + Trophy + Logger @@ -1236,6 +1246,11 @@ userName 用戶名:\n設定PS4帳號的用戶名,某些遊戲中可能會顯示。 + + + TrophyKey + Trophy Key:\nKey used to decrypt trophies. Must be obtained from your jailbroken console.\nMust contain only hex characters. 
+ logTypeGroupBox diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index 4b13844b8..d694b0939 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -92,6 +92,7 @@ WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_ UNREACHABLE_MSG("Failed to create window handle: {}", SDL_GetError()); } + SDL_SetWindowMinimumSize(window, 640, 360); SDL_SetWindowFullscreen(window, Config::isFullscreenMode()); SDL_InitSubSystem(SDL_INIT_GAMEPAD); @@ -160,6 +161,20 @@ void WindowSDL::WaitEvent() { case SDL_EVENT_GAMEPAD_TOUCHPAD_MOTION: OnGamepadEvent(&event); break; + // i really would have appreciated ANY KIND OF DOCUMENTATION ON THIS + // AND IT DOESN'T EVEN USE PROPER ENUMS + case SDL_EVENT_GAMEPAD_SENSOR_UPDATE: + switch ((SDL_SensorType)event.gsensor.sensor) { + case SDL_SENSOR_GYRO: + controller->Gyro(0, event.gsensor.data); + break; + case SDL_SENSOR_ACCEL: + controller->Acceleration(0, event.gsensor.data); + break; + default: + break; + } + break; case SDL_EVENT_QUIT: is_open = false; break; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 74e736cf6..d064b5d05 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -6,16 +6,22 @@ namespace Shader::Backend::SPIRV { -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { - return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); +template +Id EmitCompositeConstruct(EmitContext& ctx, IR::Inst* inst, Args&&... args) { + return inst->AreAllArgsImmediates() ? ctx.ConstantComposite(args...) 
+ : ctx.OpCompositeConstruct(args...); } -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { - return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3); +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.U32[2], e1, e2); } -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { - return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4); +Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.U32[3], e1, e2, e3); +} + +Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.U32[4], e1, e2, e3, e4); } Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) { @@ -42,16 +48,30 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); } -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { - return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); +Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.U32[2], composite1, composite2, comp0, comp1); } -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { - return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); +Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.U32[3], composite1, composite2, comp0, comp1, comp2); } -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { - return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); +Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.U32[4], 
composite1, composite2, comp0, comp1, comp2, comp3); +} + +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[2], e1, e2); +} + +Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[3], e1, e2, e3); +} + +Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[4], e1, e2, e3, e4); } Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { @@ -78,16 +98,30 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); } -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { - return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); +Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F16[2], composite1, composite2, comp0, comp1); } -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { - return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); +Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F16[3], composite1, composite2, comp0, comp1, comp2); } -Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { - return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); +Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F16[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[2], e1, e2); +} + +Id EmitCompositeConstructF32x3(EmitContext& ctx, 
IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[3], e1, e2, e3); +} + +Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2, e3, e4); } Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { @@ -114,6 +148,20 @@ Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); } +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F32[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F32[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F32[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + void EmitCompositeConstructF64x2(EmitContext&) { UNREACHABLE_MSG("SPIR-V Instruction"); } @@ -150,4 +198,18 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); } +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F64[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F64[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F64[4], composite1, composite2, 
comp0, comp1, comp2, comp3); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f3db6af56..4550440bb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -217,14 +217,6 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { const auto pointer{ ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))}; return ctx.OpLoad(ctx.F32[1], pointer); - } else if (IR::IsParam(attr)) { - const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; - const auto param = ctx.input_params.at(param_id).id; - const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; - const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); } UNREACHABLE(); } @@ -351,6 +343,13 @@ Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, vertex_index, attr_index, comp_index)); } +Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, + Id comp_index) { + const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.output_attr_array, + vertex_index, attr_index, comp_index)); +} + void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) { // Implied vertex index is invocation_id const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 2946edab3..c3d937fe7 
100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -238,7 +238,7 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod } texel = ctx.OpImageRead(color_type, image, coords, operands.mask, operands.operands); } - return !texture.is_integer ? ctx.OpBitcast(ctx.U32[4], texel) : texel; + return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texel) : texel; } void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms, @@ -253,8 +253,8 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id } else if (Sirit::ValidId(lod)) { LOG_WARNING(Render, "Image write with LOD not supported by driver"); } - ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask, - operands.operands); + const Id texel = texture.is_integer ? ctx.OpBitcast(color_type, color) : color; + ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 85bed589b..0d9fcff46 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -89,6 +89,8 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index); void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index); +Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, + Id comp_index); Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); void EmitSetFragColor(EmitContext& ctx, 
u32 index, u32 component, Id value); @@ -118,33 +120,48 @@ Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value); -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id 
EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id 
composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); @@ -154,6 +171,11 @@ void EmitCompositeExtractF64x4(EmitContext& ctx); Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index def1f816e..70411ecec 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -202,7 +202,14 @@ Id EmitBitCount32(EmitContext& ctx, Id value) { } Id EmitBitCount64(EmitContext& ctx, Id value) { - return ctx.OpBitCount(ctx.U64, value); + // Vulkan restricts some bitwise operations to 32-bit only, so decompose into + // two 32-bit values and add the result. 
+ const Id unpacked{ctx.OpBitcast(ctx.U32[2], value)}; + const Id lo{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 0U)}; + const Id hi{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 1U)}; + const Id lo_count{ctx.OpBitCount(ctx.U32[1], lo)}; + const Id hi_count{ctx.OpBitCount(ctx.U32[1], hi)}; + return ctx.OpIAdd(ctx.U32[1], lo_count, hi_count); } Id EmitBitwiseNot32(EmitContext& ctx, Id value) { @@ -222,7 +229,15 @@ Id EmitFindILsb32(EmitContext& ctx, Id value) { } Id EmitFindILsb64(EmitContext& ctx, Id value) { - return ctx.OpFindILsb(ctx.U64, value); + // Vulkan restricts some bitwise operations to 32-bit only, so decompose into + // two 32-bit values and select the correct result. + const Id unpacked{ctx.OpBitcast(ctx.U32[2], value)}; + const Id lo{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 0U)}; + const Id hi{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 1U)}; + const Id lo_lsb{ctx.OpFindILsb(ctx.U32[1], lo)}; + const Id hi_lsb{ctx.OpFindILsb(ctx.U32[1], hi)}; + const Id found_lo{ctx.OpINotEqual(ctx.U32[1], lo_lsb, ctx.ConstU32(u32(-1)))}; + return ctx.OpSelect(ctx.U32[1], found_lo, lo_lsb, hi_lsb); } Id EmitSMin32(EmitContext& ctx, Id a, Id b) { diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 4408cae28..62c0423dd 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -205,7 +205,6 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0))); ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); } - emit_ds_read_barrier = true; } void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { @@ -222,11 +221,6 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst) { - if (emit_ds_read_barrier && 
profile.needs_lds_barriers) { - ir.Barrier(); - emit_ds_read_barrier = false; - } - const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; IR::VectorReg dst_reg{inst.dst[0].code}; if (is_pair) { diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 5927aa696..83240e17f 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -25,34 +25,28 @@ void Translator::EmitExport(const GcnInst& inst) { IR::VectorReg(inst.src[3].code), }; - const auto swizzle = [&](u32 comp) { + const auto set_attribute = [&](u32 comp, IR::F32 value) { if (!IR::IsMrt(attrib)) { - return comp; + ir.SetAttribute(attrib, value, comp); + return; } const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0); - switch (runtime_info.fs_info.color_buffers[index].mrt_swizzle) { - case MrtSwizzle::Identity: - return comp; - case MrtSwizzle::Alt: - static constexpr std::array AltSwizzle = {2, 1, 0, 3}; - return AltSwizzle[comp]; - case MrtSwizzle::Reverse: - static constexpr std::array RevSwizzle = {3, 2, 1, 0}; - return RevSwizzle[comp]; - case MrtSwizzle::ReverseAlt: - static constexpr std::array AltRevSwizzle = {3, 0, 1, 2}; - return AltRevSwizzle[comp]; - default: - UNREACHABLE(); + const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle; + const std::array swizzle_array = {r, g, b, a}; + const auto swizzled_comp = swizzle_array[comp]; + if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) { + ir.SetAttribute(attrib, value, comp); + return; } + ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red)); }; const auto unpack = [&](u32 idx) { const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx])); const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)}; const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)}; - ir.SetAttribute(attrib, r, swizzle(idx * 2)); - ir.SetAttribute(attrib, 
g, swizzle(idx * 2 + 1)); + set_attribute(idx * 2, r); + set_attribute(idx * 2 + 1, g); }; // Components are float16 packed into a VGPR @@ -73,7 +67,7 @@ void Translator::EmitExport(const GcnInst& inst) { continue; } const IR::F32 comp = ir.GetVectorReg(vsrc[i]); - ir.SetAttribute(attrib, comp, swizzle(i)); + set_attribute(i, comp); } } if (IR::IsMrt(attrib)) { diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 3a2b01a90..e18cda012 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -597,14 +597,13 @@ void Translator::S_BCNT1_I32_B64(const GcnInst& inst) { void Translator::S_FF1_I32_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 result{ir.Select(ir.IEqual(src0, ir.Imm32(0U)), ir.Imm32(-1), ir.FindILsb(src0))}; + const IR::U32 result{ir.FindILsb(src0)}; SetDst(inst.dst[0], result); } void Translator::S_FF1_I32_B64(const GcnInst& inst) { const IR::U64 src0{GetSrc64(inst.src[0])}; - const IR::U32 result{ - ir.Select(ir.IEqual(src0, ir.Imm64(u64(0))), ir.Imm32(-1), ir.FindILsb(src0))}; + const IR::U32 result{ir.FindILsb(src0)}; SetDst(inst.dst[0], result); } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index a14bff706..7f5504663 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -10,6 +10,7 @@ #include "shader_recompiler/info.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/ir/reinterpret.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h" @@ -124,12 +125,12 @@ void Translator::EmitPrologue() { } break; case LogicalStage::TessellationControl: { + 
ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); // Should be laid out like: // [0:8]: patch id within VGT // [8:12]: output control point id ir.SetVectorReg(IR::VectorReg::V1, ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo)); - // TODO PrimitiveId is probably V2 but haven't seen it yet break; } case LogicalStage::TessellationEval: @@ -475,26 +476,12 @@ void Translator::EmitFetch(const GcnInst& inst) { // Read the V# of the attribute to figure out component number and type. const auto buffer = info.ReadUdReg(attrib.sgpr_base, attrib.dword_offset); + const auto values = + ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1), + ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3)); + const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect()); for (u32 i = 0; i < 4; i++) { - const IR::F32 comp = [&] { - switch (buffer.GetSwizzle(i)) { - case AmdGpu::CompSwizzle::One: - return ir.Imm32(1.f); - case AmdGpu::CompSwizzle::Zero: - return ir.Imm32(0.f); - case AmdGpu::CompSwizzle::Red: - return ir.GetAttribute(attr, 0); - case AmdGpu::CompSwizzle::Green: - return ir.GetAttribute(attr, 1); - case AmdGpu::CompSwizzle::Blue: - return ir.GetAttribute(attr, 2); - case AmdGpu::CompSwizzle::Alpha: - return ir.GetAttribute(attr, 3); - default: - UNREACHABLE(); - } - }(); - ir.SetVectorReg(dst_reg++, comp); + ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)}); } // In case of programmable step rates we need to fallback to instance data pulling in diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index e8584ec2f..9da0844e4 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -308,7 +308,6 @@ private: const RuntimeInfo& runtime_info; const Profile& profile; bool opcode_missing = false; - bool emit_ds_read_barrier = false; }; void Translate(IR::Block* 
block, u32 block_base, std::span inst_list, Info& info, diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 7c3db9551..c5be08b7d 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -255,10 +255,6 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst "Non immediate offset not supported"); } - if (info.stage == Stage::Hull) { - // printf("here\n"); // break - } - IR::Value address = [&] -> IR::Value { if (is_ring) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); @@ -330,7 +326,7 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) { const IR::VectorReg src_reg{inst.src[1].code}; - std::array comps{}; + std::array comps{}; for (u32 i = 0; i < num_dwords; i++) { comps[i] = ir.GetVectorReg(src_reg + i); } @@ -428,7 +424,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) { if (((mimg.dmask >> i) & 1) == 0) { continue; } - IR::U32 value = IR::U32{ir.CompositeExtract(texel, i)}; + IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)}; ir.SetVectorReg(dest_reg++, value); } } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index c9d97679f..823f9bdcd 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -288,6 +288,12 @@ void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index, Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index); } +F32 IREmitter::ReadTcsGenericOuputAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index) { + return Inst(IR::Opcode::ReadTcsGenericOuputAttribute, vertex_index, attr_index, + comp_index); +} + F32 IREmitter::GetPatch(Patch patch) { return Inst(Opcode::GetPatch, patch); } @@ -657,6 +663,86 @@ Value IREmitter::CompositeInsert(const 
Value& vector, const Value& object, size_ } } +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 4 || comp1 >= 4) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}", comp0, comp1); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast(comp0)}, + Value{static_cast(comp1)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x2); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x2); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x2); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x2); + default: + ThrowInvalidType(vector1.Type()); + } +} + +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 6 || comp1 >= 6 || comp2 >= 6) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}", comp0, comp1, comp2); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast(comp0)}, + Value{static_cast(comp1)}, Value{static_cast(comp2)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x3); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x3); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x3); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x3); + default: + ThrowInvalidType(vector1.Type()); + } +} + +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2, size_t comp3) { + if 
(vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 8 || comp1 >= 8 || comp2 >= 8 || comp3 >= 8) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}, {}", comp0, comp1, comp2, + comp3); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast(comp0)}, + Value{static_cast(comp1)}, Value{static_cast(comp2)}, + Value{static_cast(comp3)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x4); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x4); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x4); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x4); + default: + ThrowInvalidType(vector1.Type()); + } +} + Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { if (true_value.Type() != false_value.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", true_value.Type(), false_value.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 4679a0133..9aab9459b 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -90,6 +90,9 @@ public: const U32& comp_index); void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index); + [[nodiscard]] F32 ReadTcsGenericOuputAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index); + [[nodiscard]] F32 GetPatch(Patch patch); void SetPatch(Patch patch, const F32& value); @@ -152,6 +155,13 @@ public: [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1); + [[nodiscard]] Value 
CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2, size_t comp3); + [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index cf2c3b67e..6242a230e 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -64,6 +64,8 @@ OPCODE(GetPatch, F32, Patc OPCODE(SetPatch, Void, Patch, F32, ) OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, ) OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, ) +OPCODE(ReadTcsGenericOuputAttribute, F32, U32, U32, U32, ) + // Flags OPCODE(GetScc, U1, Void, ) @@ -97,7 +99,7 @@ OPCODE(StoreBufferU32, Void, Opaq OPCODE(StoreBufferU32x2, Void, Opaque, Opaque, U32x2, ) OPCODE(StoreBufferU32x3, Void, Opaque, Opaque, U32x3, ) OPCODE(StoreBufferU32x4, Void, Opaque, Opaque, U32x4, ) -OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, U32x4, ) +OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, ) // Buffer atomic operations OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) @@ -122,6 +124,9 @@ OPCODE(CompositeExtractU32x4, U32, U32x OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) +OPCODE(CompositeShuffleU32x2, U32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeShuffleU32x3, U32x3, U32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeShuffleU32x4, U32x4, U32x4, U32x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) @@ -131,6 +136,9 @@ OPCODE(CompositeExtractF16x4, F16, F16x OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) 
OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) +OPCODE(CompositeShuffleF16x2, F16x2, F16x2, F16x2, U32, U32, ) +OPCODE(CompositeShuffleF16x3, F16x3, F16x3, F16x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF16x4, F16x4, F16x4, F16x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) @@ -140,6 +148,9 @@ OPCODE(CompositeExtractF32x4, F32, F32x OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) +OPCODE(CompositeShuffleF32x2, F32x2, F32x2, F32x2, U32, U32, ) +OPCODE(CompositeShuffleF32x3, F32x3, F32x3, F32x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF32x4, F32x4, F32x4, F32x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) @@ -149,6 +160,9 @@ OPCODE(CompositeExtractF64x4, F64, F64x OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) +OPCODE(CompositeShuffleF64x2, F64x2, F64x2, F64x2, U32, U32, ) +OPCODE(CompositeShuffleF64x3, F64x3, F64x3, F64x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF64x4, F64x4, F64x4, F64x4, U32, U32, U32, U32, ) // Select operations OPCODE(SelectU1, U1, U1, U1, U1, ) @@ -344,8 +358,8 @@ OPCODE(ImageGatherDref, F32x4, Opaq OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageRead, U32x4, Opaque, Opaque, U32, U32, ) -OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32, U32x4, ) +OPCODE(ImageRead, F32x4, Opaque, Opaque, U32, U32, ) +OPCODE(ImageWrite, 
Void, Opaque, Opaque, U32, U32, F32x4, ) // Image atomic operations OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index 895c9823e..6164fec12 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -343,8 +343,8 @@ static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter& // TODO: can optimize div in control point index similarly to mod // Read a TCS input (InputCP region) or TES input (OutputCP region) -static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir, - u32 off_dw) { +static IR::F32 ReadTessControlPointAttribute(IR::U32 addr, const u32 stride, IR::IREmitter& ir, + u32 off_dw, bool is_output_read_in_tcs) { if (off_dw > 0) { addr = ir.IAdd(addr, ir.Imm32(off_dw)); } @@ -354,7 +354,11 @@ static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmit ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u)); const IR::U32 comp_index = ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); - return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); + if (is_output_read_in_tcs) { + return ir.ReadTcsGenericOuputAttribute(control_point_index, attr_index, comp_index); + } else { + return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); + } } } // namespace @@ -481,21 +485,25 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { case IR::Opcode::LoadSharedU128: IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::U32 addr{inst.Arg(0)}; - AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + const AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); const u32 num_dwords = opcode == 
IR::Opcode::LoadSharedU32 ? 1 : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4); - ASSERT_MSG(region == AttributeRegion::InputCP, - "Unhandled read of output or patchconst attribute in hull shader"); + ASSERT_MSG(region == AttributeRegion::InputCP || + region == AttributeRegion::OutputCP, + "Unhandled read of patchconst attribute in hull shader"); + const bool is_tcs_output_read = region == AttributeRegion::OutputCP; + const u32 stride = is_tcs_output_read ? runtime_info.hs_info.hs_output_cp_stride + : runtime_info.hs_info.ls_stride; IR::Value attr_read; if (num_dwords == 1) { attr_read = ir.BitCast( - ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0)); + ReadTessControlPointAttribute(addr, stride, ir, 0, is_tcs_output_read)); } else { boost::container::static_vector read_components; for (auto i = 0; i < num_dwords; i++) { const IR::F32 component = - ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i); + ReadTessControlPointAttribute(addr, stride, ir, i, is_tcs_output_read); read_components.push_back(ir.BitCast(component)); } attr_read = ir.CompositeConstruct(read_components); @@ -565,8 +573,8 @@ void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { : (opcode == IR::Opcode::LoadSharedU64 ? 
2 : 4); const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 { if (region == AttributeRegion::OutputCP) { - return ReadTessInputComponent( - addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw); + return ReadTessControlPointAttribute( + addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw, false); } else { ASSERT(region == AttributeRegion::PatchConst); return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw)); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index e6d23bfe7..636752912 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/ir/reinterpret.h" #include "video_core/amdgpu/resource.h" namespace Shader::Optimization { @@ -128,35 +129,6 @@ bool IsImageInstruction(const IR::Inst& inst) { } } -IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) { - boost::container::static_vector comps; - for (u32 i = 0; i < 4; i++) { - switch (sharp.GetSwizzle(i)) { - case AmdGpu::CompSwizzle::Zero: - comps.emplace_back(ir.Imm32(0.f)); - break; - case AmdGpu::CompSwizzle::One: - comps.emplace_back(ir.Imm32(1.f)); - break; - case AmdGpu::CompSwizzle::Red: - comps.emplace_back(ir.CompositeExtract(texel, 0)); - break; - case AmdGpu::CompSwizzle::Green: - comps.emplace_back(ir.CompositeExtract(texel, 1)); - break; - case AmdGpu::CompSwizzle::Blue: - comps.emplace_back(ir.CompositeExtract(texel, 2)); - break; - case AmdGpu::CompSwizzle::Alpha: - comps.emplace_back(ir.CompositeExtract(texel, 3)); - break; - default: - UNREACHABLE(); - } - } - return ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); -}; - class Descriptors { public: explicit Descriptors(Info& info_) @@ 
-409,15 +381,6 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; inst.SetArg(0, ir.Imm32(binding)); ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); - - // Apply dst_sel swizzle on formatted buffer instructions - if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { - inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2))); - } else { - const auto inst_info = inst.Flags(); - const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); - inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel)); - } } IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, @@ -765,10 +728,6 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip }(); inst.SetArg(1, coords); - if (inst.GetOpcode() == IR::Opcode::ImageWrite) { - inst.SetArg(4, SwizzleVector(ir, image, inst.Arg(4))); - } - if (inst_info.has_lod) { ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead || inst.GetOpcode() == IR::Opcode::ImageWrite); @@ -783,6 +742,50 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } } +void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { + const auto binding = inst.Arg(0).U32(); + const auto buffer_res = info.texture_buffers[binding]; + const auto buffer = buffer_res.GetSharp(info); + if (!buffer.Valid()) { + // Don't need to swizzle invalid buffer. 
+ return; + } + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { + inst.SetArg(2, ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect())); + } else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) { + const auto inst_info = inst.Flags(); + const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); + const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect()); + inst.ReplaceUsesWith(swizzled); + } +} + +void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { + const auto binding = inst.Arg(0).U32(); + const auto image_res = info.images[binding & 0xFFFF]; + const auto image = image_res.GetSharp(info); + if (!image.Valid() || !image_res.IsStorage(image)) { + // Don't need to swizzle invalid or non-storage image. + return; + } + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::ImageWrite) { + inst.SetArg(4, ApplySwizzle(ir, inst.Arg(4), image.DstSelect())); + } else if (inst.GetOpcode() == IR::Opcode::ImageRead) { + const auto inst_info = inst.Flags(); + const auto lod = inst.Arg(2); + const auto ms = inst.Arg(3); + const auto texel = + ir.ImageRead(inst.Arg(0), inst.Arg(1), lod.IsEmpty() ? IR::U32{} : IR::U32{lod}, + ms.IsEmpty() ? IR::U32{} : IR::U32{ms}, inst_info); + const auto swizzled = ApplySwizzle(ir, texel, image.DstSelect()); + inst.ReplaceUsesWith(swizzled); + } +} + void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { // Insert gds binding in the shader if it doesn't exist already. @@ -852,6 +855,19 @@ void ResourceTrackingPass(IR::Program& program) { } } } + // Second pass to reinterpret format read/write where needed, since we now know + // the bindings and their properties. 
+ for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (IsTextureBufferInstruction(inst)) { + PatchTextureBufferInterpretation(*block, inst, info); + continue; + } + if (IsImageInstruction(inst)) { + PatchImageInterpretation(*block, inst, info); + } + } + } } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp index aad8fb148..ec7d7e986 100644 --- a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp +++ b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp @@ -8,6 +8,54 @@ namespace Shader::Optimization { +static void EmitBarrierInBlock(IR::Block* block) { + // This is inteded to insert a barrier when shared memory write and read + // occur in the same basic block. Also checks if branch depth is zero as + // we don't want to insert barrier in potentially divergent code. + bool emit_barrier_on_write = false; + bool emit_barrier_on_read = false; + const auto emit_barrier = [block](bool& emit_cond, IR::Inst& inst) { + if (emit_cond) { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + ir.Barrier(); + emit_cond = false; + } + }; + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::LoadSharedU32 || + inst.GetOpcode() == IR::Opcode::LoadSharedU64) { + emit_barrier(emit_barrier_on_read, inst); + emit_barrier_on_write = true; + } + if (inst.GetOpcode() == IR::Opcode::WriteSharedU32 || + inst.GetOpcode() == IR::Opcode::WriteSharedU64) { + emit_barrier(emit_barrier_on_write, inst); + emit_barrier_on_read = true; + } + } +} + +static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) { + // Insert a barrier after divergent conditional blocks. + // This avoids potential softlocks and crashes when some threads + // initialize shared memory and others read from it. 
+ const IR::U1 cond = data.if_node.cond; + const auto insert_barrier = + IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 && + inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) { + return true; + } + return std::nullopt; + }); + if (insert_barrier) { + IR::Block* const merge = data.if_node.merge; + auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi); + IR::IREmitter ir{*merge, insert_point}; + ir.Barrier(); + } +} + void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) { if (!program.info.uses_shared || !profile.needs_lds_barriers) { return; @@ -19,27 +67,12 @@ void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) { --branch_depth; continue; } - if (node.type != Type::If) { + if (node.type == Type::If && branch_depth++ == 0) { + EmitBarrierInMergeBlock(node.data); continue; } - u32 curr_depth = branch_depth++; - if (curr_depth != 0) { - continue; - } - const IR::U1 cond = node.data.if_node.cond; - const auto insert_barrier = - IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional { - if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 && - inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) { - return true; - } - return std::nullopt; - }); - if (insert_barrier) { - IR::Block* const merge = node.data.if_node.merge; - auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi); - IR::IREmitter ir{*merge, insert_point}; - ir.Barrier(); + if (node.type == Type::Block && branch_depth == 0) { + EmitBarrierInBlock(node.data.block); } } } diff --git a/src/shader_recompiler/ir/reinterpret.h b/src/shader_recompiler/ir/reinterpret.h new file mode 100644 index 000000000..73d587a56 --- /dev/null +++ b/src/shader_recompiler/ir/reinterpret.h @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + 
+#include "shader_recompiler/ir/ir_emitter.h" +#include "video_core/amdgpu/resource.h" + +namespace Shader::IR { + +/// Applies a component swizzle to a vec4. +inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::CompMapping& swizzle) { + // Constants are indexed as 0 and 1, and components are 4-7. Thus we can apply a swizzle + // using two vectors and a shuffle, using one vector of constants and one of the components. + const auto zero = ir.Imm32(0.f); + const auto one = ir.Imm32(1.f); + const auto constants_vec = ir.CompositeConstruct(zero, one, zero, zero); + const auto swizzled = + ir.CompositeShuffle(constants_vec, vector, size_t(swizzle.r), size_t(swizzle.g), + size_t(swizzle.b), size_t(swizzle.a)); + return swizzled; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index bbf74f5d3..781a0b14a 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -180,7 +180,7 @@ struct FragmentRuntimeInfo { std::array inputs; struct PsColorBuffer { AmdGpu::NumberFormat num_format; - MrtSwizzle mrt_swizzle; + AmdGpu::CompMapping swizzle; auto operator<=>(const PsColorBuffer&) const noexcept = default; }; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 5bf97ee51..f8a86c63b 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -31,7 +31,7 @@ struct BufferSpecialization { struct TextureBufferSpecialization { bool is_integer = false; - u32 dst_select = 0; + AmdGpu::CompMapping dst_select{}; auto operator<=>(const TextureBufferSpecialization&) const = default; }; @@ -40,13 +40,9 @@ struct ImageSpecialization { AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; bool is_integer = false; bool is_storage = false; - u32 dst_select = 0; + AmdGpu::CompMapping dst_select{}; - bool operator==(const ImageSpecialization& other) const { - return type == 
other.type && is_integer == other.is_integer && - is_storage == other.is_storage && - (dst_select != 0 ? dst_select == other.dst_select : true); - } + auto operator<=>(const ImageSpecialization&) const = default; }; struct FMaskSpecialization { diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 5dd3edd6d..985f3c652 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -815,12 +815,31 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { } if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); - rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); + rasterizer->ScopeMarkerBegin( + fmt::format("acb[{}]:{}:DispatchIndirect", vqid, cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } break; } + case PM4ItOpcode::DispatchIndirect: { + const auto* dispatch_indirect = reinterpret_cast(header); + auto& cs_program = GetCsRegs(); + const auto offset = dispatch_indirect->data_offset; + const auto ib_address = mapped_queues[vqid].indirect_args_addr; + const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); + } + if (rasterizer && (cs_program.dispatch_initiator & 1)) { + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); + rasterizer->DispatchIndirect(ib_address, offset, size); + rasterizer->ScopeMarkerEnd(); + } + break; + } case PM4ItOpcode::WriteData: { const auto* write_data = reinterpret_cast(header); ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5); @@ -845,6 +864,10 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { release_mem->SignalFence(static_cast(queue.pipe_id)); break; } + case 
PM4ItOpcode::EventWrite: { + // const auto* event = reinterpret_cast(header); + break; + } default: UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}", static_cast(opcode), count); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 83271a82d..d2d1aab3c 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -889,10 +889,54 @@ struct Liverpool { return !info.linear_general; } - NumberFormat NumFormat() const { + [[nodiscard]] DataFormat GetDataFmt() const { + return RemapDataFormat(info.format); + } + + [[nodiscard]] NumberFormat GetNumberFmt() const { // There is a small difference between T# and CB number types, account for it. - return info.number_type == AmdGpu::NumberFormat::SnormNz ? AmdGpu::NumberFormat::Srgb - : info.number_type.Value(); + return RemapNumberFormat(info.number_type == NumberFormat::SnormNz + ? NumberFormat::Srgb + : info.number_type.Value()); + } + + [[nodiscard]] CompMapping Swizzle() const { + // clang-format off + static constexpr std::array, 4> mrt_swizzles{{ + // Standard + std::array{{ + {.r = CompSwizzle::Red, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Alpha}, + }}, + // Alternate + std::array{{ + {.r = CompSwizzle::Green, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Alpha, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Alpha, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Alpha}, + }}, + // StandardReverse + std::array{{ + 
{.r = CompSwizzle::Blue, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Green, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Blue, .b = CompSwizzle::Green, .a = CompSwizzle::Red}, + }}, + // AlternateReverse + std::array{{ + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Green, .a = CompSwizzle::Blue}, + }}, + }}; + // clang-format on + const auto swap_idx = static_cast(info.comp_swap.Value()); + const auto components_idx = NumComponents(info.format) - 1; + const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx]; + return RemapComponents(info.format, mrt_swizzle); } }; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 6bbe1fb7e..208f7f380 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -20,6 +20,87 @@ enum class CompSwizzle : u32 { Alpha = 7, }; +struct CompMapping { + CompSwizzle r : 3; + CompSwizzle g : 3; + CompSwizzle b : 3; + CompSwizzle a : 3; + + auto operator<=>(const CompMapping& other) const = default; + + template + [[nodiscard]] std::array Apply(const std::array& data) const { + return { + ApplySingle(data, r), + ApplySingle(data, g), + ApplySingle(data, b), + ApplySingle(data, a), + }; + } + +private: + template + T ApplySingle(const std::array& data, const CompSwizzle swizzle) const { + switch (swizzle) { + case CompSwizzle::Zero: + return T(0); + case CompSwizzle::One: + return T(1); + case CompSwizzle::Red: + return data[0]; + case 
CompSwizzle::Green: + return data[1]; + case CompSwizzle::Blue: + return data[2]; + case CompSwizzle::Alpha: + return data[3]; + default: + UNREACHABLE(); + } + } +}; + +inline DataFormat RemapDataFormat(const DataFormat format) { + switch (format) { + case DataFormat::Format11_11_10: + return DataFormat::Format10_11_11; + case DataFormat::Format10_10_10_2: + return DataFormat::Format2_10_10_10; + case DataFormat::Format5_5_5_1: + return DataFormat::Format1_5_5_5; + default: + return format; + } +} + +inline NumberFormat RemapNumberFormat(const NumberFormat format) { + return format; +} + +inline CompMapping RemapComponents(const DataFormat format, const CompMapping components) { + switch (format) { + case DataFormat::Format11_11_10: { + CompMapping result; + result.r = components.b; + result.g = components.g; + result.b = components.r; + result.a = components.a; + return result; + } + case DataFormat::Format10_10_10_2: + case DataFormat::Format5_5_5_1: { + CompMapping result; + result.r = components.a; + result.g = components.b; + result.b = components.g; + result.a = components.r; + return result; + } + default: + return components; + } +} + // Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture] struct Buffer { u64 base_address : 44; @@ -52,21 +133,22 @@ struct Buffer { return std::memcmp(this, &other, sizeof(Buffer)) == 0; } - u32 DstSelect() const { - return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); - } - - CompSwizzle GetSwizzle(u32 comp) const noexcept { - const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; - return static_cast(select[comp]); + CompMapping DstSelect() const { + const CompMapping dst_sel{ + .r = CompSwizzle(dst_sel_x), + .g = CompSwizzle(dst_sel_y), + .b = CompSwizzle(dst_sel_z), + .a = CompSwizzle(dst_sel_w), + }; + return RemapComponents(DataFormat(data_format), dst_sel); } NumberFormat GetNumberFmt() const noexcept { - return static_cast(num_format); + return 
RemapNumberFormat(NumberFormat(num_format)); } DataFormat GetDataFmt() const noexcept { - return static_cast(data_format); + return RemapDataFormat(DataFormat(data_format)); } u32 GetStride() const noexcept { @@ -186,10 +268,11 @@ struct Image { static constexpr Image Null() { Image image{}; image.data_format = u64(DataFormat::Format8_8_8_8); - image.dst_sel_x = 4; - image.dst_sel_y = 5; - image.dst_sel_z = 6; - image.dst_sel_w = 7; + image.num_format = u64(NumberFormat::Unorm); + image.dst_sel_x = u64(CompSwizzle::Red); + image.dst_sel_y = u64(CompSwizzle::Green); + image.dst_sel_z = u64(CompSwizzle::Blue); + image.dst_sel_w = u64(CompSwizzle::Alpha); image.tiling_index = u64(TilingMode::Texture_MicroTiled); image.type = u64(ImageType::Color2D); return image; @@ -207,43 +290,14 @@ struct Image { return base_address != 0; } - u32 DstSelect() const { - return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9); - } - - CompSwizzle GetSwizzle(u32 comp) const noexcept { - const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w}; - return static_cast(select[comp]); - } - - static char SelectComp(u32 sel) { - switch (sel) { - case 0: - return '0'; - case 1: - return '1'; - case 4: - return 'R'; - case 5: - return 'G'; - case 6: - return 'B'; - case 7: - return 'A'; - default: - UNREACHABLE(); - } - } - - std::string DstSelectName() const { - std::string result = "["; - u32 dst_sel = DstSelect(); - for (u32 i = 0; i < 4; i++) { - result += SelectComp(dst_sel & 7); - dst_sel >>= 3; - } - result += ']'; - return result; + CompMapping DstSelect() const { + const CompMapping dst_sel{ + .r = CompSwizzle(dst_sel_x), + .g = CompSwizzle(dst_sel_y), + .b = CompSwizzle(dst_sel_z), + .a = CompSwizzle(dst_sel_w), + }; + return RemapComponents(DataFormat(data_format), dst_sel); } u32 Pitch() const { @@ -285,11 +339,11 @@ struct Image { } DataFormat GetDataFmt() const noexcept { - return static_cast(data_format); + return 
RemapDataFormat(DataFormat(data_format)); } NumberFormat GetNumberFmt() const noexcept { - return static_cast(num_format); + return RemapNumberFormat(NumberFormat(num_format)); } TilingMode GetTilingMode() const { diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 59c1e0bc3..3e43b4fbc 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -54,18 +54,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s BufferCache::~BufferCache() = default; void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { - std::scoped_lock lk{mutex}; const bool is_tracked = IsRegionRegistered(device_addr, size); - if (!is_tracked) { - return; - } - // Mark the page as CPU modified to stop tracking writes. - SCOPE_EXIT { + if (is_tracked) { + // Mark the page as CPU modified to stop tracking writes. memory_tracker.MarkRegionAsCpuModified(device_addr, size); - }; - if (!memory_tracker.IsRegionGpuModified(device_addr, size)) { - // Page has not been modified by the GPU, nothing to do. - return; } } @@ -234,46 +226,22 @@ bool BufferCache::BindVertexBuffers( return has_step_rate; } -u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { - // Emulate QuadList and Polygon primitive types with CPU made index buffer. +void BufferCache::BindIndexBuffer(u32 index_offset) { const auto& regs = liverpool->regs; - if (!is_indexed) { - if (regs.primitive_type != AmdGpu::PrimitiveType::Polygon) { - return regs.num_indices; - } - - // Emit indices. - const u32 index_size = 3 * regs.num_indices; - const auto [data, offset] = stream_buffer.Map(index_size); - Vulkan::LiverpoolToVK::EmitPolygonToTriangleListIndices(data, regs.num_indices); - stream_buffer.Commit(); - - // Bind index buffer. 
- is_indexed = true; - - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, vk::IndexType::eUint16); - return index_size / sizeof(u16); - } // Figure out index type and size. const bool is_index16 = regs.index_buffer_type.index_type == AmdGpu::Liverpool::IndexType::Index16; const vk::IndexType index_type = is_index16 ? vk::IndexType::eUint16 : vk::IndexType::eUint32; const u32 index_size = is_index16 ? sizeof(u16) : sizeof(u32); - VAddr index_address = regs.index_base_address.Address(); - index_address += index_offset * index_size; - - if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { - UNREACHABLE(); - } + const VAddr index_address = + regs.index_base_address.Address() + index_offset * index_size; // Bind index buffer. const u32 index_buffer_size = regs.num_indices * index_size; const auto [vk_buffer, offset] = ObtainBuffer(index_address, index_buffer_size, false); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindIndexBuffer(vk_buffer->Handle(), offset, index_type); - return regs.num_indices; } void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { @@ -291,7 +259,16 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo const BufferId buffer_id = FindBuffer(address, num_bytes); return &slot_buffers[buffer_id]; }(); - const vk::BufferMemoryBarrier2 buf_barrier = { + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer->Handle(), + .offset = buffer->Offset(address), + .size = num_bytes, + }; + const vk::BufferMemoryBarrier2 post_barrier = { .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, .dstStageMask = 
vk::PipelineStageFlagBits2::eAllCommands, @@ -303,9 +280,14 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &buf_barrier, + .pBufferMemoryBarriers = &pre_barrier, + }); + cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, }); - cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); } std::pair BufferCache::ObtainHostUBO(std::span data) { @@ -370,6 +352,7 @@ bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) { ++page; continue; } + std::shared_lock lk{mutex}; Buffer& buffer = slot_buffers[buffer_id]; const VAddr buf_start_addr = buffer.CpuAddr(); const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); @@ -496,21 +479,48 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, }; scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + const std::array pre_barriers = { + vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferRead, + .buffer = overlap.Handle(), + .offset = 0, + .size = overlap.SizeBytes(), + }, }; - static constexpr vk::MemoryBarrier WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | 
vk::AccessFlagBits::eMemoryWrite, + const std::array post_barriers = { + vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferRead, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .buffer = overlap.Handle(), + .offset = 0, + .size = overlap.SizeBytes(), + }, + vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = new_buffer.Handle(), + .offset = dst_base_offset, + .size = overlap.SizeBytes(), + }, }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - READ_BARRIER, {}, {}); - cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = pre_barriers.data(), + }); + cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = static_cast(post_barriers.size()), + .pBufferMemoryBarriers = post_barriers.data(), + }); DeleteBuffer(overlap_id); } @@ -520,8 +530,11 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { wanted_size = static_cast(device_addr_end - device_addr); const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size); const u32 size = static_cast(overlap.end - overlap.begin); - const BufferId 
new_buffer_id = slot_buffers.insert( - instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size); + const BufferId new_buffer_id = [&] { + std::scoped_lock lk{mutex}; + return slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, + AllFlags, size); + }(); auto& new_buffer = slot_buffers[new_buffer_id]; const size_t size_bytes = new_buffer.SizeBytes(); const auto cmdbuf = scheduler.CommandBuffer(); @@ -561,10 +574,8 @@ void BufferCache::ChangeRegister(BufferId buffer_id) { void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer) { - std::scoped_lock lk{mutex}; boost::container::small_vector copies; u64 total_size_bytes = 0; - u64 largest_copy = 0; VAddr buffer_start = buffer.CpuAddr(); memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { copies.push_back(vk::BufferCopy{ @@ -573,7 +584,6 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, .size = range_size, }); total_size_bytes += range_size; - largest_copy = std::max(largest_copy, range_size); }); SCOPE_EXIT { if (is_texel_buffer) { @@ -614,21 +624,35 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, } scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - static constexpr vk::MemoryBarrier READ_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer.Handle(), + .offset = 0, + .size = buffer.SizeBytes(), }; - static constexpr vk::MemoryBarrier WRITE_BARRIER{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, 
- .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + const vk::BufferMemoryBarrier2 post_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = buffer.Handle(), + .offset = 0, + .size = buffer.SizeBytes(), }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - READ_BARRIER, {}, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + }); cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); } bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { @@ -678,10 +702,42 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, } if (!copies.empty()) { scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); + const vk::BufferMemoryBarrier2 pre_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer.Handle(), + .offset = max_offset - size, + .size = size, + }; + const vk::BufferMemoryBarrier2 post_barrier = { + .srcStageMask = 
vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, + .buffer = buffer.Handle(), + .offset = max_offset - size, + .size = size, + }; + auto barriers = image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal, + vk::AccessFlagBits2::eTransferRead, + vk::PipelineStageFlagBits2::eTransfer, {}); const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + .imageMemoryBarrierCount = static_cast(barriers.size()), + .pImageMemoryBarriers = barriers.data(), + }); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.Handle(), copies); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); } return true; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index bcbaa45dc..c367795f1 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -3,7 +3,7 @@ #pragma once -#include +#include #include #include #include @@ -83,7 +83,7 @@ public: const std::optional& fetch_shader); /// Bind host index buffer for the current draw. - u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); + void BindIndexBuffer(u32 index_offset); /// Writes a value to GPU buffer. 
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); @@ -157,7 +157,7 @@ private: StreamBuffer staging_buffer; StreamBuffer stream_buffer; Buffer gds_buffer; - std::mutex mutex; + std::shared_mutex mutex; Common::SlotVector slot_buffers; RangeSet gpu_modified_ranges; vk::BufferView null_buffer_view; diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h index ae61b55f2..d9166b11c 100644 --- a/src/video_core/buffer_cache/memory_tracker_base.h +++ b/src/video_core/buffer_cache/memory_tracker_base.h @@ -15,13 +15,8 @@ namespace VideoCore { class MemoryTracker { public: static constexpr size_t MAX_CPU_PAGE_BITS = 40; - static constexpr size_t HIGHER_PAGE_BITS = 22; - static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; - static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); static constexpr size_t MANAGER_POOL_SIZE = 32; - static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; - using Manager = WordManager; public: explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {} @@ -30,7 +25,7 @@ public: /// Returns true if a region has been modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { return IteratePages( - query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { + query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { return manager->template IsRegionModified(offset, size); }); } @@ -38,52 +33,34 @@ public: /// Returns true if a region has been modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { return IteratePages( - query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { + query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t 
size) { return manager->template IsRegionModified(offset, size); }); } /// Mark region as CPU modified, notifying the device_tracker about this change void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { - IteratePages(dirty_cpu_addr, query_size, - [](Manager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); - } - - /// Unmark region as CPU modified, notifying the device_tracker about this change - void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { - IteratePages(dirty_cpu_addr, query_size, - [](Manager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); + IteratePages(dirty_cpu_addr, query_size, + [](RegionManager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); } /// Mark region as modified from the host GPU void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { - IteratePages(dirty_cpu_addr, query_size, - [](Manager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); - } - - /// Unmark region as modified from the host GPU - void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { - IteratePages(dirty_cpu_addr, query_size, - [](Manager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); + IteratePages(dirty_cpu_addr, query_size, + [](RegionManager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); } /// Call 'func' for each CPU modified range and unmark those pages as CPU modified template void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { IteratePages(query_cpu_range, query_size, - [&func](Manager* manager, u64 offset, size_t size) 
{ + [&func](RegionManager* manager, u64 offset, size_t size) { manager->template ForEachModifiedRange( manager->GetCpuAddr() + offset, size, func); }); @@ -93,7 +70,7 @@ public: template void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { IteratePages(query_cpu_range, query_size, - [&func](Manager* manager, u64 offset, size_t size) { + [&func](RegionManager* manager, u64 offset, size_t size) { if constexpr (clear) { manager->template ForEachModifiedRange( manager->GetCpuAddr() + offset, size, func); @@ -114,7 +91,7 @@ private: */ template bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; + using FuncReturn = typename std::invoke_result::type; static constexpr bool BOOL_BREAK = std::is_same_v; std::size_t remaining_size{size}; std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; @@ -155,7 +132,7 @@ private: manager_pool.emplace_back(); auto& last_pool = manager_pool.back(); for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { - std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE); + std::construct_at(&last_pool[i], tracker, 0); free_managers.push_back(&last_pool[i]); } } @@ -167,9 +144,9 @@ private: } PageManager* tracker; - std::deque> manager_pool; - std::vector free_managers; - std::array top_tier{}; + std::deque> manager_pool; + std::vector free_managers; + std::array top_tier{}; }; } // namespace VideoCore diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index ae85d1eb1..7ad33d7a6 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h @@ -3,10 +3,12 @@ #pragma once -#include +#include +#include #include #include -#include "common/div_ceil.h" + +#include "common/spin_lock.h" #include "common/types.h" #include "video_core/page_manager.h" @@ -16,135 +18,32 @@ constexpr u64 PAGES_PER_WORD = 64; constexpr u64 BYTES_PER_PAGE = 4_KB; constexpr u64 
BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; +constexpr u64 HIGHER_PAGE_BITS = 22; +constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; +constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; +constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD; + enum class Type { CPU, GPU, Untracked, }; -/// Vector tracking modified pages tightly packed with small vector optimization -template -struct WordsArray { - /// Returns the pointer to the words state - [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { - return is_short ? stack.data() : heap; - } +using WordsArray = std::array; - /// Returns the pointer to the words state - [[nodiscard]] u64* Pointer(bool is_short) noexcept { - return is_short ? stack.data() : heap; - } - - std::array stack{}; ///< Small buffers storage - u64* heap; ///< Not-small buffers pointer to the storage -}; - -template -struct Words { - explicit Words() = default; - explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { - num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD); - if (IsShort()) { - cpu.stack.fill(~u64{0}); - gpu.stack.fill(0); - untracked.stack.fill(~u64{0}); - } else { - // Share allocation between CPU and GPU pages and set their default values - u64* const alloc = new u64[num_words * 3]; - cpu.heap = alloc; - gpu.heap = alloc + num_words; - untracked.heap = alloc + num_words * 2; - std::fill_n(cpu.heap, num_words, ~u64{0}); - std::fill_n(gpu.heap, num_words, 0); - std::fill_n(untracked.heap, num_words, ~u64{0}); - } - // Clean up tailing bits - const u64 last_word_size = size_bytes % BYTES_PER_WORD; - const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); - const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; - const u64 last_word = (~u64{0} << shift) >> shift; - cpu.Pointer(IsShort())[NumWords() - 1] = last_word; - untracked.Pointer(IsShort())[NumWords() - 1] = last_word; - } - - ~Words() { - Release(); - } - - Words& operator=(Words&& rhs) 
noexcept { - Release(); - size_bytes = rhs.size_bytes; - num_words = rhs.num_words; - cpu = rhs.cpu; - gpu = rhs.gpu; - untracked = rhs.untracked; - rhs.cpu.heap = nullptr; - return *this; - } - - Words(Words&& rhs) noexcept - : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu}, - untracked{rhs.untracked} { - rhs.cpu.heap = nullptr; - } - - Words& operator=(const Words&) = delete; - Words(const Words&) = delete; - - /// Returns true when the buffer fits in the small vector optimization - [[nodiscard]] bool IsShort() const noexcept { - return num_words <= stack_words; - } - - /// Returns the number of words of the buffer - [[nodiscard]] size_t NumWords() const noexcept { - return num_words; - } - - /// Release buffer resources - void Release() { - if (!IsShort()) { - // CPU written words is the base for the heap allocation - delete[] cpu.heap; - } - } - - template - std::span Span() noexcept { - if constexpr (type == Type::CPU) { - return std::span(cpu.Pointer(IsShort()), num_words); - } else if constexpr (type == Type::GPU) { - return std::span(gpu.Pointer(IsShort()), num_words); - } else if constexpr (type == Type::Untracked) { - return std::span(untracked.Pointer(IsShort()), num_words); - } - } - - template - std::span Span() const noexcept { - if constexpr (type == Type::CPU) { - return std::span(cpu.Pointer(IsShort()), num_words); - } else if constexpr (type == Type::GPU) { - return std::span(gpu.Pointer(IsShort()), num_words); - } else if constexpr (type == Type::Untracked) { - return std::span(untracked.Pointer(IsShort()), num_words); - } - } - - u64 size_bytes = 0; - size_t num_words = 0; - WordsArray cpu; - WordsArray gpu; - WordsArray untracked; -}; - -template -class WordManager { +/** + * Allows tracking CPU and GPU modification of pages in a contiguous 4MB virtual address region. + * Information is stored in bitsets for spatial locality and fast update of single pages. 
+ */ +class RegionManager { public: - explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes) - : tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {} - - explicit WordManager() = default; + explicit RegionManager(PageManager* tracker_, VAddr cpu_addr_) + : tracker{tracker_}, cpu_addr{cpu_addr_} { + cpu.fill(~u64{0}); + gpu.fill(0); + untracked.fill(~u64{0}); + } + explicit RegionManager() = default; void SetCpuAddress(VAddr new_cpu_addr) { cpu_addr = new_cpu_addr; @@ -175,12 +74,12 @@ public: static constexpr bool BOOL_BREAK = std::is_same_v; const size_t start = static_cast(std::max(static_cast(offset), 0LL)); const size_t end = static_cast(std::max(static_cast(offset + size), 0LL)); - if (start >= SizeBytes() || end <= start) { + if (start >= HIGHER_PAGE_SIZE || end <= start) { return; } auto [start_word, start_page] = GetWordPage(start); auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL); - const size_t num_words = NumWords(); + constexpr size_t num_words = NUM_REGION_WORDS; start_word = std::min(start_word, num_words); end_word = std::min(end_word, num_words); const size_t diff = end_word - start_word; @@ -225,21 +124,21 @@ public: */ template void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { - std::span state_words = words.template Span(); - [[maybe_unused]] std::span untracked_words = words.template Span(); + std::scoped_lock lk{lock}; + std::span state_words = Span(); IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) { if constexpr (type == Type::CPU) { - NotifyPageTracker(index, untracked_words[index], mask); + UpdateProtection(index, untracked[index], mask); } if constexpr (enable) { state_words[index] |= mask; if constexpr (type == Type::CPU) { - untracked_words[index] |= mask; + untracked[index] |= mask; } } else { state_words[index] &= ~mask; if constexpr (type == Type::CPU) { - untracked_words[index] &= ~mask; + untracked[index] &= ~mask; } } }); @@ -255,10 
+154,10 @@ public: */ template void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { + std::scoped_lock lk{lock}; static_assert(type != Type::Untracked); - std::span state_words = words.template Span(); - [[maybe_unused]] std::span untracked_words = words.template Span(); + std::span state_words = Span(); const size_t offset = query_cpu_range - cpu_addr; bool pending = false; size_t pending_offset{}; @@ -269,16 +168,16 @@ public: }; IterateWords(offset, size, [&](size_t index, u64 mask) { if constexpr (type == Type::GPU) { - mask &= ~untracked_words[index]; + mask &= ~untracked[index]; } const u64 word = state_words[index] & mask; if constexpr (clear) { if constexpr (type == Type::CPU) { - NotifyPageTracker(index, untracked_words[index], mask); + UpdateProtection(index, untracked[index], mask); } state_words[index] &= ~mask; if constexpr (type == Type::CPU) { - untracked_words[index] &= ~mask; + untracked[index] &= ~mask; } } const size_t base_offset = index * PAGES_PER_WORD; @@ -315,13 +214,11 @@ public: [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { static_assert(type != Type::Untracked); - const std::span state_words = words.template Span(); - [[maybe_unused]] const std::span untracked_words = - words.template Span(); + const std::span state_words = Span(); bool result = false; IterateWords(offset, size, [&](size_t index, u64 mask) { if constexpr (type == Type::GPU) { - mask &= ~untracked_words[index]; + mask &= ~untracked[index]; } const u64 word = state_words[index] & mask; if (word != 0) { @@ -333,44 +230,7 @@ public: return result; } - /// Returns the number of words of the manager - [[nodiscard]] size_t NumWords() const noexcept { - return words.NumWords(); - } - - /// Returns the size in bytes of the manager - [[nodiscard]] u64 SizeBytes() const noexcept { - return words.size_bytes; - } - - /// Returns true when the buffer fits in the small vector optimization - [[nodiscard]] bool IsShort() const noexcept { - 
return words.IsShort(); - } - private: - template - u64* Array() noexcept { - if constexpr (type == Type::CPU) { - return words.cpu.Pointer(IsShort()); - } else if constexpr (type == Type::GPU) { - return words.gpu.Pointer(IsShort()); - } else if constexpr (type == Type::Untracked) { - return words.untracked.Pointer(IsShort()); - } - } - - template - const u64* Array() const noexcept { - if constexpr (type == Type::CPU) { - return words.cpu.Pointer(IsShort()); - } else if constexpr (type == Type::GPU) { - return words.gpu.Pointer(IsShort()); - } else if constexpr (type == Type::Untracked) { - return words.untracked.Pointer(IsShort()); - } - } - /** * Notify tracker about changes in the CPU tracking state of a word in the buffer * @@ -381,7 +241,7 @@ private: * @tparam add_to_tracker True when the tracker should start tracking the new pages */ template - void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const { + void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const { u64 changed_bits = (add_to_tracker ? 
current_bits : ~current_bits) & new_bits; VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; IteratePages(changed_bits, [&](size_t offset, size_t size) { @@ -390,9 +250,34 @@ private: }); } + template + std::span Span() noexcept { + if constexpr (type == Type::CPU) { + return cpu; + } else if constexpr (type == Type::GPU) { + return gpu; + } else if constexpr (type == Type::Untracked) { + return untracked; + } + } + + template + std::span Span() const noexcept { + if constexpr (type == Type::CPU) { + return cpu; + } else if constexpr (type == Type::GPU) { + return gpu; + } else if constexpr (type == Type::Untracked) { + return untracked; + } + } + + Common::SpinLock lock; PageManager* tracker; VAddr cpu_addr = 0; - Words words; + WordsArray cpu; + WordsArray gpu; + WordsArray untracked; }; } // namespace VideoCore diff --git a/src/video_core/host_shaders/detile_m32x1.comp b/src/video_core/host_shaders/detile_m32x1.comp index 802f5f531..cdc8d0018 100644 --- a/src/video_core/host_shaders/detile_m32x1.comp +++ b/src/video_core/host_shaders/detile_m32x1.comp @@ -20,7 +20,7 @@ layout(push_constant) uniform image_info { } info; // Inverse morton LUT, small enough to fit into K$ -uint rmort[16] = { +const uint rmort[16] = { 0x11011000, 0x31213020, 0x13031202, 0x33233222, 0x51415040, 0x71617060, diff --git a/src/video_core/host_shaders/detile_m32x2.comp b/src/video_core/host_shaders/detile_m32x2.comp index 90063a185..c128ba5a1 100644 --- a/src/video_core/host_shaders/detile_m32x2.comp +++ b/src/video_core/host_shaders/detile_m32x2.comp @@ -20,7 +20,7 @@ layout(push_constant) uniform image_info { } info; // Inverse morton LUT, small enough to fit into K$ -uint rmort[16] = { +const uint rmort[16] = { 0x11011000, 0x31213020, 0x13031202, 0x33233222, 0x51415040, 0x71617060, diff --git a/src/video_core/host_shaders/detile_m32x4.comp b/src/video_core/host_shaders/detile_m32x4.comp index e1b988172..a09a0b4c4 100644 --- a/src/video_core/host_shaders/detile_m32x4.comp +++ 
b/src/video_core/host_shaders/detile_m32x4.comp @@ -20,7 +20,7 @@ layout(push_constant) uniform image_info { } info; // Inverse morton LUT, small enough to fit into K$ -uint rmort[16] = { +const uint rmort[16] = { 0x11011000, 0x31213020, 0x13031202, 0x33233222, 0x51415040, 0x71617060, diff --git a/src/video_core/host_shaders/detile_m8x1.comp b/src/video_core/host_shaders/detile_m8x1.comp index 39d0aaeb1..ecf706450 100644 --- a/src/video_core/host_shaders/detile_m8x1.comp +++ b/src/video_core/host_shaders/detile_m8x1.comp @@ -48,4 +48,4 @@ void main() { uint dw_ofs_x = target_tile_x * 2 + col; // 2 = uints uint dw_ofs_y = (target_tile_y * MICRO_TILE_DIM + row) * tiles_per_pitch * 2; // 2 = uints out_data[dw_ofs_x + dw_ofs_y] = dst_tx; -} \ No newline at end of file +} diff --git a/src/video_core/host_shaders/detile_m8x2.comp b/src/video_core/host_shaders/detile_m8x2.comp index 3f8e5ab33..909a14acc 100644 --- a/src/video_core/host_shaders/detile_m8x2.comp +++ b/src/video_core/host_shaders/detile_m8x2.comp @@ -25,7 +25,7 @@ layout(push_constant) uniform image_info { #define TEXELS_PER_ELEMENT 2 // Inverse morton LUT, small enough to fit into K$ -uint rmort[16] = { +const uint rmort[16] = { 0x11011000, 0x31213020, 0x13031202, 0x33233222, 0x51415040, 0x71617060, diff --git a/src/video_core/host_shaders/detile_macro32x1.comp b/src/video_core/host_shaders/detile_macro32x1.comp index 086fbcfb5..ecac47d1c 100644 --- a/src/video_core/host_shaders/detile_macro32x1.comp +++ b/src/video_core/host_shaders/detile_macro32x1.comp @@ -21,46 +21,46 @@ layout(push_constant) uniform image_info { } info; // Each LUT is 64 bytes, so should fit into K$ given tiled slices locality -const uint lut_32bpp[][64] = { +const uint lut_32bpp[][16] = { { - 0x00, 0x01, 0x04, 0x05, 0x40, 0x41, 0x44, 0x45, - 0x02, 0x03, 0x06, 0x07, 0x42, 0x43, 0x46, 0x47, - 0x10, 0x11, 0x14, 0x15, 0x50, 0x51, 0x54, 0x55, - 0x12, 0x13, 0x16, 0x17, 0x52, 0x53, 0x56, 0x57, - 0x80, 0x81, 0x84, 0x85, 0xc0, 0xc1, 0xc4, 
0xc5, - 0x82, 0x83, 0x86, 0x87, 0xc2, 0xc3, 0xc6, 0xc7, - 0x90, 0x91, 0x94, 0x95, 0xd0, 0xd1, 0xd4, 0xd5, - 0x92, 0x93, 0x96, 0x97, 0xd2, 0xd3, 0xd6, 0xd7, + 0x05040100, 0x45444140, + 0x07060302, 0x47464342, + 0x15141110, 0x55545150, + 0x17161312, 0x57565352, + 0x85848180, 0xc5c4c1c0, + 0x87868382, 0xc7c6c3c2, + 0x95949190, 0xd5d4d1d0, + 0x97969392, 0xd7d6d3d2, }, { - 0x08, 0x09, 0x0c, 0x0d, 0x48, 0x49, 0x4c, 0x4d, - 0x0a, 0x0b, 0x0e, 0x0f, 0x4a, 0x4b, 0x4e, 0x4f, - 0x18, 0x19, 0x1c, 0x1d, 0x58, 0x59, 0x5c, 0x5d, - 0x1a, 0x1b, 0x1e, 0x1f, 0x5a, 0x5b, 0x5e, 0x5f, - 0x88, 0x89, 0x8c, 0x8d, 0xc8, 0xc9, 0xcc, 0xcd, - 0x8a, 0x8b, 0x8e, 0x8f, 0xca, 0xcb, 0xce, 0xcf, - 0x98, 0x99, 0x9c, 0x9d, 0xd8, 0xd9, 0xdc, 0xdd, - 0x9a, 0x9b, 0x9e, 0x9f, 0xda, 0xdb, 0xde, 0xdf, + 0x0d0c0908, 0x4d4c4948, + 0x0f0e0b0a, 0x4f4e4b4a, + 0x1d1c1918, 0x5d5c5958, + 0x1f1e1b1a, 0x5f5e5b5a, + 0x8d8c8988, 0xcdccc9c8, + 0x8f8e8b8a, 0xcfcecbca, + 0x9d9c9998, 0xdddcd9d8, + 0x9f9e9b9a, 0xdfdedbda, }, { - 0x20, 0x21, 0x24, 0x25, 0x60, 0x61, 0x64, 0x65, - 0x22, 0x23, 0x26, 0x27, 0x62, 0x63, 0x66, 0x67, - 0x30, 0x31, 0x34, 0x35, 0x70, 0x71, 0x74, 0x75, - 0x32, 0x33, 0x36, 0x37, 0x72, 0x73, 0x76, 0x77, - 0xa0, 0xa1, 0xa4, 0xa5, 0xe0, 0xe1, 0xe4, 0xe5, - 0xa2, 0xa3, 0xa6, 0xa7, 0xe2, 0xe3, 0xe6, 0xe7, - 0xb0, 0xb1, 0xb4, 0xb5, 0xf0, 0xf1, 0xf4, 0xf5, - 0xb2, 0xb3, 0xb6, 0xb7, 0xf2, 0xf3, 0xf6, 0xf7, + 0x25242120, 0x65646160, + 0x27262322, 0x67666362, + 0x35343130, 0x75747170, + 0x37363332, 0x77767372, + 0xa5a4a1a0, 0xe5e4e1e0, + 0xa7a6a3a2, 0xe7e6e3e2, + 0xb5b4b1b0, 0xf5f4f1f0, + 0xb7b6b3b2, 0xf7f6f3f2, }, { - 0x28, 0x29, 0x2c, 0x2d, 0x68, 0x69, 0x6c, 0x6d, - 0x2a, 0x2b, 0x2e, 0x2f, 0x6a, 0x6b, 0x6e, 0x6f, - 0x38, 0x39, 0x3c, 0x3d, 0x78, 0x79, 0x7c, 0x7d, - 0x3a, 0x3b, 0x3e, 0x3f, 0x7a, 0x7b, 0x7e, 0x7f, - 0xa8, 0xa9, 0xac, 0xad, 0xe8, 0xe9, 0xec, 0xed, - 0xaa, 0xab, 0xae, 0xaf, 0xea, 0xeb, 0xee, 0xef, - 0xb8, 0xb9, 0xbc, 0xbd, 0xf8, 0xf9, 0xfc, 0xfd, - 0xba, 0xbb, 0xbe, 0xbf, 0xfa, 0xfb, 0xfe, 0xff, + 
0x2d2c2928, 0x6d6c6968, + 0x2f2e2b2a, 0x6f6e6b6a, + 0x3d3c3938, 0x7d7c7978, + 0x3f3e3b3a, 0x7f7e7b7a, + 0xadaca9a8, 0xedece9e8, + 0xafaeabaa, 0xefeeebea, + 0xbdbcb9b8, 0xfdfcf9f8, + 0xbfbebbba, 0xfffefbfa, } }; @@ -77,7 +77,9 @@ void main() { uint col = bitfieldExtract(x, 0, 3); uint row = bitfieldExtract(y, 0, 3); uint lut = bitfieldExtract(z, 0, 2); - uint idx = lut_32bpp[lut][col + row * MICRO_TILE_DIM]; + uint idx_dw = lut_32bpp[lut][(col + row * MICRO_TILE_DIM) >> 2u]; + uint byte_ofs = gl_LocalInvocationID.x & 3u; + uint idx = bitfieldExtract(idx_dw >> (8 * byte_ofs), 0, 8); uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; uint tile_row = y / MICRO_TILE_DIM; diff --git a/src/video_core/host_shaders/detile_macro32x2.comp b/src/video_core/host_shaders/detile_macro32x2.comp index 296311c7a..d161484c1 100644 --- a/src/video_core/host_shaders/detile_macro32x2.comp +++ b/src/video_core/host_shaders/detile_macro32x2.comp @@ -20,46 +20,46 @@ layout(push_constant) uniform image_info { uint c1; } info; -const uint lut_64bpp[][64] = { +const uint lut_64bpp[][16] = { { - 0x00, 0x01, 0x08, 0x09, 0x40, 0x41, 0x48, 0x49, - 0x02, 0x03, 0x0a, 0x0b, 0x42, 0x43, 0x4a, 0x4b, - 0x10, 0x11, 0x18, 0x19, 0x50, 0x51, 0x58, 0x59, - 0x12, 0x13, 0x1a, 0x1b, 0x52, 0x53, 0x5a, 0x5b, - 0x80, 0x81, 0x88, 0x89, 0xc0, 0xc1, 0xc8, 0xc9, - 0x82, 0x83, 0x8a, 0x8b, 0xc2, 0xc3, 0xca, 0xcb, - 0x90, 0x91, 0x98, 0x99, 0xd0, 0xd1, 0xd8, 0xd9, - 0x92, 0x93, 0x9a, 0x9b, 0xd2, 0xd3, 0xda, 0xdb, + 0x09080100, 0x49484140, + 0x0b0a0302, 0x4a4b4342, + 0x19181110, 0x59585150, + 0x1b1a1312, 0x5a5b5352, + 0x89888180, 0xc9c8c1c0, + 0x8b8a8382, 0xcacbc3c2, + 0x99989190, 0xd9d8d1d0, + 0x9b9a9392, 0xdbdad3d2, }, { - 0x04, 0x05, 0x0c, 0x0d, 0x44, 0x45, 0x4c, 0x4d, - 0x06, 0x07, 0x0e, 0x0f, 0x46, 0x47, 0x4e, 0x4f, - 0x14, 0x15, 0x1c, 0x1d, 0x54, 0x55, 0x5c, 0x5d, - 0x16, 0x17, 0x1e, 0x1f, 0x56, 0x57, 0x5e, 0x5f, - 0x84, 0x85, 0x8c, 0x8d, 0xc4, 0xc5, 0xcc, 0xcd, - 0x86, 0x87, 0x8e, 0x8f, 0xc6, 0xc7, 0xce, 0xcf, 
- 0x94, 0x95, 0x9c, 0x9d, 0xd4, 0xd5, 0xdc, 0xdd, - 0x96, 0x97, 0x9e, 0x9f, 0xd6, 0xd7, 0xde, 0xdf, + 0x0d0c0504, 0x4d4c4544, + 0x0f0e0706, 0x4f4e4746, + 0x1d1c1514, 0x5d5c5554, + 0x1f1e1716, 0x5f5e5756, + 0x8d8c8584, 0xcdccc5c4, + 0x8f8e8786, 0xcfcec7c6, + 0x9d9c9594, 0xdddcd5d4, + 0x9f9e9796, 0xdfded7d6, }, { - 0x20, 0x21, 0x28, 0x29, 0x60, 0x61, 0x68, 0x69, - 0x22, 0x23, 0x2a, 0x2b, 0x62, 0x63, 0x6a, 0x6b, - 0x30, 0x31, 0x38, 0x39, 0x70, 0x71, 0x78, 0x79, - 0x32, 0x33, 0x3a, 0x3b, 0x72, 0x73, 0x7a, 0x7b, - 0xa0, 0xa1, 0xa8, 0xa9, 0xe0, 0xe1, 0xe8, 0xe9, - 0xa2, 0xa3, 0xaa, 0xab, 0xe2, 0xe3, 0xea, 0xeb, - 0xb0, 0xb1, 0xb8, 0xb9, 0xf0, 0xf1, 0xf8, 0xf9, - 0xb2, 0xb3, 0xba, 0xbb, 0xf2, 0xf3, 0xfa, 0xfb, + 0x29282120, 0x69686160, + 0x2b2a2322, 0x6b6a6362, + 0x39383130, 0x79787170, + 0x3b3a3332, 0x7b7a7372, + 0xa9a8a1a0, 0xe9e8e1e0, + 0xabaaa3a2, 0xebeae3e2, + 0xb9b8b1b0, 0xf9f8f1f0, + 0xbbbab3b2, 0xfbfaf3f2, }, { - 0x24, 0x25, 0x2c, 0x2d, 0x64, 0x65, 0x6c, 0x6d, - 0x26, 0x27, 0x2e, 0x2f, 0x66, 0x67, 0x6e, 0x6f, - 0x34, 0x35, 0x3c, 0x3d, 0x74, 0x75, 0x7c, 0x7d, - 0x36, 0x37, 0x3e, 0x3f, 0x76, 0x77, 0x7e, 0x7f, - 0xa4, 0xa5, 0xac, 0xad, 0xe4, 0xe5, 0xec, 0xed, - 0xa6, 0xa7, 0xae, 0xaf, 0xe6, 0xe7, 0xee, 0xef, - 0xb4, 0xb5, 0xbc, 0xbd, 0xf4, 0xf5, 0xfc, 0xfd, - 0xb6, 0xb7, 0xbe, 0xbf, 0xf6, 0xf7, 0xfe, 0xff, + 0x2d2c2524, 0x6d6c6564, + 0x2f2e2726, 0x6f6e6766, + 0x3d3c3534, 0x7d7c7574, + 0x3f3e3736, 0x7f7e7776, + 0xadaca5a4, 0xedece5e4, + 0xafaea7a6, 0xefeee7e6, + 0xbdbcb5b4, 0xfdfcf5f4, + 0xbfbeb7b6, 0xfffef7f6, }, }; @@ -76,7 +76,9 @@ void main() { uint col = bitfieldExtract(x, 0, 3); uint row = bitfieldExtract(y, 0, 3); uint lut = bitfieldExtract(z, 0, 2); - uint idx = lut_64bpp[lut][col + row * MICRO_TILE_DIM]; + uint idx_dw = lut_64bpp[lut][(col + row * MICRO_TILE_DIM) >> 2u]; + uint byte_ofs = gl_LocalInvocationID.x & 3u; + uint idx = bitfieldExtract(idx_dw >> (8 * byte_ofs), 0, 8); uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; uint tile_row = y / 
MICRO_TILE_DIM; diff --git a/src/video_core/multi_level_page_table.h b/src/video_core/multi_level_page_table.h index 527476f3b..7f3205e1a 100644 --- a/src/video_core/multi_level_page_table.h +++ b/src/video_core/multi_level_page_table.h @@ -39,6 +39,15 @@ public: return &(*first_level_map[l1_page])[l2_page]; } + [[nodiscard]] const Entry* find(size_t page) const { + const size_t l1_page = page >> SecondLevelBits; + const size_t l2_page = page & (NumEntriesPerL1Page - 1); + if (!first_level_map[l1_page]) { + return nullptr; + } + return &(*first_level_map[l1_page])[l2_page]; + } + [[nodiscard]] const Entry& operator[](size_t page) const { const size_t l1_page = page >> SecondLevelBits; const size_t l2_page = page & (NumEntriesPerL1Page - 1); diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 556555c25..47ed9e543 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -185,7 +185,7 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) { void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) { static constexpr u64 PageShift = 12; - std::scoped_lock lk{mutex}; + std::scoped_lock lk{lock}; const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1; const u64 page_start = addr >> PageShift; const u64 page_end = page_start + num_pages; diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h index 29a946a8f..f44307f92 100644 --- a/src/video_core/page_manager.h +++ b/src/video_core/page_manager.h @@ -4,8 +4,8 @@ #pragma once #include -#include #include +#include "common/spin_lock.h" #include "common/types.h" namespace Vulkan { @@ -35,8 +35,8 @@ private: struct Impl; std::unique_ptr impl; Vulkan::Rasterizer* rasterizer; - std::mutex mutex; boost::icl::interval_map cached_pages; + Common::SpinLock lock; }; } // namespace VideoCore diff --git a/src/video_core/renderdoc.cpp b/src/video_core/renderdoc.cpp index 7e0994992..b082fd1ca 100644 --- 
a/src/video_core/renderdoc.cpp +++ b/src/video_core/renderdoc.cpp @@ -65,11 +65,18 @@ void LoadRenderDoc() { #else static constexpr const char RENDERDOC_LIB[] = "librenderdoc.so"; #endif - if (void* mod = dlopen(RENDERDOC_LIB, RTLD_NOW | RTLD_NOLOAD)) { - const auto RENDERDOC_GetAPI = - reinterpret_cast(dlsym(mod, "RENDERDOC_GetAPI")); - const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api); - ASSERT(ret == 1); + // Check if we are running by RDoc GUI + void* mod = dlopen(RENDERDOC_LIB, RTLD_NOW | RTLD_NOLOAD); + if (!mod && Config::isRdocEnabled()) { + // If enabled in config, try to load RDoc runtime in offline mode + if ((mod = dlopen(RENDERDOC_LIB, RTLD_NOW))) { + const auto RENDERDOC_GetAPI = + reinterpret_cast(dlsym(mod, "RENDERDOC_GetAPI")); + const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api); + ASSERT(ret == 1); + } else { + LOG_ERROR(Render, "Cannot load RenderDoc: {}", dlerror()); + } } #endif if (rdoc_api) { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 25ff88b9d..690d26cfc 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -103,6 +103,7 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) { case AmdGpu::PrimitiveType::TriangleList: return vk::PrimitiveTopology::eTriangleList; case AmdGpu::PrimitiveType::TriangleFan: + case AmdGpu::PrimitiveType::Polygon: return vk::PrimitiveTopology::eTriangleFan; case AmdGpu::PrimitiveType::TriangleStrip: return vk::PrimitiveTopology::eTriangleStrip; @@ -116,9 +117,6 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) { return vk::PrimitiveTopology::eTriangleStripWithAdjacency; case AmdGpu::PrimitiveType::PatchPrimitive: return vk::PrimitiveTopology::ePatchList; - case AmdGpu::PrimitiveType::Polygon: - // Needs to generate index buffer on the fly. 
- return vk::PrimitiveTopology::eTriangleList; case AmdGpu::PrimitiveType::QuadList: case AmdGpu::PrimitiveType::RectList: return vk::PrimitiveTopology::ePatchList; @@ -326,6 +324,34 @@ vk::BorderColor BorderColor(AmdGpu::BorderColor color) { } } +vk::ComponentSwizzle ComponentSwizzle(AmdGpu::CompSwizzle comp_swizzle) { + switch (comp_swizzle) { + case AmdGpu::CompSwizzle::Zero: + return vk::ComponentSwizzle::eZero; + case AmdGpu::CompSwizzle::One: + return vk::ComponentSwizzle::eOne; + case AmdGpu::CompSwizzle::Red: + return vk::ComponentSwizzle::eR; + case AmdGpu::CompSwizzle::Green: + return vk::ComponentSwizzle::eG; + case AmdGpu::CompSwizzle::Blue: + return vk::ComponentSwizzle::eB; + case AmdGpu::CompSwizzle::Alpha: + return vk::ComponentSwizzle::eA; + default: + UNREACHABLE(); + } +} + +vk::ComponentMapping ComponentMapping(AmdGpu::CompMapping comp_mapping) { + return vk::ComponentMapping{ + .r = ComponentSwizzle(comp_mapping.r), + .g = ComponentSwizzle(comp_mapping.g), + .b = ComponentSwizzle(comp_mapping.b), + .a = ComponentSwizzle(comp_mapping.a), + }; +} + static constexpr vk::FormatFeatureFlags2 BufferRead = vk::FormatFeatureFlagBits2::eUniformTexelBuffer | vk::FormatFeatureFlagBits2::eVertexBuffer; static constexpr vk::FormatFeatureFlags2 BufferWrite = @@ -540,10 +566,8 @@ std::span SurfaceFormats() { // 10_11_11 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format10_11_11, AmdGpu::NumberFormat::Float, vk::Format::eB10G11R11UfloatPack32), - // 11_11_10 - CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format11_11_10, AmdGpu::NumberFormat::Float, - vk::Format::eB10G11R11UfloatPack32), - // 10_10_10_2 + // 11_11_10 - Remapped to 10_11_11. + // 10_10_10_2 - Remapped to 2_10_10_10. 
// 2_10_10_10 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Unorm, vk::Format::eA2B10G10R10UnormPack32), @@ -616,7 +640,7 @@ std::span SurfaceFormats() { // 1_5_5_5 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format1_5_5_5, AmdGpu::NumberFormat::Unorm, vk::Format::eR5G5B5A1UnormPack16), - // 5_5_5_1 + // 5_5_5_1 - Remapped to 1_5_5_5. // 4_4_4_4 CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format4_4_4_4, AmdGpu::NumberFormat::Unorm, vk::Format::eR4G4B4A4UnormPack16), @@ -667,41 +691,40 @@ std::span SurfaceFormats() { return formats; } -vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { - const auto& formats = SurfaceFormats(); - const auto format = - std::find_if(formats.begin(), formats.end(), [&](const SurfaceFormatInfo& format_info) { - return format_info.data_format == data_format && - format_info.number_format == num_format; - }); - ASSERT_MSG(format != formats.end(), "Unknown data_format={} and num_format={}", - static_cast(data_format), static_cast(num_format)); - return format->vk_format; +// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture] +static const size_t amd_gpu_data_format_bit_size = 6; // All values are under 64 +static const size_t amd_gpu_number_format_bit_size = 4; // All values are under 16 + +static size_t GetSurfaceFormatTableIndex(AmdGpu::DataFormat data_format, + AmdGpu::NumberFormat num_format) { + DEBUG_ASSERT(u32(data_format) < 1 << amd_gpu_data_format_bit_size); + DEBUG_ASSERT(u32(num_format) < 1 << amd_gpu_number_format_bit_size); + size_t result = static_cast(num_format) | + (static_cast(data_format) << amd_gpu_number_format_bit_size); + return result; } -vk::Format AdjustColorBufferFormat(vk::Format base_format, - Liverpool::ColorBuffer::SwapMode comp_swap) { - const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate; - const bool comp_swap_reverse = comp_swap == 
Liverpool::ColorBuffer::SwapMode::StandardReverse; - const bool comp_swap_alt_reverse = - comp_swap == Liverpool::ColorBuffer::SwapMode::AlternateReverse; - if (comp_swap_alt) { - switch (base_format) { - case vk::Format::eR8G8B8A8Unorm: - return vk::Format::eB8G8R8A8Unorm; - case vk::Format::eB8G8R8A8Unorm: - return vk::Format::eR8G8B8A8Unorm; - case vk::Format::eR8G8B8A8Srgb: - return vk::Format::eB8G8R8A8Srgb; - case vk::Format::eB8G8R8A8Srgb: - return vk::Format::eR8G8B8A8Srgb; - case vk::Format::eA2B10G10R10UnormPack32: - return vk::Format::eA2R10G10B10UnormPack32; - default: - break; - } +static auto surface_format_table = []() constexpr { + std::array + result; + for (auto& entry : result) { + entry = vk::Format::eUndefined; } - return base_format; + for (const auto& supported_format : SurfaceFormats()) { + result[GetSurfaceFormatTableIndex(supported_format.data_format, + supported_format.number_format)] = + supported_format.vk_format; + } + return result; +}(); + +vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { + vk::Format result = surface_format_table[GetSurfaceFormatTableIndex(data_format, num_format)]; + bool found = + result != vk::Format::eUndefined || data_format == AmdGpu::DataFormat::FormatInvalid; + ASSERT_MSG(found, "Unknown data_format={} and num_format={}", static_cast(data_format), + static_cast(num_format)); + return result; } static constexpr DepthFormatInfo CreateDepthFormatInfo( @@ -746,21 +769,12 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat } vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { - const auto comp_swap = color_buffer.info.comp_swap.Value(); - const auto format = color_buffer.info.format.Value(); - const auto number_type = color_buffer.info.number_type.Value(); + const auto comp_swizzle = color_buffer.Swizzle(); + const auto format = color_buffer.GetDataFmt(); + const auto number_type = 
color_buffer.GetNumberFmt(); const auto& c0 = color_buffer.clear_word0; const auto& c1 = color_buffer.clear_word1; - const auto num_bits = AmdGpu::NumBits(color_buffer.info.format); - const auto num_components = AmdGpu::NumComponents(format); - - const bool comp_swap_alt = - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::Alternate || - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::AlternateReverse; - const bool comp_swap_reverse = - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::StandardReverse || - comp_swap == AmdGpu::Liverpool::ColorBuffer::SwapMode::AlternateReverse; vk::ClearColorValue color{}; @@ -1081,26 +1095,7 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color break; } - if (num_components == 1) { - if (comp_swap != Liverpool::ColorBuffer::SwapMode::Standard) { - color.float32[static_cast(comp_swap)] = color.float32[0]; - color.float32[0] = 0.0f; - } - } else { - if (comp_swap_alt && num_components == 4) { - std::swap(color.float32[0], color.float32[2]); - } - - if (comp_swap_reverse) { - std::reverse(std::begin(color.float32), std::begin(color.float32) + num_components); - } - - if (comp_swap_alt && num_components != 4) { - color.float32[3] = color.float32[num_components - 1]; - color.float32[num_components - 1] = 0.0f; - } - } - + color.float32 = comp_swizzle.Apply(color.float32); return {.color = color}; } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index d5f8e693b..a68280e7d 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -42,6 +42,10 @@ vk::SamplerMipmapMode MipFilter(AmdGpu::MipFilter filter); vk::BorderColor BorderColor(AmdGpu::BorderColor color); +vk::ComponentSwizzle ComponentSwizzle(AmdGpu::CompSwizzle comp_swizzle); + +vk::ComponentMapping ComponentMapping(AmdGpu::CompMapping comp_mapping); + struct SurfaceFormatInfo { AmdGpu::DataFormat data_format; 
AmdGpu::NumberFormat number_format; @@ -52,9 +56,6 @@ std::span SurfaceFormats(); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); -vk::Format AdjustColorBufferFormat(vk::Format base_format, - Liverpool::ColorBuffer::SwapMode comp_swap); - struct DepthFormatInfo { Liverpool::DepthBuffer::ZFormat z_format; Liverpool::DepthBuffer::StencilFormat stencil_format; @@ -70,15 +71,6 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags); -inline void EmitPolygonToTriangleListIndices(u8* out_ptr, u32 num_vertices) { - u16* out_data = reinterpret_cast(out_ptr); - for (u16 i = 1; i < num_vertices - 1; i++) { - *out_data++ = 0; - *out_data++ = i; - *out_data++ = i + 1; - } -} - static inline vk::Format PromoteFormatToDepth(vk::Format fmt) { if (fmt == vk::Format::eR32Sfloat) { return vk::Format::eD32Sfloat; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ee8afa3e6..c8f4999b1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -32,7 +32,7 @@ struct GraphicsPipelineKey { u32 num_color_attachments; std::array color_formats; std::array color_num_formats; - std::array mrt_swizzles; + std::array color_swizzles; vk::Format depth_format; vk::Format stencil_format; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c880cad70..ba069dae1 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -168,7 +168,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS for (u32 i = 0; i < Shader::MaxColorBuffers; i++) { info.fs_info.color_buffers[i] = { .num_format = graphics_key.color_num_formats[i], - 
.mrt_swizzle = static_cast(graphics_key.mrt_swizzles[i]), + .swizzle = graphics_key.color_swizzles[i], }; } break; @@ -304,7 +304,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm); key.blend_controls.fill({}); key.write_masks.fill({}); - key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); + key.color_swizzles.fill({}); key.vertex_buffer_formats.fill(vk::Format::eUndefined); key.patch_control_points = 0; @@ -327,14 +327,10 @@ bool PipelineCache::RefreshGraphicsKey() { continue; } - const auto base_format = - LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); key.color_formats[remapped_cb] = - LiverpoolToVK::AdjustColorBufferFormat(base_format, col_buf.info.comp_swap.Value()); - key.color_num_formats[remapped_cb] = col_buf.NumFormat(); - if (base_format == key.color_formats[remapped_cb]) { - key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); - } + LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt()); + key.color_num_formats[remapped_cb] = col_buf.GetNumberFmt(); + key.color_swizzles[remapped_cb] = col_buf.Swizzle(); } fetch_shader = std::nullopt; @@ -450,7 +446,7 @@ bool PipelineCache::RefreshGraphicsKey() { // of the latter we need to change format to undefined, and either way we need to // increment the index for the null attachment binding. 
key.color_formats[remapped_cb] = vk::Format::eUndefined; - key.mrt_swizzles[remapped_cb] = Liverpool::ColorBuffer::SwapMode::Standard; + key.color_swizzles[remapped_cb] = {}; ++remapped_cb; continue; } diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index ab61af6a4..7f0bcb5d2 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -283,6 +283,9 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e Common::FS::GetUserPathString(Common::FS::PathType::LogDir); const char* log_path = crash_diagnostic_path.c_str(); vk::Bool32 enable_force_barriers = vk::True; +#ifdef __APPLE__ + const vk::Bool32 mvk_debug_mode = enable_crash_diagnostic ? vk::True : vk::False; +#endif const std::array layer_setings = { vk::LayerSettingEXT{ @@ -356,7 +359,7 @@ vk::UniqueInstance CreateInstance(Frontend::WindowSystemType window_type, bool e .pSettingName = "MVK_CONFIG_DEBUG", .type = vk::LayerSettingTypeEXT::eBool32, .valueCount = 1, - .pValues = &enable_crash_diagnostic, + .pValues = &mvk_debug_mode, } #endif }; diff --git a/src/video_core/renderer_vulkan/vk_presenter.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp index bc55cde23..93129842f 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -628,6 +628,13 @@ Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) } void Presenter::Present(Frame* frame) { + // Free the frame for reuse + const auto free_frame = [&] { + std::scoped_lock fl{free_mutex}; + free_queue.push(frame); + free_cv.notify_one(); + }; + // Recreate the swapchain if the window was resized. 
if (window.GetWidth() != swapchain.GetExtent().width || window.GetHeight() != swapchain.GetExtent().height) { @@ -636,8 +643,19 @@ void Presenter::Present(Frame* frame) { if (!swapchain.AcquireNextImage()) { swapchain.Recreate(window.GetWidth(), window.GetHeight()); + if (!swapchain.AcquireNextImage()) { + // User resizes the window too fast and GPU can't keep up. Skip this frame. + LOG_WARNING(Render_Vulkan, "Skipping frame!"); + free_frame(); + return; + } } + // Reset fence for queue submission. Do it here instead of GetRenderFrame() because we may + // skip frame because of slow swapchain recreation. If a frame skip occurs, we skip signal + // the frame's present fence and future GetRenderFrame() call will hang waiting for this frame. + instance.GetDevice().resetFences(frame->present_done); + ImGui::Core::NewFrame(); const vk::Image swapchain_image = swapchain.Image(); @@ -737,11 +755,7 @@ void Presenter::Present(Frame* frame) { swapchain.Recreate(window.GetWidth(), window.GetHeight()); } - // Free the frame for reuse - std::scoped_lock fl{free_mutex}; - free_queue.push(frame); - free_cv.notify_one(); - + free_frame(); DebugState.IncFlipFrameNum(); } @@ -776,9 +790,6 @@ Frame* Presenter::GetRenderFrame() { } } - // Reset fence for next queue submission. 
- device.resetFences(frame->present_done); - // If the window dimensions changed, recreate this frame if (frame->width != window.GetWidth() || frame->height != window.GetHeight()) { RecreateFrame(frame, window.GetWidth(), window.GetHeight()); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d458fa124..6e628239b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -12,7 +12,6 @@ #include "video_core/renderer_vulkan/vk_shader_hle.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/texture_cache.h" -#include "vk_rasterizer.h" #ifdef MemoryBarrier #undef MemoryBarrier @@ -252,7 +251,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); - const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); + if (is_indexed) { + buffer_cache.BindIndexBuffer(index_offset); + } BeginRendering(*pipeline, state); UpdateDynamicState(*pipeline); @@ -263,10 +264,11 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); if (is_indexed) { - cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset), - instance_offset); + cmdbuf.drawIndexed(regs.num_indices, regs.num_instances.NumInstances(), 0, + s32(vertex_offset), instance_offset); } else { - cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), vertex_offset, instance_offset); + cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), vertex_offset, + instance_offset); } ResetBindings(); @@ -280,22 +282,12 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 return; } - const auto& regs = liverpool->regs; 
- if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { - // We use a generated index buffer to convert polygons to triangles. Since it - // changes type of the draw, arguments are not valid for this case. We need to run a - // conversion pass to repack the indirect arguments buffer first. - LOG_WARNING(Render_Vulkan, "Primitive type is not supported for indirect draw"); - return; - } - const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline(); if (!pipeline) { return; } auto state = PrepareRenderState(pipeline->GetMrtMask()); - if (!BindResources(pipeline)) { return; } @@ -303,7 +295,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); - buffer_cache.BindIndexBuffer(is_indexed, 0); + if (is_indexed) { + buffer_cache.BindIndexBuffer(0); + } const auto& [buffer, base] = buffer_cache.ObtainBuffer(arg_address + offset, stride * max_count, false); @@ -568,6 +562,12 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding push_data.AddOffset(binding.buffer, adjust); buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, vsharp.GetSize() + adjust); + if (auto barrier = + vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite + : vk::AccessFlagBits2::eShaderRead, + vk::PipelineStageFlagBits2::eAllCommands)) { + buffer_barriers.emplace_back(*barrier); + } } set_writes.push_back({ @@ -606,7 +606,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding if (auto barrier = vk_buffer->GetBarrier(desc.is_written ? 
vk::AccessFlagBits2::eShaderWrite : vk::AccessFlagBits2::eShaderRead, - vk::PipelineStageFlagBits2::eComputeShader)) { + vk::PipelineStageFlagBits2::eAllCommands)) { buffer_barriers.emplace_back(*barrier); } if (desc.is_written) { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 380660a2f..44f4be6dd 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -84,6 +84,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) { } void Swapchain::Recreate(u32 width_, u32 height_) { + LOG_DEBUG(Render_Vulkan, "Recreate the swapchain: width={} height={}", width_, height_); Create(width_, height_, surface); } diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 2cc4aab38..adc72c21f 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -265,9 +265,9 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept { props.is_tiled = buffer.IsTiled(); tiling_mode = buffer.GetTilingMode(); - pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); + pixel_format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()); num_samples = buffer.NumSamples(); - num_bits = NumBits(buffer.info.format); + num_bits = NumBits(buffer.GetDataFmt()); type = vk::ImageType::e2D; size.width = hint.Valid() ? hint.width : buffer.Pitch(); size.height = hint.Valid() ? 
hint.height : buffer.Height(); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 9e67b7f73..68b116558 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -31,25 +31,6 @@ vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) { } } -vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) { - switch (dst_sel) { - case 0: - return vk::ComponentSwizzle::eZero; - case 1: - return vk::ComponentSwizzle::eOne; - case 4: - return vk::ComponentSwizzle::eR; - case 5: - return vk::ComponentSwizzle::eG; - case 6: - return vk::ComponentSwizzle::eB; - case 7: - return vk::ComponentSwizzle::eA; - default: - UNREACHABLE(); - } -} - ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept : is_storage{desc.IsStorage(image)} { const auto dfmt = image.GetDataFmt(); @@ -87,21 +68,16 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso } if (!is_storage) { - mapping.r = ConvertComponentSwizzle(image.dst_sel_x); - mapping.g = ConvertComponentSwizzle(image.dst_sel_y); - mapping.b = ConvertComponentSwizzle(image.dst_sel_z); - mapping.a = ConvertComponentSwizzle(image.dst_sel_w); + mapping = Vulkan::LiverpoolToVK::ComponentMapping(image.DstSelect()); } } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept { - const auto base_format = - Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); range.base.layer = col_buffer.view.slice_start; range.extent.layers = col_buffer.NumSlices() - range.base.layer; type = range.extent.layers > 1 ? 
vk::ImageViewType::e2DArray : vk::ImageViewType::e2D; - format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat(base_format, - col_buffer.info.comp_swap.Value()); + format = + Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.GetDataFmt(), col_buffer.GetNumberFmt()); } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 897d6f67e..291e1da7c 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -542,31 +542,62 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule sched_ptr->EndRendering(); const auto cmdbuf = sched_ptr->CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}, - cmdbuf); - const VAddr image_addr = image.info.guest_address; const size_t image_size = image.info.guest_size_bytes; const auto [vk_buffer, buf_offset] = buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty); + // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW // hazard if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead, vk::PipelineStageFlagBits2::eTransfer)) { - const auto dependencies = vk::DependencyInfo{ + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &barrier.value(), - }; - cmdbuf.pipelineBarrier2(dependencies); + }); } - const auto [buffer, offset] = tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image); + const auto [buffer, offset] = + tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image.info); for (auto& copy : image_copy) { copy.bufferOffset += offset; } + const vk::BufferMemoryBarrier2 pre_barrier{ + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = 
vk::AccessFlagBits2::eMemoryWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferRead, + .buffer = buffer, + .offset = offset, + .size = image_size, + }; + const vk::BufferMemoryBarrier2 post_barrier{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, + .buffer = buffer, + .offset = offset, + .size = image_size, + }; + const auto image_barriers = + image.GetBarriers(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, + vk::PipelineStageFlagBits2::eTransfer, {}); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &pre_barrier, + .imageMemoryBarrierCount = static_cast(image_barriers.size()), + .pImageMemoryBarriers = image_barriers.data(), + }); cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &post_barrier, + }); image.flags &= ~ImageFlagBits::Dirty; } diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index de108843b..c1243dafb 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -4,6 +4,7 @@ #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/tile_manager.h" @@ -32,6 +33,7 @@ static vk::Format 
DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eR5G5B5A1UnormPack16: case vk::Format::eR8G8Unorm: case vk::Format::eR16Sfloat: + case vk::Format::eR16Uint: case vk::Format::eR16Unorm: case vk::Format::eD16Unorm: return vk::Format::eR8G8Uint; @@ -85,10 +87,10 @@ static vk::Format DemoteImageFormatForDetiling(vk::Format format) { return format; } -const DetilerContext* TileManager::GetDetiler(const Image& image) const { - const auto format = DemoteImageFormatForDetiling(image.info.pixel_format); +const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const { + const auto format = DemoteImageFormatForDetiling(info.pixel_format); - switch (image.info.tiling_mode) { + switch (info.tiling_mode) { case AmdGpu::TilingMode::Texture_MicroTiled: switch (format) { case vk::Format::eR8Uint: @@ -257,23 +259,23 @@ void TileManager::FreeBuffer(ScratchBuffer buffer) { } std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_offset, - Image& image) { - if (!image.info.props.is_tiled) { + const ImageInfo& info) { + if (!info.props.is_tiled) { return {in_buffer, in_offset}; } - const auto* detiler = GetDetiler(image); + const auto* detiler = GetDetiler(info); if (!detiler) { - if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled && - image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled && - image.info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) { + if (info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled && + info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled && + info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) { LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})", - vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode)); + vk::to_string(info.pixel_format), NameOf(info.tiling_mode)); } return {in_buffer, in_offset}; } - const u32 image_size = image.info.guest_size_bytes; + const u32 image_size = info.guest_size_bytes; // Prepare output buffer auto out_buffer = 
AllocBuffer(image_size, true); @@ -316,22 +318,21 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o set_writes); DetilerParams params; - params.num_levels = image.info.resources.levels; - params.pitch0 = image.info.pitch >> (image.info.props.is_block ? 2u : 0u); - params.height = image.info.size.height; - if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) { - ASSERT(image.info.resources.levels == 1); - ASSERT(image.info.num_bits >= 32); - const auto tiles_per_row = image.info.pitch / 8u; - const auto tiles_per_slice = tiles_per_row * ((image.info.size.height + 7u) / 8u); + params.num_levels = info.resources.levels; + params.pitch0 = info.pitch >> (info.props.is_block ? 2u : 0u); + params.height = info.size.height; + if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) { + ASSERT(info.resources.levels == 1); + ASSERT(info.num_bits >= 32); + const auto tiles_per_row = info.pitch / 8u; + const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u); params.sizes[0] = tiles_per_row; params.sizes[1] = tiles_per_slice; } else { - - ASSERT(image.info.resources.levels <= 14); + ASSERT(info.resources.levels <= 14); std::memset(¶ms.sizes, 0, sizeof(params.sizes)); - for (int m = 0; m < image.info.resources.levels; ++m) { - params.sizes[m] = image.info.mips_layout[m].size * image.info.resources.layers + + for (int m = 0; m < info.resources.levels; ++m) { + params.sizes[m] = info.mips_layout[m].size * info.resources.layers + (m > 0 ? params.sizes[m - 1] : 0); } } @@ -340,20 +341,9 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o ¶ms); ASSERT((image_size % 64) == 0); - const auto bpp = image.info.num_bits * (image.info.props.is_block ? 16u : 1u); + const auto bpp = info.num_bits * (info.props.is_block ? 
16u : 1u); const auto num_tiles = image_size / (64 * (bpp / 8)); cmdbuf.dispatch(num_tiles, 1, 1); - - const vk::BufferMemoryBarrier post_barrier{ - .srcAccessMask = vk::AccessFlagBits::eShaderWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead, - .buffer = out_buffer.first, - .size = image_size, - }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - {}, post_barrier, {}); - return {out_buffer.first, 0}; } diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 72860bca0..1d731d2f2 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -5,11 +5,11 @@ #include "common/types.h" #include "video_core/buffer_cache/buffer.h" -#include "video_core/texture_cache/image.h" namespace VideoCore { class TextureCache; +struct ImageInfo; enum DetilerType : u32 { Micro8x1, @@ -36,14 +36,15 @@ public: TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler); ~TileManager(); - std::pair TryDetile(vk::Buffer in_buffer, u32 in_offset, Image& image); + std::pair TryDetile(vk::Buffer in_buffer, u32 in_offset, + const ImageInfo& info); ScratchBuffer AllocBuffer(u32 size, bool is_storage = false); void Upload(ScratchBuffer buffer, const void* data, size_t size); void FreeBuffer(ScratchBuffer buffer); private: - const DetilerContext* GetDetiler(const Image& image) const; + const DetilerContext* GetDetiler(const ImageInfo& info) const; private: const Vulkan::Instance& instance;