diff --git a/.gitmodules b/.gitmodules index fb859c87d..8010250a9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -102,6 +102,8 @@ [submodule "externals/LibAtrac9"] path = externals/LibAtrac9 url = https://github.com/shadps4-emu/ext-LibAtrac9.git + shallow = true [submodule "externals/libpng"] path = externals/libpng url = https://github.com/pnggroup/libpng + shallow = true \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 447e48f5e..aa84139ef 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -113,13 +113,14 @@ find_package(FFmpeg 5.1.2 MODULE) find_package(fmt 10.2.0 CONFIG) find_package(glslang 15 CONFIG) find_package(half 1.12.0 MODULE) -find_package(magic_enum 0.9.6 CONFIG) +find_package(magic_enum 0.9.7 CONFIG) find_package(PNG 1.6 MODULE) find_package(RenderDoc 1.6.0 MODULE) find_package(SDL3 3.1.2 CONFIG) +find_package(stb MODULE) find_package(toml11 4.2.0 CONFIG) find_package(tsl-robin-map 1.3.0 CONFIG) -find_package(VulkanHeaders 1.3.289 CONFIG) +find_package(VulkanHeaders 1.4.303 CONFIG) find_package(VulkanMemoryAllocator 3.1.0 CONFIG) find_package(xbyak 7.07 CONFIG) find_package(xxHash 0.8.2 MODULE) @@ -209,7 +210,10 @@ set(GNM_LIB src/core/libraries/gnmdriver/gnmdriver.cpp src/core/libraries/gnmdriver/gnm_error.h ) -set(KERNEL_LIB src/core/libraries/kernel/threads/condvar.cpp +set(KERNEL_LIB src/core/libraries/kernel/sync/mutex.cpp + src/core/libraries/kernel/sync/mutex.h + src/core/libraries/kernel/sync/semaphore.h + src/core/libraries/kernel/threads/condvar.cpp src/core/libraries/kernel/threads/event_flag.cpp src/core/libraries/kernel/threads/exception.cpp src/core/libraries/kernel/threads/exception.h @@ -495,6 +499,8 @@ set(COMMON src/common/logging/backend.cpp src/common/slot_vector.h src/common/spin_lock.cpp src/common/spin_lock.h + src/common/stb.cpp + src/common/stb.h src/common/string_util.cpp src/common/string_util.h src/common/thread.cpp @@ -502,6 +508,7 @@ set(COMMON src/common/logging/backend.cpp 
src/common/types.h src/common/uint128.h src/common/unique_function.h + src/common/va_ctx.h src/common/version.h src/common/ntapi.h src/common/ntapi.cpp @@ -526,6 +533,12 @@ set(CORE src/core/aerolib/stubs.cpp src/core/crypto/crypto.cpp src/core/crypto/crypto.h src/core/crypto/keys.h + src/core/devices/base_device.cpp + src/core/devices/base_device.h + src/core/devices/ioccom.h + src/core/devices/logger.cpp + src/core/devices/logger.h + src/core/devices/nop_device.h src/core/file_format/pfs.h src/core/file_format/pkg.cpp src/core/file_format/pkg.h @@ -725,6 +738,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/renderer_vulkan/vk_resource_pool.h src/video_core/renderer_vulkan/vk_scheduler.cpp src/video_core/renderer_vulkan/vk_scheduler.h + src/video_core/renderer_vulkan/vk_shader_hle.cpp + src/video_core/renderer_vulkan/vk_shader_hle.h src/video_core/renderer_vulkan/vk_shader_util.cpp src/video_core/renderer_vulkan/vk_shader_util.h src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -862,11 +877,19 @@ endif() create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) -target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3 pugixml::pugixml) +target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") +if (ENABLE_DISCORD_RPC) + target_compile_definitions(shadps4 PRIVATE 
ENABLE_DISCORD_RPC) +endif() + +if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + target_compile_definitions(shadps4 PRIVATE ENABLE_USERFAULTFD) +endif() + if (APPLE) option(USE_SYSTEM_VULKAN_LOADER "Enables using the system Vulkan loader instead of directly linking with MoltenVK. Useful for loading validation layers." OFF) if (USE_SYSTEM_VULKAN_LOADER) @@ -995,4 +1018,4 @@ if (ENABLE_QT_GUI AND CMAKE_SYSTEM_NAME STREQUAL "Linux") install(FILES "dist/net.shadps4.shadPS4.metainfo.xml" DESTINATION "share/metainfo") install(FILES ".github/shadps4.png" DESTINATION "share/icons/hicolor/512x512/apps" RENAME "net.shadps4.shadPS4.png") install(FILES "src/images/net.shadps4.shadPS4.svg" DESTINATION "share/icons/hicolor/scalable/apps") -endif() +endif() \ No newline at end of file diff --git a/REUSE.toml b/REUSE.toml index 2d94c9292..747679c8b 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -12,12 +12,13 @@ path = [ "dist/net.shadps4.shadPS4_metadata.pot", "dist/net.shadps4.shadPS4.metainfo.xml", "dist/net.shadps4.shadPS4.releases.xml", - "documents/changelog.txt", + "documents/changelog.md", "documents/Quickstart/2.png", "documents/Screenshots/*", "scripts/ps4_names.txt", "src/images/about_icon.png", "src/images/controller_icon.png", + "src/images/discord.png", "src/images/dump_icon.png", "src/images/exit_icon.png", "src/images/file_icon.png", @@ -28,8 +29,10 @@ path = [ "src/images/flag_us.png", "src/images/flag_world.png", "src/images/folder_icon.png", + "src/images/github.png", "src/images/grid_icon.png", "src/images/iconsize_icon.png", + "src/images/ko-fi.png", "src/images/list_icon.png", "src/images/list_mode_icon.png", "src/images/pause_icon.png", @@ -43,6 +46,8 @@ path = [ "src/images/net.shadps4.shadPS4.svg", "src/images/themes_icon.png", "src/images/update_icon.png", + "src/images/youtube.png", + "src/images/website.png", "src/shadps4.qrc", "src/shadps4.rc", ] @@ -63,7 +68,7 @@ SPDX-FileCopyrightText = "2019-2024 Baldur Karlsson" SPDX-License-Identifier = "MIT" 
[[annotations]] -path = "externals/stb_image.h" +path = "externals/stb/**" precedence = "aggregate" SPDX-FileCopyrightText = "2017 Sean Barrett" SPDX-License-Identifier = "MIT" diff --git a/cmake/Findstb.cmake b/cmake/Findstb.cmake new file mode 100644 index 000000000..667911e1d --- /dev/null +++ b/cmake/Findstb.cmake @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later + +find_path(stb_image_INCLUDE_DIR stb_image.h PATH_SUFFIXES stb) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(stb + REQUIRED_VARS stb_image_INCLUDE_DIR +) + +if (stb_FOUND AND NOT TARGET stb::headers) + add_library(stb::headers INTERFACE IMPORTED) + set_property(TARGET stb::headers PROPERTY + INTERFACE_INCLUDE_DIRECTORIES + "${stb_image_INCLUDE_DIR}" + ) +endif() + +mark_as_advanced(stb_image_INCLUDE_DIR) diff --git a/documents/building-windows.md b/documents/building-windows.md index 48fd09c41..d01e7b81e 100644 --- a/documents/building-windows.md +++ b/documents/building-windows.md @@ -25,8 +25,8 @@ Once you are within the installer: Beware, this requires you to create a Qt account. If you do not want to do this, please follow the MSYS2/MinGW compilation method instead. -1. Under the current, non beta version of Qt (at the time of writing 6.7.2), select the option `MSVC 2019 64-bit` or similar. - If you are on Windows on ARM / Qualcomm Snapdragon Elite X, select `MSVC 2019 ARM64` instead. +1. Under the current, non beta version of Qt (at the time of writing 6.7.3), select the option `MSVC 2022 64-bit` or similar. + If you are on Windows on ARM / Qualcomm Snapdragon Elite X, select `MSVC 2022 ARM64` instead. Go through the installation normally. If you know what you are doing, you may unselect individual components that eat up too much disk space. @@ -35,7 +35,7 @@ Beware, this requires you to create a Qt account. 
If you do not want to do this, Once you are finished, you will have to configure Qt within Visual Studio: 1. Tools -> Options -> Qt -> Versions -2. Add a new Qt version and navigate it to the correct folder. Should look like so: `C:\Qt\6.7.2\msvc2019_64` +2. Add a new Qt version and navigate it to the correct folder. Should look like so: `C:\Qt\6.7.3\msvc2022_64` 3. Enable the default checkmark on the new version you just created. ### (Prerequisite) Download [**Git for Windows**](https://git-scm.com/download/win) @@ -55,16 +55,16 @@ Go through the Git for Windows installation as normal 3. If you want to build shadPS4 with the Qt Gui: 1. Click x64-Clang-Release and select "Manage Configurations" 2. Look for "CMake command arguments" and add to the text field - `-DENABLE_QT_GUI=ON -DCMAKE_PREFIX_PATH=C:\Qt\6.7.2\msvc2019_64` + `-DENABLE_QT_GUI=ON -DCMAKE_PREFIX_PATH=C:\Qt\6.7.3\msvc2022_64` (Change Qt path if you've installed it to non-default path) 3. Press CTRL+S to save and wait a moment for CMake generation 4. Change the project to build to shadps4.exe 5. Build -> Build All -Your shadps4.exe will be in `c:\path\to\source\Build\x64-Clang-Release\` +Your shadps4.exe will be in `C:\path\to\source\Build\x64-Clang-Release\` To automatically populate the necessary files to run shadPS4.exe, run in a command prompt or terminal: -`C:\Qt\6.7.2\msvc2019_64\bin\windeployqt.exe "c:\path\to\shadps4.exe"` +`C:\Qt\6.7.3\msvc2022_64\bin\windeployqt.exe "C:\path\to\shadps4.exe"` (Change Qt path if you've installed it to non-default path) ## Option 2: MSYS2/MinGW @@ -79,7 +79,7 @@ Normal x86-based computers, follow: 1. Open "MSYS2 MINGW64" from your new applications 2. Run `pacman -Syu`, let it complete; -3. Run `pacman -S --needed git mingw-w64-x86_64-binutils mingw-w64-x86_64-clang mingw-w64-x86_64-cmake mingw-w64-x86_64-ninja mingw-w64-x86_64-ffmpeg` +3. 
Run `pacman -S --needed git mingw-w64-x86_64-binutils mingw-w64-x86_64-clang mingw-w64-x86_64-cmake mingw-w64-x86_64-rapidjson mingw-w64-x86_64-ninja mingw-w64-x86_64-ffmpeg` 1. Optional (Qt only): run `pacman -S --needed mingw-w64-x86_64-qt6-base mingw-w64-x86_64-qt6-tools mingw-w64-x86_64-qt6-multimedia` 4. Run `git clone --depth 1 --recursive https://github.com/shadps4-emu/shadPS4` 5. Run `cd shadPS4` @@ -93,7 +93,7 @@ ARM64-based computers, follow: 1. Open "MSYS2 CLANGARM64" from your new applications 2. Run `pacman -Syu`, let it complete; -3. Run `pacman -S --needed git mingw-w64-clang-aarch64-binutils mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-cmake mingw-w64-clang-aarch64-ninja mingw-w64-clang-aarch64-ffmpeg` +3. Run `pacman -S --needed git mingw-w64-clang-aarch64-binutils mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-rapidjson mingw-w64-clang-aarch64-cmake mingw-w64-clang-aarch64-ninja mingw-w64-clang-aarch64-ffmpeg` 1. Optional (Qt only): run `pacman -S --needed mingw-w64-clang-aarch64-qt6-base mingw-w64-clang-aarch64-qt6-tools mingw-w64-clang-aarch64-qt6-multimedia` 4. Run `git clone --depth 1 --recursive https://github.com/shadps4-emu/shadPS4` 5. 
Run `cd shadPS4` diff --git a/documents/changelog.txt b/documents/changelog.md similarity index 50% rename from documents/changelog.txt rename to documents/changelog.md index 6df09472d..766e1a09f 100644 --- a/documents/changelog.txt +++ b/documents/changelog.md @@ -1,3 +1,47 @@ +v0.4.0 31/10/2024 - codename divicius +================= + +- Shader recompiler fixes +- Emulated support for CPUs that don't have SSE4a (Intel CPUs) +- Frame graph + Precise 60 fps timing +- Save data: fix nullptr & concurrent file write +- Auto Update +- Error dialog implementation +- Swapchain recreation and window resizing +- Add playback of background/title music in game list +- Kernel: Quiet sceKernelWaitEventFlag error log on timeout +- Improve keyboard navigation in game list +- core/memory: Pooled memory implementation +- Fix PKG loading +- replace trophy xml assert with error +- Refactor audio handling with range checks, buffer threshold, and lock +- audio_core: Fix return value types and shift some error handling to library +- Devtools: PM4 Explorer +- Initial support of Geometry shaders +- Working touchpad support +- net: Stub sceNetErrnoLoc +- Add support to click touchpad using back button on non PS4/5 controllers +- Multiple Install Folders +- Using a more standard data directory for linux +- video_core: Implement sceGnmInsertPushColorMarker +- ime_dialog: Initial implementation +- Network libs fixes +- Use GetSystemTimePreciseAsFileTime to fix fps timing issues +- Added adaptive mutex initializer +- Small Np + trophy fixes +- Separate Updates from Game Folder +- Minor Fixes for Separate Update Folder +- AvPlayer: Do not align w/h to 16 with vdec2 +- Improve sceSystemServiceReceiveEvent stub +- renderer_vulkan: Commonize and adjust buffer bindings +- Add poll interval to libScePad +- Add more surface format mappings. +- vulkan: Report only missing format feature flags. 
+- IME implementation +- Videodec2 implementation +- path_util: Make sure macOS has current directory set and clean up path code. +- Load LLE modules from sys_modules/GAMEID folder + v0.3.0 23/09/2024 - codename broamic ================= diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index bc2d41bda..082be211a 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -35,7 +35,7 @@ else() if (NOT TARGET cryptopp::cryptopp) set(CRYPTOPP_INSTALL OFF) set(CRYPTOPP_BUILD_TESTING OFF) - set(CRYPTOPP_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/cryptopp/) + set(CRYPTOPP_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/cryptopp) add_subdirectory(cryptopp-cmake) file(COPY cryptopp DESTINATION cryptopp FILES_MATCHING PATTERN "*.h") # remove externals/cryptopp from include directories because it contains a conflicting zlib.h file @@ -216,9 +216,16 @@ endif() # Discord RPC if (ENABLE_DISCORD_RPC) set(BUILD_EXAMPLES OFF) - add_subdirectory(discord-rpc/) + add_subdirectory(discord-rpc) target_include_directories(discord-rpc INTERFACE discord-rpc/include) endif() # GCN Headers add_subdirectory(gcn) + +# stb +if (NOT TARGET stb::headers) + add_library(stb INTERFACE) + target_include_directories(stb INTERFACE stb) + add_library(stb::headers ALIAS stb) +endif() diff --git a/externals/LibAtrac9 b/externals/LibAtrac9 index 3acdcdc78..9640129dc 160000 --- a/externals/LibAtrac9 +++ b/externals/LibAtrac9 @@ -1 +1 @@ -Subproject commit 3acdcdc78f129c2e6145331ff650fa76dd88d62c +Subproject commit 9640129dc6f2afbca6ceeca3019856e8653a5fb2 diff --git a/externals/date b/externals/date index dd8affc6d..28b7b2325 160000 --- a/externals/date +++ b/externals/date @@ -1 +1 @@ -Subproject commit dd8affc6de5755e07638bf0a14382d29549d6ee9 +Subproject commit 28b7b232521ace2c8ef3f2ad4126daec3569c14f diff --git a/externals/ext-boost b/externals/ext-boost index f2474e1b5..ca6f230e6 160000 --- a/externals/ext-boost +++ b/externals/ext-boost @@ -1 +1 @@ -Subproject commit 
f2474e1b584fb7a3ed6f85ba875e6eacd742ec8a +Subproject commit ca6f230e67be7cc45fc919057f07b2aee64dadc1 diff --git a/externals/glslang b/externals/glslang index e61d7bb30..a0995c49e 160000 --- a/externals/glslang +++ b/externals/glslang @@ -1 +1 @@ -Subproject commit e61d7bb3006f451968714e2f653412081871e1ee +Subproject commit a0995c49ebcaca2c6d3b03efbabf74f3843decdb diff --git a/externals/magic_enum b/externals/magic_enum index 126539e13..1a1824df7 160000 --- a/externals/magic_enum +++ b/externals/magic_enum @@ -1 +1 @@ -Subproject commit 126539e13cccdc2e75ce770e94f3c26403099fa5 +Subproject commit 1a1824df7ac798177a521eed952720681b0bf482 diff --git a/externals/pugixml b/externals/pugixml index 3b1718437..4bc14418d 160000 --- a/externals/pugixml +++ b/externals/pugixml @@ -1 +1 @@ -Subproject commit 3b17184379fcaaeb7f1fbe08018b7fedf2640b3b +Subproject commit 4bc14418d12d289dd9978fdce9490a45deeb653e diff --git a/externals/sdl3 b/externals/sdl3 index 54e622c2e..3a1d76d29 160000 --- a/externals/sdl3 +++ b/externals/sdl3 @@ -1 +1 @@ -Subproject commit 54e622c2e6af456bfef382fae44c17682d5ac88a +Subproject commit 3a1d76d298db023f6cf37fb08ee766f20a4e12ab diff --git a/externals/stb_image.h b/externals/stb/stb_image.h similarity index 100% rename from externals/stb_image.h rename to externals/stb/stb_image.h diff --git a/externals/toml11 b/externals/toml11 index f925e7f28..7f6c574ff 160000 --- a/externals/toml11 +++ b/externals/toml11 @@ -1 +1 @@ -Subproject commit f925e7f287c0008813c2294798cf9ca167fd9ffd +Subproject commit 7f6c574ff5aa1053534e7e19c0a4f22bf4c6aaca diff --git a/externals/vma b/externals/vma index 1c35ba99c..5a53a1989 160000 --- a/externals/vma +++ b/externals/vma @@ -1 +1 @@ -Subproject commit 1c35ba99ce775f8342d87a83a3f0f696f99c2a39 +Subproject commit 5a53a198945ba8260fbc58fadb788745ce6aa263 diff --git a/externals/vulkan-headers b/externals/vulkan-headers index d91597a82..6a74a7d65 160000 --- a/externals/vulkan-headers +++ b/externals/vulkan-headers @@ -1 +1 @@ 
-Subproject commit d91597a82f881d473887b560a03a7edf2720b72c +Subproject commit 6a74a7d65cafa19e38ec116651436cce6efd5b2e diff --git a/externals/xbyak b/externals/xbyak index d067f0d3f..4e44f4614 160000 --- a/externals/xbyak +++ b/externals/xbyak @@ -1 +1 @@ -Subproject commit d067f0d3f55696ae8bc9a25ad7012ee80f221d54 +Subproject commit 4e44f4614ddbf038f2a6296f5b906d5c72691e0f diff --git a/externals/xxhash b/externals/xxhash index d4ad85e4a..2bf8313b9 160000 --- a/externals/xxhash +++ b/externals/xxhash @@ -1 +1 @@ -Subproject commit d4ad85e4afaad5c780f54db1dc967fff5a869ffd +Subproject commit 2bf8313b934633b2a5b7e8fd239645b85e10c852 diff --git a/externals/zydis b/externals/zydis index 9d298eb80..bffbb610c 160000 --- a/externals/zydis +++ b/externals/zydis @@ -1 +1 @@ -Subproject commit 9d298eb8067ff62a237203d1e1470785033e185c +Subproject commit bffbb610cfea643b98e87658b9058382f7522807 diff --git a/src/common/config.cpp b/src/common/config.cpp index eae8897c8..3db98a438 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -743,6 +743,7 @@ void setDefaultValues() { emulator_language = "en"; m_language = 1; gpuId = -1; + separateupdatefolder = false; } } // namespace Config diff --git a/src/common/io_file.cpp b/src/common/io_file.cpp index dd3a40cae..067010a26 100644 --- a/src/common/io_file.cpp +++ b/src/common/io_file.cpp @@ -377,16 +377,18 @@ bool IOFile::Seek(s64 offset, SeekOrigin origin) const { return false; } - u64 size = GetSize(); - if (origin == SeekOrigin::CurrentPosition && Tell() + offset > size) { - LOG_ERROR(Common_Filesystem, "Seeking past the end of the file"); - return false; - } else if (origin == SeekOrigin::SetOrigin && (u64)offset > size) { - LOG_ERROR(Common_Filesystem, "Seeking past the end of the file"); - return false; - } else if (origin == SeekOrigin::End && offset > 0) { - LOG_ERROR(Common_Filesystem, "Seeking past the end of the file"); - return false; + if (False(file_access_mode & (FileAccessMode::Write | 
FileAccessMode::Append))) { + u64 size = GetSize(); + if (origin == SeekOrigin::CurrentPosition && Tell() + offset > size) { + LOG_ERROR(Common_Filesystem, "Seeking past the end of the file"); + return false; + } else if (origin == SeekOrigin::SetOrigin && (u64)offset > size) { + LOG_ERROR(Common_Filesystem, "Seeking past the end of the file"); + return false; + } else if (origin == SeekOrigin::End && offset > 0) { + LOG_ERROR(Common_Filesystem, "Seeking past the end of the file"); + return false; + } } errno = 0; diff --git a/src/common/io_file.h b/src/common/io_file.h index 8fed4981f..feb2110ac 100644 --- a/src/common/io_file.h +++ b/src/common/io_file.h @@ -10,6 +10,7 @@ #include "common/concepts.h" #include "common/types.h" +#include "enum.h" namespace Common::FS { @@ -42,6 +43,7 @@ enum class FileAccessMode { */ ReadAppend = Read | Append, }; +DECLARE_ENUM_FLAG_OPERATORS(FileAccessMode); enum class FileType { BinaryFile, diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 632b2b329..75c61a188 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -69,6 +69,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Common, Memory) \ CLS(Core) \ SUB(Core, Linker) \ + SUB(Core, Devices) \ CLS(Config) \ CLS(Debug) \ CLS(Kernel) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index e7e91882a..a0e7d021f 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -35,6 +35,7 @@ enum class Class : u8 { Common_Memory, ///< Memory mapping and management functions Core, ///< LLE emulation core Core_Linker, ///< The module linker + Core_Devices, ///< Devices emulation Config, ///< Emulator configuration (including commandline) Debug, ///< Debugging tools Kernel, ///< The HLE implementation of the PS4 kernel. 
diff --git a/src/common/ntapi.cpp b/src/common/ntapi.cpp index 0fe797e09..c76c4657e 100644 --- a/src/common/ntapi.cpp +++ b/src/common/ntapi.cpp @@ -5,8 +5,11 @@ #include "ntapi.h" -NtDelayExecution_t NtDelayExecution = nullptr; +NtClose_t NtClose = nullptr; NtSetInformationFile_t NtSetInformationFile = nullptr; +NtCreateThread_t NtCreateThread = nullptr; +NtTerminateThread_t NtTerminateThread = nullptr; +NtQueueApcThreadEx_t NtQueueApcThreadEx = nullptr; namespace Common::NtApi { @@ -14,9 +17,12 @@ void Initialize() { HMODULE nt_handle = GetModuleHandleA("ntdll.dll"); // http://stackoverflow.com/a/31411628/4725495 - NtDelayExecution = (NtDelayExecution_t)GetProcAddress(nt_handle, "NtDelayExecution"); + NtClose = (NtClose_t)GetProcAddress(nt_handle, "NtClose"); NtSetInformationFile = (NtSetInformationFile_t)GetProcAddress(nt_handle, "NtSetInformationFile"); + NtCreateThread = (NtCreateThread_t)GetProcAddress(nt_handle, "NtCreateThread"); + NtTerminateThread = (NtTerminateThread_t)GetProcAddress(nt_handle, "NtTerminateThread"); + NtQueueApcThreadEx = (NtQueueApcThreadEx_t)GetProcAddress(nt_handle, "NtQueueApcThreadEx"); } } // namespace Common::NtApi diff --git a/src/common/ntapi.h b/src/common/ntapi.h index 17d353403..daab8440d 100644 --- a/src/common/ntapi.h +++ b/src/common/ntapi.h @@ -108,14 +108,444 @@ typedef struct _FILE_DISPOSITION_INFORMATION { BOOLEAN DeleteFile; } FILE_DISPOSITION_INFORMATION, *PFILE_DISPOSITION_INFORMATION; -typedef u32(__stdcall* NtDelayExecution_t)(BOOL Alertable, PLARGE_INTEGER DelayInterval); +typedef struct _UNICODE_STRING { + USHORT Length; + USHORT MaximumLength; + PWCH Buffer; +} UNICODE_STRING, *PUNICODE_STRING; -typedef u32(__stdcall* NtSetInformationFile_t)(HANDLE FileHandle, PIO_STATUS_BLOCK IoStatusBlock, +typedef const UNICODE_STRING* PCUNICODE_STRING; + +typedef struct _OBJECT_ATTRIBUTES { + ULONG Length; + HANDLE RootDirectory; + PCUNICODE_STRING ObjectName; + ULONG Attributes; + PVOID SecurityDescriptor; // 
PSECURITY_DESCRIPTOR; + PVOID SecurityQualityOfService; // PSECURITY_QUALITY_OF_SERVICE +} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES; + +typedef const OBJECT_ATTRIBUTES* PCOBJECT_ATTRIBUTES; + +typedef struct _CLIENT_ID { + HANDLE UniqueProcess; + HANDLE UniqueThread; +} CLIENT_ID, *PCLIENT_ID; + +typedef struct _INITIAL_TEB { + struct { + PVOID OldStackBase; + PVOID OldStackLimit; + } OldInitialTeb; + PVOID StackBase; + PVOID StackLimit; + PVOID StackAllocationBase; +} INITIAL_TEB, *PINITIAL_TEB; + +typedef struct _PEB_LDR_DATA { + ULONG Length; + BOOLEAN Initialized; + PVOID SsHandle; + LIST_ENTRY InLoadOrderModuleList; + LIST_ENTRY InMemoryOrderModuleList; + LIST_ENTRY InInitializationOrderModuleList; + PVOID EntryInProgress; + BOOLEAN ShutdownInProgress; + HANDLE ShutdownThreadId; +} PEB_LDR_DATA, *PPEB_LDR_DATA; + +typedef struct _CURDIR { + UNICODE_STRING DosPath; + PVOID Handle; +} CURDIR, *PCURDIR; + +typedef struct RTL_DRIVE_LETTER_CURDIR { + USHORT Flags; + USHORT Length; + ULONG TimeStamp; + UNICODE_STRING DosPath; +} RTL_DRIVE_LETTER_CURDIR, *PRTL_DRIVE_LETTER_CURDIR; + +typedef struct _RTL_USER_PROCESS_PARAMETERS { + ULONG AllocationSize; + ULONG Size; + ULONG Flags; + ULONG DebugFlags; + HANDLE ConsoleHandle; + ULONG ConsoleFlags; + HANDLE hStdInput; + HANDLE hStdOutput; + HANDLE hStdError; + CURDIR CurrentDirectory; + UNICODE_STRING DllPath; + UNICODE_STRING ImagePathName; + UNICODE_STRING CommandLine; + PWSTR Environment; + ULONG dwX; + ULONG dwY; + ULONG dwXSize; + ULONG dwYSize; + ULONG dwXCountChars; + ULONG dwYCountChars; + ULONG dwFillAttribute; + ULONG dwFlags; + ULONG wShowWindow; + UNICODE_STRING WindowTitle; + UNICODE_STRING Desktop; + UNICODE_STRING ShellInfo; + UNICODE_STRING RuntimeInfo; + RTL_DRIVE_LETTER_CURDIR DLCurrentDirectory[0x20]; + ULONG_PTR EnvironmentSize; + ULONG_PTR EnvironmentVersion; + PVOID PackageDependencyData; + ULONG ProcessGroupId; + ULONG LoaderThreads; +} RTL_USER_PROCESS_PARAMETERS, *PRTL_USER_PROCESS_PARAMETERS; + 
+typedef struct tagRTL_BITMAP { + ULONG SizeOfBitMap; + PULONG Buffer; +} RTL_BITMAP, *PRTL_BITMAP; + +typedef struct { + UINT next; + UINT id; + ULONGLONG addr; + ULONGLONG size; + UINT args[4]; +} CROSS_PROCESS_WORK_ENTRY; + +typedef union { + struct { + UINT first; + UINT counter; + }; + volatile LONGLONG hdr; +} CROSS_PROCESS_WORK_HDR; + +typedef struct { + CROSS_PROCESS_WORK_HDR free_list; + CROSS_PROCESS_WORK_HDR work_list; + ULONGLONG unknown[4]; + CROSS_PROCESS_WORK_ENTRY entries[1]; +} CROSS_PROCESS_WORK_LIST; + +typedef struct _CHPEV2_PROCESS_INFO { + ULONG Wow64ExecuteFlags; /* 000 */ + USHORT NativeMachineType; /* 004 */ + USHORT EmulatedMachineType; /* 006 */ + HANDLE SectionHandle; /* 008 */ + CROSS_PROCESS_WORK_LIST* CrossProcessWorkList; /* 010 */ + void* unknown; /* 018 */ +} CHPEV2_PROCESS_INFO, *PCHPEV2_PROCESS_INFO; + +typedef u64(__stdcall* KERNEL_CALLBACK_PROC)(void*, ULONG); + +typedef struct _PEB { /* win32/win64 */ + BOOLEAN InheritedAddressSpace; /* 000/000 */ + BOOLEAN ReadImageFileExecOptions; /* 001/001 */ + BOOLEAN BeingDebugged; /* 002/002 */ + UCHAR ImageUsedLargePages : 1; /* 003/003 */ + UCHAR IsProtectedProcess : 1; + UCHAR IsImageDynamicallyRelocated : 1; + UCHAR SkipPatchingUser32Forwarders : 1; + UCHAR IsPackagedProcess : 1; + UCHAR IsAppContainer : 1; + UCHAR IsProtectedProcessLight : 1; + UCHAR IsLongPathAwareProcess : 1; + HANDLE Mutant; /* 004/008 */ + HMODULE ImageBaseAddress; /* 008/010 */ + PPEB_LDR_DATA LdrData; /* 00c/018 */ + RTL_USER_PROCESS_PARAMETERS* ProcessParameters; /* 010/020 */ + PVOID SubSystemData; /* 014/028 */ + HANDLE ProcessHeap; /* 018/030 */ + PRTL_CRITICAL_SECTION FastPebLock; /* 01c/038 */ + PVOID AtlThunkSListPtr; /* 020/040 */ + PVOID IFEOKey; /* 024/048 */ + ULONG ProcessInJob : 1; /* 028/050 */ + ULONG ProcessInitializing : 1; + ULONG ProcessUsingVEH : 1; + ULONG ProcessUsingVCH : 1; + ULONG ProcessUsingFTH : 1; + ULONG ProcessPreviouslyThrottled : 1; + ULONG ProcessCurrentlyThrottled : 1; + 
ULONG ProcessImagesHotPatched : 1; + ULONG ReservedBits0 : 24; + KERNEL_CALLBACK_PROC* KernelCallbackTable; /* 02c/058 */ + ULONG Reserved; /* 030/060 */ + ULONG AtlThunkSListPtr32; /* 034/064 */ + PVOID ApiSetMap; /* 038/068 */ + ULONG TlsExpansionCounter; /* 03c/070 */ + PRTL_BITMAP TlsBitmap; /* 040/078 */ + ULONG TlsBitmapBits[2]; /* 044/080 */ + PVOID ReadOnlySharedMemoryBase; /* 04c/088 */ + PVOID SharedData; /* 050/090 */ + PVOID* ReadOnlyStaticServerData; /* 054/098 */ + PVOID AnsiCodePageData; /* 058/0a0 */ + PVOID OemCodePageData; /* 05c/0a8 */ + PVOID UnicodeCaseTableData; /* 060/0b0 */ + ULONG NumberOfProcessors; /* 064/0b8 */ + ULONG NtGlobalFlag; /* 068/0bc */ + LARGE_INTEGER CriticalSectionTimeout; /* 070/0c0 */ + SIZE_T HeapSegmentReserve; /* 078/0c8 */ + SIZE_T HeapSegmentCommit; /* 07c/0d0 */ + SIZE_T HeapDeCommitTotalFreeThreshold; /* 080/0d8 */ + SIZE_T HeapDeCommitFreeBlockThreshold; /* 084/0e0 */ + ULONG NumberOfHeaps; /* 088/0e8 */ + ULONG MaximumNumberOfHeaps; /* 08c/0ec */ + PVOID* ProcessHeaps; /* 090/0f0 */ + PVOID GdiSharedHandleTable; /* 094/0f8 */ + PVOID ProcessStarterHelper; /* 098/100 */ + PVOID GdiDCAttributeList; /* 09c/108 */ + PVOID LoaderLock; /* 0a0/110 */ + ULONG OSMajorVersion; /* 0a4/118 */ + ULONG OSMinorVersion; /* 0a8/11c */ + ULONG OSBuildNumber; /* 0ac/120 */ + ULONG OSPlatformId; /* 0b0/124 */ + ULONG ImageSubSystem; /* 0b4/128 */ + ULONG ImageSubSystemMajorVersion; /* 0b8/12c */ + ULONG ImageSubSystemMinorVersion; /* 0bc/130 */ + KAFFINITY ActiveProcessAffinityMask; /* 0c0/138 */ +#ifdef _WIN64 + ULONG GdiHandleBuffer[60]; /* /140 */ +#else + ULONG GdiHandleBuffer[34]; /* 0c4/ */ +#endif + PVOID PostProcessInitRoutine; /* 14c/230 */ + PRTL_BITMAP TlsExpansionBitmap; /* 150/238 */ + ULONG TlsExpansionBitmapBits[32]; /* 154/240 */ + ULONG SessionId; /* 1d4/2c0 */ + ULARGE_INTEGER AppCompatFlags; /* 1d8/2c8 */ + ULARGE_INTEGER AppCompatFlagsUser; /* 1e0/2d0 */ + PVOID ShimData; /* 1e8/2d8 */ + PVOID AppCompatInfo; /* 
1ec/2e0 */ + UNICODE_STRING CSDVersion; /* 1f0/2e8 */ + PVOID ActivationContextData; /* 1f8/2f8 */ + PVOID ProcessAssemblyStorageMap; /* 1fc/300 */ + PVOID SystemDefaultActivationData; /* 200/308 */ + PVOID SystemAssemblyStorageMap; /* 204/310 */ + SIZE_T MinimumStackCommit; /* 208/318 */ + PVOID* FlsCallback; /* 20c/320 */ + LIST_ENTRY FlsListHead; /* 210/328 */ + union { + PRTL_BITMAP FlsBitmap; /* 218/338 */ +#ifdef _WIN64 + CHPEV2_PROCESS_INFO* ChpeV2ProcessInfo; /* /338 */ +#endif + }; + ULONG FlsBitmapBits[4]; /* 21c/340 */ + ULONG FlsHighIndex; /* 22c/350 */ + PVOID WerRegistrationData; /* 230/358 */ + PVOID WerShipAssertPtr; /* 234/360 */ + PVOID EcCodeBitMap; /* 238/368 */ + PVOID pImageHeaderHash; /* 23c/370 */ + ULONG HeapTracingEnabled : 1; /* 240/378 */ + ULONG CritSecTracingEnabled : 1; + ULONG LibLoaderTracingEnabled : 1; + ULONG SpareTracingBits : 29; + ULONGLONG CsrServerReadOnlySharedMemoryBase; /* 248/380 */ + ULONG TppWorkerpListLock; /* 250/388 */ + LIST_ENTRY TppWorkerpList; /* 254/390 */ + PVOID WaitOnAddressHashTable[0x80]; /* 25c/3a0 */ + PVOID TelemetryCoverageHeader; /* 45c/7a0 */ + ULONG CloudFileFlags; /* 460/7a8 */ + ULONG CloudFileDiagFlags; /* 464/7ac */ + CHAR PlaceholderCompatibilityMode; /* 468/7b0 */ + CHAR PlaceholderCompatibilityModeReserved[7]; /* 469/7b1 */ + PVOID LeapSecondData; /* 470/7b8 */ + ULONG LeapSecondFlags; /* 474/7c0 */ + ULONG NtGlobalFlag2; /* 478/7c4 */ +} PEB, *PPEB; + +typedef struct _RTL_ACTIVATION_CONTEXT_STACK_FRAME { + struct _RTL_ACTIVATION_CONTEXT_STACK_FRAME* Previous; + struct _ACTIVATION_CONTEXT* ActivationContext; + ULONG Flags; +} RTL_ACTIVATION_CONTEXT_STACK_FRAME, *PRTL_ACTIVATION_CONTEXT_STACK_FRAME; + +typedef struct _ACTIVATION_CONTEXT_STACK { + RTL_ACTIVATION_CONTEXT_STACK_FRAME* ActiveFrame; + LIST_ENTRY FrameListCache; + ULONG Flags; + ULONG NextCookieSequenceNumber; + ULONG_PTR StackId; +} ACTIVATION_CONTEXT_STACK, *PACTIVATION_CONTEXT_STACK; + +typedef struct _GDI_TEB_BATCH { + ULONG 
Offset; + HANDLE HDC; + ULONG Buffer[0x136]; +} GDI_TEB_BATCH; + +typedef struct _TEB_ACTIVE_FRAME_CONTEXT { + ULONG Flags; + const char* FrameName; +} TEB_ACTIVE_FRAME_CONTEXT, *PTEB_ACTIVE_FRAME_CONTEXT; + +typedef struct _TEB_ACTIVE_FRAME { + ULONG Flags; + struct _TEB_ACTIVE_FRAME* Previous; + TEB_ACTIVE_FRAME_CONTEXT* Context; +} TEB_ACTIVE_FRAME, *PTEB_ACTIVE_FRAME; + +typedef struct _TEB { /* win32/win64 */ + NT_TIB Tib; /* 000/0000 */ + PVOID EnvironmentPointer; /* 01c/0038 */ + CLIENT_ID ClientId; /* 020/0040 */ + PVOID ActiveRpcHandle; /* 028/0050 */ + PVOID ThreadLocalStoragePointer; /* 02c/0058 */ + PPEB Peb; /* 030/0060 */ + ULONG LastErrorValue; /* 034/0068 */ + ULONG CountOfOwnedCriticalSections; /* 038/006c */ + PVOID CsrClientThread; /* 03c/0070 */ + PVOID Win32ThreadInfo; /* 040/0078 */ + ULONG User32Reserved[26]; /* 044/0080 */ + ULONG UserReserved[5]; /* 0ac/00e8 */ + PVOID WOW32Reserved; /* 0c0/0100 */ + ULONG CurrentLocale; /* 0c4/0108 */ + ULONG FpSoftwareStatusRegister; /* 0c8/010c */ + PVOID ReservedForDebuggerInstrumentation[16]; /* 0cc/0110 */ +#ifdef _WIN64 + PVOID SystemReserved1[30]; /* /0190 */ +#else + PVOID SystemReserved1[26]; /* 10c/ */ +#endif + char PlaceholderCompatibilityMode; /* 174/0280 */ + BOOLEAN PlaceholderHydrationAlwaysExplicit; /* 175/0281 */ + char PlaceholderReserved[10]; /* 176/0282 */ + DWORD ProxiedProcessId; /* 180/028c */ + ACTIVATION_CONTEXT_STACK ActivationContextStack; /* 184/0290 */ + UCHAR WorkingOnBehalfOfTicket[8]; /* 19c/02b8 */ + LONG ExceptionCode; /* 1a4/02c0 */ + ACTIVATION_CONTEXT_STACK* ActivationContextStackPointer; /* 1a8/02c8 */ + ULONG_PTR InstrumentationCallbackSp; /* 1ac/02d0 */ + ULONG_PTR InstrumentationCallbackPreviousPc; /* 1b0/02d8 */ + ULONG_PTR InstrumentationCallbackPreviousSp; /* 1b4/02e0 */ +#ifdef _WIN64 + ULONG TxFsContext; /* /02e8 */ + BOOLEAN InstrumentationCallbackDisabled; /* /02ec */ + BOOLEAN UnalignedLoadStoreExceptions; /* /02ed */ +#else + BOOLEAN 
InstrumentationCallbackDisabled; /* 1b8/ */ + BYTE SpareBytes1[23]; /* 1b9/ */ + ULONG TxFsContext; /* 1d0/ */ +#endif + GDI_TEB_BATCH GdiTebBatch; /* 1d4/02f0 */ + CLIENT_ID RealClientId; /* 6b4/07d8 */ + HANDLE GdiCachedProcessHandle; /* 6bc/07e8 */ + ULONG GdiClientPID; /* 6c0/07f0 */ + ULONG GdiClientTID; /* 6c4/07f4 */ + PVOID GdiThreadLocaleInfo; /* 6c8/07f8 */ + ULONG_PTR Win32ClientInfo[62]; /* 6cc/0800 */ + PVOID glDispatchTable[233]; /* 7c4/09f0 */ + PVOID glReserved1[29]; /* b68/1138 */ + PVOID glReserved2; /* bdc/1220 */ + PVOID glSectionInfo; /* be0/1228 */ + PVOID glSection; /* be4/1230 */ + PVOID glTable; /* be8/1238 */ + PVOID glCurrentRC; /* bec/1240 */ + PVOID glContext; /* bf0/1248 */ + ULONG LastStatusValue; /* bf4/1250 */ + UNICODE_STRING StaticUnicodeString; /* bf8/1258 */ + WCHAR StaticUnicodeBuffer[261]; /* c00/1268 */ + PVOID DeallocationStack; /* e0c/1478 */ + PVOID TlsSlots[64]; /* e10/1480 */ + LIST_ENTRY TlsLinks; /* f10/1680 */ + PVOID Vdm; /* f18/1690 */ + PVOID ReservedForNtRpc; /* f1c/1698 */ + PVOID DbgSsReserved[2]; /* f20/16a0 */ + ULONG HardErrorMode; /* f28/16b0 */ +#ifdef _WIN64 + PVOID Instrumentation[11]; /* /16b8 */ +#else + PVOID Instrumentation[9]; /* f2c/ */ +#endif + GUID ActivityId; /* f50/1710 */ + PVOID SubProcessTag; /* f60/1720 */ + PVOID PerflibData; /* f64/1728 */ + PVOID EtwTraceData; /* f68/1730 */ + PVOID WinSockData; /* f6c/1738 */ + ULONG GdiBatchCount; /* f70/1740 */ + ULONG IdealProcessorValue; /* f74/1744 */ + ULONG GuaranteedStackBytes; /* f78/1748 */ + PVOID ReservedForPerf; /* f7c/1750 */ + PVOID ReservedForOle; /* f80/1758 */ + ULONG WaitingOnLoaderLock; /* f84/1760 */ + PVOID SavedPriorityState; /* f88/1768 */ + ULONG_PTR ReservedForCodeCoverage; /* f8c/1770 */ + PVOID ThreadPoolData; /* f90/1778 */ + PVOID* TlsExpansionSlots; /* f94/1780 */ +#ifdef _WIN64 + union { + PVOID DeallocationBStore; /* /1788 */ + PVOID* ChpeV2CpuAreaInfo; /* /1788 */ + } DUMMYUNIONNAME; + PVOID BStoreLimit; /* /1790 */ 
+#endif + ULONG MuiGeneration; /* f98/1798 */ + ULONG IsImpersonating; /* f9c/179c */ + PVOID NlsCache; /* fa0/17a0 */ + PVOID ShimData; /* fa4/17a8 */ + ULONG HeapVirtualAffinity; /* fa8/17b0 */ + PVOID CurrentTransactionHandle; /* fac/17b8 */ + TEB_ACTIVE_FRAME* ActiveFrame; /* fb0/17c0 */ + PVOID* FlsSlots; /* fb4/17c8 */ + PVOID PreferredLanguages; /* fb8/17d0 */ + PVOID UserPrefLanguages; /* fbc/17d8 */ + PVOID MergedPrefLanguages; /* fc0/17e0 */ + ULONG MuiImpersonation; /* fc4/17e8 */ + USHORT CrossTebFlags; /* fc8/17ec */ + USHORT SameTebFlags; /* fca/17ee */ + PVOID TxnScopeEnterCallback; /* fcc/17f0 */ + PVOID TxnScopeExitCallback; /* fd0/17f8 */ + PVOID TxnScopeContext; /* fd4/1800 */ + ULONG LockCount; /* fd8/1808 */ + LONG WowTebOffset; /* fdc/180c */ + PVOID ResourceRetValue; /* fe0/1810 */ + PVOID ReservedForWdf; /* fe4/1818 */ + ULONGLONG ReservedForCrt; /* fe8/1820 */ + GUID EffectiveContainerId; /* ff0/1828 */ +} TEB, *PTEB; +static_assert(offsetof(TEB, DeallocationStack) == + 0x1478); /* The only member we care about at the moment */ + +typedef enum _QUEUE_USER_APC_FLAGS { + QueueUserApcFlagsNone, + QueueUserApcFlagsSpecialUserApc, + QueueUserApcFlagsMaxValue +} QUEUE_USER_APC_FLAGS; + +typedef union _USER_APC_OPTION { + ULONG_PTR UserApcFlags; + HANDLE MemoryReserveHandle; +} USER_APC_OPTION, *PUSER_APC_OPTION; + +using PPS_APC_ROUTINE = void (*)(PVOID ApcArgument1, PVOID ApcArgument2, PVOID ApcArgument3, + PCONTEXT Context); + +typedef u64(__stdcall* NtClose_t)(HANDLE Handle); + +typedef u64(__stdcall* NtSetInformationFile_t)(HANDLE FileHandle, PIO_STATUS_BLOCK IoStatusBlock, PVOID FileInformation, ULONG Length, FILE_INFORMATION_CLASS FileInformationClass); -extern NtDelayExecution_t NtDelayExecution; +typedef u64(__stdcall* NtCreateThread_t)(PHANDLE ThreadHandle, ACCESS_MASK DesiredAccess, + PCOBJECT_ATTRIBUTES ObjectAttributes, HANDLE ProcessHandle, + PCLIENT_ID ClientId, PCONTEXT ThreadContext, + PINITIAL_TEB InitialTeb, BOOLEAN 
CreateSuspended); + +typedef u64(__stdcall* NtTerminateThread_t)(HANDLE ThreadHandle, u64 ExitStatus); + +typedef u64(__stdcall* NtQueueApcThreadEx_t)(HANDLE ThreadHandle, + USER_APC_OPTION UserApcReserveHandle, + PPS_APC_ROUTINE ApcRoutine, PVOID ApcArgument1, + PVOID ApcArgument2, PVOID ApcArgument3); + +extern NtClose_t NtClose; extern NtSetInformationFile_t NtSetInformationFile; +extern NtCreateThread_t NtCreateThread; +extern NtTerminateThread_t NtTerminateThread; +extern NtQueueApcThreadEx_t NtQueueApcThreadEx; namespace Common::NtApi { void Initialize(); diff --git a/src/common/stb.cpp b/src/common/stb.cpp new file mode 100644 index 000000000..0cd916185 --- /dev/null +++ b/src/common/stb.cpp @@ -0,0 +1,7 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#define STB_IMAGE_IMPLEMENTATION +#define STBI_ONLY_PNG +#define STBI_NO_STDIO +#include "common/stb.h" diff --git a/src/common/stb.h b/src/common/stb.h new file mode 100644 index 000000000..6f4d34483 --- /dev/null +++ b/src/common/stb.h @@ -0,0 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index 6d5a254cd..4658d0ef4 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -37,6 +37,10 @@ std::vector SplitString(const std::string& str, char delimiter) { return output; } +std::string_view U8stringToString(std::u8string_view u8str) { + return std::string_view{reinterpret_cast(u8str.data()), u8str.size()}; +} + #ifdef _WIN32 static std::wstring CPToUTF16(u32 code_page, std::string_view input) { const auto size = diff --git a/src/common/string_util.h b/src/common/string_util.h index 23e82b93c..18972de44 100644 --- a/src/common/string_util.h +++ b/src/common/string_util.h @@ -16,6 +16,8 @@ void ToLowerInPlace(std::string& str); std::vector SplitString(const 
std::string& str, char delimiter); +std::string_view U8stringToString(std::u8string_view u8str); + #ifdef _WIN32 [[nodiscard]] std::string UTF16ToUTF8(std::wstring_view input); [[nodiscard]] std::wstring UTF8ToUTF16W(std::string_view str); diff --git a/src/common/support/avdec.h b/src/common/support/avdec.h new file mode 100644 index 000000000..fa3483dc4 --- /dev/null +++ b/src/common/support/avdec.h @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +// support header file for libav + +// The av_err2str macro in libavutil/error.h does not play nice with C++ +#ifdef av_err2str +#undef av_err2str +#include +av_always_inline std::string av_err2string(int errnum) { + char errbuf[AV_ERROR_MAX_STRING_SIZE]; + return av_make_error_string(errbuf, AV_ERROR_MAX_STRING_SIZE, errnum); +} +#define av_err2str(err) av_err2string(err).c_str() +#endif // av_err2str diff --git a/src/common/thread.cpp b/src/common/thread.cpp index 46df68c38..c87aea6ef 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -147,6 +147,10 @@ void SetCurrentThreadName(const char* name) { SetThreadDescription(GetCurrentThread(), UTF8ToUTF16W(name).data()); } +void SetThreadName(void* thread, const char* name) { + SetThreadDescription(thread, UTF8ToUTF16W(name).data()); +} + #else // !MSVC_VER, so must be POSIX threads // MinGW with the POSIX threading model does not support pthread_setname_np @@ -170,11 +174,19 @@ void SetCurrentThreadName(const char* name) { pthread_setname_np(pthread_self(), name); #endif } + +void SetThreadName(void* thread, const char* name) { + // TODO +} #endif #if defined(_WIN32) void SetCurrentThreadName(const char*) { - // Do Nothing on MingW + // Do Nothing on MinGW +} + +void SetThreadName(void* thread, const char* name) { + // Do Nothing on MinGW } #endif diff --git a/src/common/thread.h b/src/common/thread.h index fd962f8e5..175ba9445 100644 --- a/src/common/thread.h 
+++ b/src/common/thread.h @@ -23,6 +23,8 @@ void SetCurrentThreadPriority(ThreadPriority new_priority); void SetCurrentThreadName(const char* name); +void SetThreadName(void* thread, const char* name); + class AccurateTimer { std::chrono::nanoseconds target_interval{}; std::chrono::nanoseconds total_wait{}; diff --git a/src/common/va_ctx.h b/src/common/va_ctx.h new file mode 100644 index 000000000..e0b8c0bab --- /dev/null +++ b/src/common/va_ctx.h @@ -0,0 +1,111 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later +#pragma once + +#include +#include "common/types.h" + +#define VA_ARGS \ + uint64_t rdi, uint64_t rsi, uint64_t rdx, uint64_t rcx, uint64_t r8, uint64_t r9, \ + uint64_t overflow_arg_area, __m128 xmm0, __m128 xmm1, __m128 xmm2, __m128 xmm3, \ + __m128 xmm4, __m128 xmm5, __m128 xmm6, __m128 xmm7, ... + +#define VA_CTX(ctx) \ + alignas(16)::Common::VaCtx ctx{}; \ + (ctx).reg_save_area.gp[0] = rdi; \ + (ctx).reg_save_area.gp[1] = rsi; \ + (ctx).reg_save_area.gp[2] = rdx; \ + (ctx).reg_save_area.gp[3] = rcx; \ + (ctx).reg_save_area.gp[4] = r8; \ + (ctx).reg_save_area.gp[5] = r9; \ + (ctx).reg_save_area.fp[0] = xmm0; \ + (ctx).reg_save_area.fp[1] = xmm1; \ + (ctx).reg_save_area.fp[2] = xmm2; \ + (ctx).reg_save_area.fp[3] = xmm3; \ + (ctx).reg_save_area.fp[4] = xmm4; \ + (ctx).reg_save_area.fp[5] = xmm5; \ + (ctx).reg_save_area.fp[6] = xmm6; \ + (ctx).reg_save_area.fp[7] = xmm7; \ + (ctx).va_list.reg_save_area = &(ctx).reg_save_area; \ + (ctx).va_list.gp_offset = offsetof(::Common::VaRegSave, gp); \ + (ctx).va_list.fp_offset = offsetof(::Common::VaRegSave, fp); \ + (ctx).va_list.overflow_arg_area = &overflow_arg_area; + +namespace Common { + +// https://stackoverflow.com/questions/4958384/what-is-the-format-of-the-x86-64-va-list-structure + +struct VaList { + u32 gp_offset; + u32 fp_offset; + void* overflow_arg_area; + void* reg_save_area; +}; + +struct VaRegSave { + u64 gp[6]; + __m128 fp[8]; +}; 
+ +struct VaCtx { + VaRegSave reg_save_area; + VaList va_list; +}; + +template +T vaArgRegSaveAreaGp(VaList* l) { + auto* addr = reinterpret_cast(static_cast(l->reg_save_area) + l->gp_offset); + l->gp_offset += Size; + return *addr; +} +template +T vaArgOverflowArgArea(VaList* l) { + auto ptr = ((reinterpret_cast(l->overflow_arg_area) + (Align - 1)) & ~(Align - 1)); + auto* addr = reinterpret_cast(ptr); + l->overflow_arg_area = reinterpret_cast(ptr + Size); + return *addr; +} + +template +T vaArgRegSaveAreaFp(VaList* l) { + auto* addr = reinterpret_cast(static_cast(l->reg_save_area) + l->fp_offset); + l->fp_offset += Size; + return *addr; +} + +inline int vaArgInteger(VaList* l) { + if (l->gp_offset <= 40) { + return vaArgRegSaveAreaGp(l); + } + return vaArgOverflowArgArea(l); +} + +inline long long vaArgLongLong(VaList* l) { + if (l->gp_offset <= 40) { + return vaArgRegSaveAreaGp(l); + } + return vaArgOverflowArgArea(l); +} +inline long vaArgLong(VaList* l) { + if (l->gp_offset <= 40) { + return vaArgRegSaveAreaGp(l); + } + return vaArgOverflowArgArea(l); +} + +inline double vaArgDouble(VaList* l) { + if (l->fp_offset <= 160) { + return vaArgRegSaveAreaFp(l); + } + return vaArgOverflowArgArea(l); +} + +template +T* vaArgPtr(VaList* l) { + if (l->gp_offset <= 40) { + return vaArgRegSaveAreaGp(l); + } + return vaArgOverflowArgArea(l); +} + +} // namespace Common diff --git a/src/core/debug_state.cpp b/src/core/debug_state.cpp index 562cb62e8..649624924 100644 --- a/src/core/debug_state.cpp +++ b/src/core/debug_state.cpp @@ -177,9 +177,10 @@ void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, } } -void DebugStateImpl::CollectShader(const std::string& name, std::span spv, - std::span raw_code) { - shader_dump_list.emplace_back(name, std::vector{spv.begin(), spv.end()}, - std::vector{raw_code.begin(), raw_code.end()}); - std::ranges::sort(shader_dump_list, {}, &ShaderDump::name); +void DebugStateImpl::CollectShader(const std::string& name, 
vk::ShaderModule module, + std::span spv, std::span raw_code, + std::span patch_spv, bool is_patched) { + shader_dump_list.emplace_back(name, module, std::vector{spv.begin(), spv.end()}, + std::vector{raw_code.begin(), raw_code.end()}, + std::vector{patch_spv.begin(), patch_spv.end()}, is_patched); } diff --git a/src/core/debug_state.h b/src/core/debug_state.h index 759755b52..fa2e5cd9d 100644 --- a/src/core/debug_state.h +++ b/src/core/debug_state.h @@ -12,7 +12,7 @@ #include "common/types.h" #include "video_core/amdgpu/liverpool.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #ifdef _WIN32 #ifndef WIN32_LEAN_AND_MEAN @@ -76,29 +76,46 @@ struct FrameDump { struct ShaderDump { std::string name; + vk::ShaderModule module; + std::vector spv; - std::vector raw_code; + std::vector isa; + std::vector patch_spv; + std::string patch_source{}; + + bool loaded_data = false; + bool is_patched = false; std::string cache_spv_disasm{}; - std::string cache_raw_disasm{}; + std::string cache_isa_disasm{}; + std::string cache_patch_disasm{}; - ShaderDump(std::string name, std::vector spv, std::vector raw_code) - : name(std::move(name)), spv(std::move(spv)), raw_code(std::move(raw_code)) {} + ShaderDump(std::string name, vk::ShaderModule module, std::vector spv, + std::vector isa, std::vector patch_spv, bool is_patched) + : name(std::move(name)), module(module), spv(std::move(spv)), isa(std::move(isa)), + patch_spv(std::move(patch_spv)), is_patched(is_patched) {} ShaderDump(const ShaderDump& other) = delete; ShaderDump(ShaderDump&& other) noexcept - : name{std::move(other.name)}, spv{std::move(other.spv)}, - raw_code{std::move(other.raw_code)}, cache_spv_disasm{std::move(other.cache_spv_disasm)}, - cache_raw_disasm{std::move(other.cache_raw_disasm)} {} + : name{std::move(other.name)}, module{std::move(other.module)}, spv{std::move(other.spv)}, + isa{std::move(other.isa)}, 
patch_spv{std::move(other.patch_spv)}, + patch_source{std::move(other.patch_source)}, + cache_spv_disasm{std::move(other.cache_spv_disasm)}, + cache_isa_disasm{std::move(other.cache_isa_disasm)}, + cache_patch_disasm{std::move(other.cache_patch_disasm)} {} ShaderDump& operator=(const ShaderDump& other) = delete; ShaderDump& operator=(ShaderDump&& other) noexcept { if (this == &other) return *this; name = std::move(other.name); + module = std::move(other.module); spv = std::move(other.spv); - raw_code = std::move(other.raw_code); + isa = std::move(other.isa); + patch_spv = std::move(other.patch_spv); + patch_source = std::move(other.patch_source); cache_spv_disasm = std::move(other.cache_spv_disasm); - cache_raw_disasm = std::move(other.cache_raw_disasm); + cache_isa_disasm = std::move(other.cache_isa_disasm); + cache_patch_disasm = std::move(other.cache_patch_disasm); return *this; } }; @@ -186,8 +203,9 @@ public: void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, const AmdGpu::Liverpool::Regs& regs, bool is_compute = false); - void CollectShader(const std::string& name, std::span spv, - std::span raw_code); + void CollectShader(const std::string& name, vk::ShaderModule module, std::span spv, + std::span raw_code, std::span patch_spv, + bool is_patched); }; } // namespace DebugStateType diff --git a/src/core/devices/base_device.cpp b/src/core/devices/base_device.cpp new file mode 100644 index 000000000..4f91c81c7 --- /dev/null +++ b/src/core/devices/base_device.cpp @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "base_device.h" + +namespace Core::Devices { + +BaseDevice::BaseDevice() = default; + +BaseDevice::~BaseDevice() = default; + +} // namespace Core::Devices \ No newline at end of file diff --git a/src/core/devices/base_device.h b/src/core/devices/base_device.h new file mode 100644 index 000000000..351af82b4 --- /dev/null +++ 
// Abstract interface for an emulated device node.
// Every operation has a default implementation that only returns an error constant, so a
// concrete device overrides just the calls it supports.
class BaseDevice {
public:
    explicit BaseDevice();

    // Pure virtual so the class stays abstract; it is still defined (defaulted) out of line
    // in base_device.cpp, as required for a destructor.
    virtual ~BaseDevice() = 0;

    // Default: ENOTTY (all other defaults below report EBADF).
    virtual int ioctl(u64 cmd, Common::VaCtx* args) {
        return ORBIS_KERNEL_ERROR_ENOTTY;
    }

    virtual s64 write(const void* buf, size_t nbytes) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    // NOTE(review): readv/writev return the unsigned size_t while read/write/preadv/pwrite
    // return s64 — confirm callers interpret the error constant correctly after conversion.
    virtual size_t readv(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    virtual size_t writev(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    virtual s64 preadv(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt, u64 offset) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    virtual s64 lseek(s64 offset, int whence) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    virtual s64 read(void* buf, size_t nbytes) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    virtual int fstat(Libraries::Kernel::OrbisKernelStat* sb) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    virtual s32 fsync() {
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    virtual int ftruncate(s64 length) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    virtual int getdents(void* buf, u32 nbytes, s64* basep) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    virtual s64 pwrite(const void* buf, size_t nbytes, u64 offset) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }
};
GPL-2.0-or-later + +#pragma once + +/*- + * Copyright (c) 1982, 1986, 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ioccom.h 8.2 (Berkeley) 3/28/94 + * $FreeBSD$ + */ + +#define IOCPARM_SHIFT 13 /* number of bits for ioctl size */ +#define IOCPARM_MASK ((1 << IOCPARM_SHIFT) - 1) /* parameter length mask */ +#define IOCPARM_LEN(x) (((x) >> 16) & IOCPARM_MASK) +#define IOCBASECMD(x) ((x) & ~(IOCPARM_MASK << 16)) +#define IOCGROUP(x) (((x) >> 8) & 0xff) + +#define IOCPARM_MAX (1 << IOCPARM_SHIFT) /* max size of ioctl */ +#define IOC_VOID 0x20000000 /* no parameters */ +#define IOC_OUT 0x40000000 /* copy out parameters */ +#define IOC_IN 0x80000000 /* copy in parameters */ +#define IOC_INOUT (IOC_IN | IOC_OUT) +#define IOC_DIRMASK (IOC_VOID | IOC_OUT | IOC_IN) + +#define _IOC(inout, group, num, len) \ + ((unsigned long)((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num))) +#define _IO(g, n) _IOC(IOC_VOID, (g), (n), 0) +#define _IOWINT(g, n) _IOC(IOC_VOID, (g), (n), sizeof(int)) +#define _IOR(g, n, t) _IOC(IOC_OUT, (g), (n), sizeof(t)) +#define _IOW(g, n, t) _IOC(IOC_IN, (g), (n), sizeof(t)) +/* this should be _IORW, but stdio got there first */ +#define _IOWR(g, n, t) _IOC(IOC_INOUT, (g), (n), sizeof(t)) + +/* +# Simple parse of ioctl cmd +def parse(v): + print('inout', (v >> 24 & 0xFF)) + print('len', hex(v >> 16 & 0xFF)) + print('group', chr(v >> 8 & 0xFF)) + print('num', hex(v & 0xFF)) +*/ diff --git a/src/core/devices/logger.cpp b/src/core/devices/logger.cpp new file mode 100644 index 000000000..6f104509c --- /dev/null +++ b/src/core/devices/logger.cpp @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/logging/log.h" +#include "core/libraries/kernel/file_system.h" +#include "logger.h" + +namespace Core::Devices { + +Logger::Logger(std::string prefix, bool is_err) : prefix(std::move(prefix)), is_err(is_err) {} + +Logger::~Logger() = default; + +s64 Logger::write(const void* buf, size_t nbytes) { + log(static_cast(buf), nbytes); + return nbytes; +} + 
+size_t Logger::writev(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) { + for (int i = 0; i < iovcnt; i++) { + log(static_cast(iov[i].iov_base), iov[i].iov_len); + } + return iovcnt; +} + +s64 Logger::pwrite(const void* buf, size_t nbytes, u64 offset) { + log(static_cast(buf), nbytes); + return nbytes; +} + +s32 Logger::fsync() { + log_flush(); + return 0; +} + +void Logger::log(const char* buf, size_t nbytes) { + std::scoped_lock lock{mtx}; + const char* end = buf + nbytes; + for (const char* it = buf; it < end; ++it) { + char c = *it; + if (c == '\r') { + continue; + } + if (c == '\n') { + log_flush(); + continue; + } + buffer.push_back(c); + } +} + +void Logger::log_flush() { + std::scoped_lock lock{mtx}; + if (buffer.empty()) { + return; + } + if (is_err) { + LOG_ERROR(Tty, "[{}] {}", prefix, std::string_view{buffer}); + } else { + LOG_INFO(Tty, "[{}] {}", prefix, std::string_view{buffer}); + } + buffer.clear(); +} + +} // namespace Core::Devices \ No newline at end of file diff --git a/src/core/devices/logger.h b/src/core/devices/logger.h new file mode 100644 index 000000000..bfb07f337 --- /dev/null +++ b/src/core/devices/logger.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "base_device.h" + +#include +#include +#include + +namespace Core::Devices { + +class Logger final : BaseDevice { + std::string prefix; + bool is_err; + + std::recursive_mutex mtx; + std::vector buffer; + +public: + explicit Logger(std::string prefix, bool is_err); + + ~Logger() override; + + s64 write(const void* buf, size_t nbytes) override; + size_t writev(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) override; + s64 pwrite(const void* buf, size_t nbytes, u64 offset) override; + + s32 fsync() override; + +private: + void log(const char* buf, size_t nbytes); + void log_flush(); +}; + +} // namespace Core::Devices diff --git 
a/src/core/devices/nop_device.h b/src/core/devices/nop_device.h new file mode 100644 index 000000000..a75b92f1b --- /dev/null +++ b/src/core/devices/nop_device.h @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once +#include "base_device.h" + +namespace Core::Devices { + +class NopDevice final : BaseDevice { + u32 handle; + +public: + explicit NopDevice(u32 handle) : handle(handle) {} + + ~NopDevice() override = default; + + int ioctl(u64 cmd, Common::VaCtx* args) override { + return 0; + } + s64 write(const void* buf, size_t nbytes) override { + return 0; + } + size_t readv(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) override { + return 0; + } + size_t writev(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) override { + return 0; + } + s64 preadv(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt, u64 offset) override { + return 0; + } + s64 lseek(s64 offset, int whence) override { + return 0; + } + s64 read(void* buf, size_t nbytes) override { + return 0; + } + int fstat(Libraries::Kernel::OrbisKernelStat* sb) override { + return 0; + } + s32 fsync() override { + return 0; + } + int ftruncate(s64 length) override { + return 0; + } + int getdents(void* buf, u32 nbytes, s64* basep) override { + return 0; + } + s64 pwrite(const void* buf, size_t nbytes, u64 offset) override { + return 0; + } +}; + +} // namespace Core::Devices diff --git a/src/core/devtools/layer.cpp b/src/core/devtools/layer.cpp index 2c2099f4d..776f3377d 100644 --- a/src/core/devtools/layer.cpp +++ b/src/core/devtools/layer.cpp @@ -254,7 +254,7 @@ void L::DrawAdvanced() { void L::DrawSimple() { const auto io = GetIO(); - Text("Frame time: %.3f ms (%.1f FPS)", 1000.0f / io.Framerate, io.Framerate); + Text("%.1f FPS (%.2f ms)", io.Framerate, 1000.0f / io.Framerate); } static void LoadSettings(const char* line) { @@ -338,6 +338,7 @@ void L::Draw() { const auto fn = 
DebugState.flip_frame_count.load(); frame_graph.AddFrame(fn, io.DeltaTime); } + if (IsKeyPressed(ImGuiKey_F10, false)) { if (io.KeyCtrl) { show_advanced_debug = !show_advanced_debug; diff --git a/src/core/devtools/options.h b/src/core/devtools/options.h index 70e1d137b..a859a2eec 100644 --- a/src/core/devtools/options.h +++ b/src/core/devtools/options.h @@ -10,8 +10,8 @@ struct ImGuiTextBuffer; namespace Core::Devtools { struct TOptions { - std::string disassembler_cli_isa{"clrxdisasm --raw \"{src}\""}; - std::string disassembler_cli_spv{"spirv-cross -V \"{src}\""}; + std::string disassembler_cli_isa{"clrxdisasm --raw {src}"}; + std::string disassembler_cli_spv{"spirv-cross -V {src}"}; bool frame_dump_render_on_collapse{false}; }; diff --git a/src/core/devtools/widget/common.h b/src/core/devtools/widget/common.h index 4429f5581..75eb55301 100644 --- a/src/core/devtools/widget/common.h +++ b/src/core/devtools/widget/common.h @@ -8,7 +8,7 @@ #include #include -#include +#include #include "common/bit_field.h" #include "common/io_file.h" @@ -117,7 +117,7 @@ static bool IsDrawCall(AmdGpu::PM4ItOpcode opcode) { inline std::optional exec_cli(const char* cli) { std::array buffer{}; std::string output; - const auto f = popen(cli, "r"); + const auto f = popen(cli, "rt"); if (!f) { pclose(f); return {}; @@ -129,21 +129,27 @@ inline std::optional exec_cli(const char* cli) { return output; } -inline std::string RunDisassembler(const std::string& disassembler_cli, - const std::vector& shader_code) { +template +inline std::string RunDisassembler(const std::string& disassembler_cli, const T& shader_code, + bool* success = nullptr) { std::string shader_dis; if (disassembler_cli.empty()) { shader_dis = "No disassembler set"; + if (success) { + *success = false; + } } else { auto bin_path = std::filesystem::temp_directory_path() / "shadps4_tmp_shader.bin"; constexpr std::string_view src_arg = "{src}"; - std::string cli = disassembler_cli; + std::string cli = disassembler_cli + " 
2>&1"; const auto pos = cli.find(src_arg); if (pos == std::string::npos) { - DebugState.ShowDebugMessage("Disassembler CLI does not contain {src} argument\n" + - disassembler_cli); + shader_dis = "Disassembler CLI does not contain {src} argument"; + if (success) { + *success = false; + } } else { cli.replace(pos, src_arg.size(), "\"" + bin_path.string() + "\""); Common::FS::IOFile file(bin_path, Common::FS::FileAccessMode::Write); @@ -151,9 +157,16 @@ inline std::string RunDisassembler(const std::string& disassembler_cli, file.Close(); auto result = exec_cli(cli.c_str()); - shader_dis = result.value_or("Could not disassemble shader"); - if (shader_dis.empty()) { - shader_dis = "Disassembly empty or failed"; + if (result) { + shader_dis = result.value(); + if (success) { + *success = true; + } + } else { + if (success) { + *success = false; + } + shader_dis = "Could not disassemble shader"; } std::filesystem::remove(bin_path); diff --git a/src/core/devtools/widget/frame_dump.cpp b/src/core/devtools/widget/frame_dump.cpp index 86ba7b86e..055ce1333 100644 --- a/src/core/devtools/widget/frame_dump.cpp +++ b/src/core/devtools/widget/frame_dump.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "common/io_file.h" #include "core/devtools/options.h" diff --git a/src/core/devtools/widget/memory_map.cpp b/src/core/devtools/widget/memory_map.cpp index dc8f5c2e9..7edd676e9 100644 --- a/src/core/devtools/widget/memory_map.cpp +++ b/src/core/devtools/widget/memory_map.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include #include "core/debug_state.h" #include "core/memory.h" diff --git a/src/core/devtools/widget/reg_popup.cpp b/src/core/devtools/widget/reg_popup.cpp index 0633e76e6..2727e1745 100644 --- a/src/core/devtools/widget/reg_popup.cpp +++ b/src/core/devtools/widget/reg_popup.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include "cmd_list.h" #include "common.h" diff --git a/src/core/devtools/widget/reg_view.cpp 
b/src/core/devtools/widget/reg_view.cpp index a60090a8c..79b02a849 100644 --- a/src/core/devtools/widget/reg_view.cpp +++ b/src/core/devtools/widget/reg_view.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include "common.h" diff --git a/src/core/devtools/widget/shader_list.cpp b/src/core/devtools/widget/shader_list.cpp index b056880dd..80c939718 100644 --- a/src/core/devtools/widget/shader_list.cpp +++ b/src/core/devtools/widget/shader_list.cpp @@ -1,66 +1,221 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include + #include "shader_list.h" #include #include "common.h" #include "common/config.h" +#include "common/path_util.h" +#include "common/string_util.h" #include "core/debug_state.h" #include "core/devtools/options.h" #include "imgui/imgui_std.h" +#include "sdl_window.h" +#include "video_core/renderer_vulkan/vk_presenter.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" + +extern std::unique_ptr presenter; using namespace ImGui; namespace Core::Devtools::Widget { -void ShaderList::DrawShader(DebugStateType::ShaderDump& value) { - if (!loaded_data) { - loaded_data = true; - if (value.cache_raw_disasm.empty()) { - value.cache_raw_disasm = RunDisassembler(Options.disassembler_cli_isa, value.raw_code); - } - isa_editor.SetText(value.cache_raw_disasm); +ShaderList::Selection::Selection(int index) : index(index) { + isa_editor.SetPalette(TextEditor::GetDarkPalette()); + isa_editor.SetReadOnly(true); + glsl_editor.SetPalette(TextEditor::GetDarkPalette()); + glsl_editor.SetLanguageDefinition(TextEditor::LanguageDefinition::GLSL()); + presenter->GetWindow().RequestKeyboard(); +} +ShaderList::Selection::~Selection() { + presenter->GetWindow().ReleaseKeyboard(); +} + +void ShaderList::Selection::ReloadShader(DebugStateType::ShaderDump& value) { + auto& spv = value.is_patched ? 
value.patch_spv : value.spv; + if (spv.empty()) { + return; + } + auto& cache = presenter->GetRasterizer().GetPipelineCache(); + if (const auto m = cache.ReplaceShader(value.module, spv); m) { + value.module = *m; + } +} + +bool ShaderList::Selection::DrawShader(DebugStateType::ShaderDump& value) { + if (!value.loaded_data) { + value.loaded_data = true; + if (value.cache_isa_disasm.empty()) { + value.cache_isa_disasm = RunDisassembler(Options.disassembler_cli_isa, value.isa); + } if (value.cache_spv_disasm.empty()) { value.cache_spv_disasm = RunDisassembler(Options.disassembler_cli_spv, value.spv); } - spv_editor.SetText(value.cache_spv_disasm); + if (!value.patch_spv.empty() && value.cache_patch_disasm.empty()) { + value.cache_patch_disasm = RunDisassembler("spirv-dis {src}", value.patch_spv); + } + patch_path = + Common::FS::GetUserPath(Common::FS::PathType::ShaderDir) / "patch" / value.name; + patch_bin_path = patch_path; + patch_bin_path += ".spv"; + patch_path += ".glsl"; + if (std::filesystem::exists(patch_path)) { + std::ifstream file{patch_path}; + value.patch_source = + std::string{std::istreambuf_iterator{file}, std::istreambuf_iterator{}}; + } + + value.is_patched = !value.patch_spv.empty(); + if (!value.is_patched) { // No patch + isa_editor.SetText(value.cache_isa_disasm); + glsl_editor.SetText(value.cache_spv_disasm); + } else { + isa_editor.SetText(value.cache_patch_disasm); + isa_editor.SetLanguageDefinition(TextEditor::LanguageDefinition::SPIRV()); + glsl_editor.SetText(value.patch_source); + glsl_editor.SetReadOnly(false); + } } - if (SmallButton("<-")) { - selected_shader = -1; + char name[64]; + snprintf(name, sizeof(name), "Shader %s", value.name.c_str()); + SetNextWindowSize({450.0f, 600.0f}, ImGuiCond_FirstUseEver); + if (!Begin(name, &open, ImGuiWindowFlags_NoNav)) { + End(); + return open; } - SameLine(); + Text("%s", value.name.c_str()); SameLine(0.0f, 7.0f); - if (BeginCombo("Shader type", showing_isa ? 
"ISA" : "SPIRV", ImGuiComboFlags_WidthFitPreview)) { - if (Selectable("SPIRV")) { - showing_isa = false; + if (Checkbox("Enable patch", &value.is_patched)) { + if (value.is_patched) { + if (value.patch_source.empty()) { + value.patch_source = value.cache_spv_disasm; + } + isa_editor.SetText(value.cache_patch_disasm); + isa_editor.SetLanguageDefinition(TextEditor::LanguageDefinition::SPIRV()); + glsl_editor.SetText(value.patch_source); + glsl_editor.SetReadOnly(false); + if (!value.patch_spv.empty()) { + ReloadShader(value); + } + } else { + isa_editor.SetText(value.cache_isa_disasm); + isa_editor.SetLanguageDefinition(TextEditor::LanguageDefinition()); + glsl_editor.SetText(value.cache_spv_disasm); + glsl_editor.SetReadOnly(true); + ReloadShader(value); } - if (Selectable("ISA")) { - showing_isa = true; - } - EndCombo(); } - if (showing_isa) { - isa_editor.Render("ISA", GetContentRegionAvail()); + if (value.is_patched) { + if (BeginCombo("Shader type", showing_bin ? "SPIRV" : "GLSL", + ImGuiComboFlags_WidthFitPreview)) { + if (Selectable("GLSL")) { + showing_bin = false; + } + if (Selectable("SPIRV")) { + showing_bin = true; + } + EndCombo(); + } } else { - spv_editor.Render("SPIRV", GetContentRegionAvail()); + if (BeginCombo("Shader type", showing_bin ? 
"ISA" : "GLSL", + ImGuiComboFlags_WidthFitPreview)) { + if (Selectable("GLSL")) { + showing_bin = false; + } + if (Selectable("ISA")) { + showing_bin = true; + } + EndCombo(); + } } -} -ShaderList::ShaderList() { - isa_editor.SetPalette(TextEditor::GetDarkPalette()); - isa_editor.SetReadOnly(true); - spv_editor.SetPalette(TextEditor::GetDarkPalette()); - spv_editor.SetReadOnly(true); - spv_editor.SetLanguageDefinition(TextEditor::LanguageDefinition::GLSL()); + if (value.is_patched) { + bool save = false; + bool compile = false; + SameLine(0.0f, 3.0f); + if (Button("Save")) { + save = true; + } + SameLine(); + if (Button("Save & Compile")) { + save = true; + compile = true; + } + if (save) { + value.patch_source = glsl_editor.GetText(); + std::ofstream file{patch_path, std::ios::binary | std::ios::trunc}; + file << value.patch_source; + std::string msg = "Patch saved to "; + msg += Common::U8stringToString(patch_path.u8string()); + DebugState.ShowDebugMessage(msg); + } + if (compile) { + static std::map stage_arg = { + {"vs", "vert"}, + {"gs", "geom"}, + {"fs", "frag"}, + {"cs", "comp"}, + }; + auto stage = stage_arg.find(value.name.substr(0, 2)); + if (stage == stage_arg.end()) { + DebugState.ShowDebugMessage(std::string{"Invalid shader stage: "} + + value.name.substr(0, 2)); + } else { + std::string cmd = + fmt::format("glslc --target-env=vulkan1.3 --target-spv=spv1.6 " + "-fshader-stage={} {{src}} -o \"{}\"", + stage->second, Common::U8stringToString(patch_bin_path.u8string())); + bool success = false; + auto res = RunDisassembler(cmd, value.patch_source, &success); + if (!res.empty() || !success) { + DebugState.ShowDebugMessage("Compilation failed:\n" + res); + } else { + Common::FS::IOFile file{patch_bin_path, Common::FS::FileAccessMode::Read}; + value.patch_spv.resize(file.GetSize() / sizeof(u32)); + file.Read(value.patch_spv); + value.cache_patch_disasm = + RunDisassembler("spirv-dis {src}", value.patch_spv, &success); + if (!success) { + 
DebugState.ShowDebugMessage("Decompilation failed (Compile was ok):\n" + + res); + } else { + isa_editor.SetText(value.cache_patch_disasm); + ReloadShader(value); + } + } + } + } + } + + if (showing_bin) { + isa_editor.Render(value.is_patched ? "SPIRV" : "ISA", GetContentRegionAvail()); + } else { + glsl_editor.Render("GLSL", GetContentRegionAvail()); + } + + End(); + return open; } void ShaderList::Draw() { + for (auto it = open_shaders.begin(); it != open_shaders.end();) { + auto& selection = *it; + auto& shader = DebugState.shader_dump_list[selection.index]; + if (!selection.DrawShader(shader)) { + it = open_shaders.erase(it); + } else { + ++it; + } + } + SetNextWindowSize({500.0f, 600.0f}, ImGuiCond_FirstUseEver); if (!Begin("Shader list", &open)) { End(); @@ -73,18 +228,19 @@ void ShaderList::Draw() { return; } - if (selected_shader >= 0) { - DrawShader(DebugState.shader_dump_list[selected_shader]); - End(); - return; - } - auto width = GetContentRegionAvail().x; int i = 0; for (const auto& shader : DebugState.shader_dump_list) { - if (ButtonEx(shader.name.c_str(), {width, 20.0f}, ImGuiButtonFlags_NoHoveredOnFocus)) { - selected_shader = i; - loaded_data = false; + char name[128]; + if (shader.is_patched) { + snprintf(name, sizeof(name), "%s (PATCH ON)", shader.name.c_str()); + } else if (!shader.patch_spv.empty()) { + snprintf(name, sizeof(name), "%s (PATCH OFF)", shader.name.c_str()); + } else { + snprintf(name, sizeof(name), "%s", shader.name.c_str()); + } + if (ButtonEx(name, {width, 20.0f}, ImGuiButtonFlags_NoHoveredOnFocus)) { + open_shaders.emplace_back(i); } i++; } diff --git a/src/core/devtools/widget/shader_list.h b/src/core/devtools/widget/shader_list.h index 5a47f656d..2534ded35 100644 --- a/src/core/devtools/widget/shader_list.h +++ b/src/core/devtools/widget/shader_list.h @@ -6,20 +6,32 @@ #include "core/debug_state.h" #include "text_editor.h" +#include + namespace Core::Devtools::Widget { class ShaderList { - int selected_shader = -1; - 
TextEditor isa_editor{}; - TextEditor spv_editor{}; - bool loaded_data = false; - bool showing_isa = false; + struct Selection { + explicit Selection(int index); + ~Selection(); - void DrawShader(DebugStateType::ShaderDump& value); + void ReloadShader(DebugStateType::ShaderDump& value); + + bool DrawShader(DebugStateType::ShaderDump& value); + + int index; + TextEditor isa_editor{}; + TextEditor glsl_editor{}; + bool open = true; + bool showing_bin = false; + + std::filesystem::path patch_path; + std::filesystem::path patch_bin_path; + }; + + std::vector open_shaders{}; public: - ShaderList(); - bool open = false; void Draw(); diff --git a/src/core/devtools/widget/text_editor.cpp b/src/core/devtools/widget/text_editor.cpp index 07f2f658d..7171cac47 100644 --- a/src/core/devtools/widget/text_editor.cpp +++ b/src/core/devtools/widget/text_editor.cpp @@ -1059,7 +1059,8 @@ void TextEditor::Render(const char* aTitle, const ImVec2& aSize, bool aBorder) { if (!mIgnoreImGuiChild) ImGui::BeginChild(aTitle, aSize, aBorder, ImGuiWindowFlags_HorizontalScrollbar | - ImGuiWindowFlags_AlwaysHorizontalScrollbar | ImGuiWindowFlags_NoMove); + ImGuiWindowFlags_AlwaysHorizontalScrollbar | ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoNav); if (mHandleKeyboardInputs) { HandleKeyboardInputs(); @@ -2331,4 +2332,50 @@ const TextEditor::LanguageDefinition& TextEditor::LanguageDefinition::GLSL() { return langDef; } +// Source: https://github.com/dfranx/ImGuiColorTextEdit/blob/master/TextEditor.cpp +const TextEditor::LanguageDefinition& TextEditor::LanguageDefinition::SPIRV() { + static bool inited = false; + static LanguageDefinition langDef; + if (!inited) { + /* + langDef.mTokenRegexStrings.push_back(std::make_pair("[ \\t]*#[ + \\t]*[a-zA-Z_]+", PaletteIndex::Preprocessor)); + langDef.mTokenRegexStrings.push_back(std::make_pair("\\'\\\\?[^\\']\\'", PaletteIndex::CharLiteral)); + langDef.mTokenRegexStrings.push_back(std::make_pair("[a-zA-Z_][a-zA-Z0-9_]*", PaletteIndex::Identifier)); + 
langDef.mTokenRegexStrings.push_back(std::make_pair("[\\[\\]\\{\\}\\!\\%\\^\\&\\*\\(\\)\\-\\+\\=\\~\\|\\<\\>\\?\\/\\;\\,\\.]", + PaletteIndex::Punctuation)); + */ + + langDef.mTokenRegexStrings.push_back(std::make_pair( + "L?\\\"(\\\\.|[^\\\"])*\\\"", PaletteIndex::String)); + langDef.mTokenRegexStrings.push_back( + std::make_pair("[ =\\t]Op[a-zA-Z]*", PaletteIndex::Keyword)); + langDef.mTokenRegexStrings.push_back( + std::make_pair("%[_a-zA-Z0-9]*", PaletteIndex::Identifier)); + langDef.mTokenRegexStrings.push_back(std::make_pair( + "[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][+-]?[0-9]+)?[fF]?", PaletteIndex::Number)); + langDef.mTokenRegexStrings.push_back(std::make_pair( + "[+-]?[0-9]+[Uu]?[lL]?[lL]?", PaletteIndex::Number)); + langDef.mTokenRegexStrings.push_back(std::make_pair( + "0[0-7]+[Uu]?[lL]?[lL]?", PaletteIndex::Number)); + langDef.mTokenRegexStrings.push_back(std::make_pair( + "0[xX][0-9a-fA-F]+[uU]?[lL]?[lL]?", PaletteIndex::Number)); + + langDef.mCommentStart = "/*"; + langDef.mCommentEnd = "*/"; + langDef.mSingleLineComment = ";"; + + langDef.mCaseSensitive = true; + langDef.mAutoIndentation = false; + + langDef.mName = "SPIR-V"; + + inited = true; + } + return langDef; +} + } // namespace Core::Devtools::Widget diff --git a/src/core/devtools/widget/text_editor.h b/src/core/devtools/widget/text_editor.h index 5c3f29f11..aa81d0d23 100644 --- a/src/core/devtools/widget/text_editor.h +++ b/src/core/devtools/widget/text_editor.h @@ -161,6 +161,7 @@ public: : mPreprocChar('#'), mAutoIndentation(true), mTokenize(nullptr), mCaseSensitive(true) {} static const LanguageDefinition& GLSL(); + static const LanguageDefinition& SPIRV(); }; TextEditor(); diff --git a/src/core/file_format/splash.cpp b/src/core/file_format/splash.cpp index 5e06c912d..b68702157 100644 --- a/src/core/file_format/splash.cpp +++ b/src/core/file_format/splash.cpp @@ -5,13 +5,9 @@ #include "common/assert.h" #include "common/io_file.h" +#include "common/stb.h" #include "splash.h" -#define 
STB_IMAGE_IMPLEMENTATION -#define STBI_ONLY_PNG -#define STBI_NO_STDIO -#include "externals/stb_image.h" - bool Splash::Open(const std::filesystem::path& filepath) { ASSERT_MSG(filepath.stem().string() != "png", "Unexpected file format passed"); diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 769940cf0..0fdbb2783 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -4,12 +4,12 @@ #include #include "common/config.h" #include "common/string_util.h" +#include "core/devices/logger.h" +#include "core/devices/nop_device.h" #include "core/file_sys/fs.h" namespace Core::FileSys { -constexpr int RESERVED_HANDLES = 3; // First 3 handles are stdin,stdout,stderr - void MntPoints::Mount(const std::filesystem::path& host_folder, const std::string& guest_folder, bool read_only) { std::scoped_lock lock{m_mutex}; @@ -135,7 +135,6 @@ int HandleTable::CreateHandle() { std::scoped_lock lock{m_mutex}; auto* file = new File{}; - file->is_directory = false; file->is_opened = false; int existingFilesNum = m_files.size(); @@ -143,23 +142,23 @@ int HandleTable::CreateHandle() { for (int index = 0; index < existingFilesNum; index++) { if (m_files.at(index) == nullptr) { m_files[index] = file; - return index + RESERVED_HANDLES; + return index; } } m_files.push_back(file); - return m_files.size() + RESERVED_HANDLES - 1; + return m_files.size() - 1; } void HandleTable::DeleteHandle(int d) { std::scoped_lock lock{m_mutex}; - delete m_files.at(d - RESERVED_HANDLES); - m_files[d - RESERVED_HANDLES] = nullptr; + delete m_files.at(d); + m_files[d] = nullptr; } File* HandleTable::GetFile(int d) { std::scoped_lock lock{m_mutex}; - return m_files.at(d - RESERVED_HANDLES); + return m_files.at(d); } File* HandleTable::GetFile(const std::filesystem::path& host_name) { @@ -171,4 +170,20 @@ File* HandleTable::GetFile(const std::filesystem::path& host_name) { return nullptr; } +void HandleTable::CreateStdHandles() { + auto setup = [this](const char* path, auto* 
device) { + int fd = CreateHandle(); + auto* file = GetFile(fd); + file->is_opened = true; + file->type = FileType::Device; + file->m_guest_name = path; + file->device = + std::shared_ptr{reinterpret_cast(device)}; + }; + // order matters + setup("/dev/stdin", new Devices::NopDevice(0)); // stdin + setup("/dev/stdout", new Devices::Logger("stdout", false)); // stdout + setup("/dev/stderr", new Devices::Logger("stderr", true)); // stderr +} + } // namespace Core::FileSys diff --git a/src/core/file_sys/fs.h b/src/core/file_sys/fs.h index eeaeaf781..b0153c162 100644 --- a/src/core/file_sys/fs.h +++ b/src/core/file_sys/fs.h @@ -9,6 +9,7 @@ #include #include #include "common/io_file.h" +#include "core/devices/base_device.h" namespace Core::FileSys { @@ -55,15 +56,22 @@ struct DirEntry { bool isFile; }; +enum class FileType { + Regular, // standard file + Directory, + Device, +}; + struct File { std::atomic_bool is_opened{}; - std::atomic_bool is_directory{}; + std::atomic type{FileType::Regular}; std::filesystem::path m_host_name; std::string m_guest_name; Common::FS::IOFile f; std::vector dirents; u32 dirents_index; std::mutex m_mutex; + std::shared_ptr device; // only valid for type == Device }; class HandleTable { @@ -76,6 +84,8 @@ public: File* GetFile(int d); File* GetFile(const std::filesystem::path& host_name); + void CreateStdHandles(); + private: std::vector m_files; std::mutex m_mutex; diff --git a/src/core/libraries/ajm/ajm.cpp b/src/core/libraries/ajm/ajm.cpp index 2396669b6..3184fa64f 100644 --- a/src/core/libraries/ajm/ajm.cpp +++ b/src/core/libraries/ajm/ajm.cpp @@ -9,7 +9,7 @@ #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" -#include +#include namespace Libraries::Ajm { @@ -19,7 +19,7 @@ constexpr int ORBIS_AJM_CHANNELMASK_QUAD = 0x0033; constexpr int ORBIS_AJM_CHANNELMASK_5POINT1 = 0x060F; constexpr int ORBIS_AJM_CHANNELMASK_7POINT1 = 0x063F; -static std::unique_ptr context{}; +static std::unordered_map> contexts{}; u32 
GetChannelMask(u32 num_channels) { switch (num_channels) { @@ -40,7 +40,13 @@ u32 GetChannelMask(u32 num_channels) { int PS4_SYSV_ABI sceAjmBatchCancel(const u32 context_id, const u32 batch_id) { LOG_INFO(Lib_Ajm, "called context_id = {} batch_id = {}", context_id, batch_id); - return context->BatchCancel(batch_id); + + auto it = contexts.find(context_id); + if (it == contexts.end()) { + return ORBIS_AJM_ERROR_INVALID_CONTEXT; + } + + return it->second->BatchCancel(batch_id); } int PS4_SYSV_ABI sceAjmBatchErrorDump() { @@ -90,14 +96,26 @@ int PS4_SYSV_ABI sceAjmBatchStartBuffer(u32 context_id, u8* p_batch, u32 batch_s u32* out_batch_id) { LOG_TRACE(Lib_Ajm, "called context = {}, batch_size = {:#x}, priority = {}", context_id, batch_size, priority); - return context->BatchStartBuffer(p_batch, batch_size, priority, batch_error, out_batch_id); + + auto it = contexts.find(context_id); + if (it == contexts.end()) { + return ORBIS_AJM_ERROR_INVALID_CONTEXT; + } + + return it->second->BatchStartBuffer(p_batch, batch_size, priority, batch_error, out_batch_id); } int PS4_SYSV_ABI sceAjmBatchWait(const u32 context_id, const u32 batch_id, const u32 timeout, AjmBatchError* const batch_error) { LOG_TRACE(Lib_Ajm, "called context = {}, batch_id = {}, timeout = {}", context_id, batch_id, timeout); - return context->BatchWait(batch_id, timeout, batch_error); + + auto it = contexts.find(context_id); + if (it == contexts.end()) { + return ORBIS_AJM_ERROR_INVALID_CONTEXT; + } + + return it->second->BatchWait(batch_id, timeout, batch_error); } int PS4_SYSV_ABI sceAjmDecAt9ParseConfigData() { @@ -117,12 +135,12 @@ int PS4_SYSV_ABI sceAjmFinalize() { int PS4_SYSV_ABI sceAjmInitialize(s64 reserved, u32* p_context_id) { LOG_INFO(Lib_Ajm, "called reserved = {}", reserved); - ASSERT_MSG(context == nullptr, "Multiple contexts are currently unsupported."); if (p_context_id == nullptr || reserved != 0) { return ORBIS_AJM_ERROR_INVALID_PARAMETER; } - *p_context_id = 1; - context = 
std::make_unique(); + u32 id = contexts.size() + 1; + *p_context_id = id; + contexts.emplace(id, std::make_unique()); return ORBIS_OK; } @@ -135,12 +153,24 @@ int PS4_SYSV_ABI sceAjmInstanceCreate(u32 context_id, AjmCodecType codec_type, AjmInstanceFlags flags, u32* out_instance) { LOG_INFO(Lib_Ajm, "called context = {}, codec_type = {}, flags = {:#x}", context_id, magic_enum::enum_name(codec_type), flags.raw); - return context->InstanceCreate(codec_type, flags, out_instance); + + auto it = contexts.find(context_id); + if (it == contexts.end()) { + return ORBIS_AJM_ERROR_INVALID_CONTEXT; + } + + return it->second->InstanceCreate(codec_type, flags, out_instance); } int PS4_SYSV_ABI sceAjmInstanceDestroy(u32 context_id, u32 instance_id) { LOG_INFO(Lib_Ajm, "called context = {}, instance = {}", context_id, instance_id); - return context->InstanceDestroy(instance_id); + + auto it = contexts.find(context_id); + if (it == contexts.end()) { + return ORBIS_AJM_ERROR_INVALID_CONTEXT; + } + + return it->second->InstanceDestroy(instance_id); } int PS4_SYSV_ABI sceAjmInstanceExtend() { @@ -168,7 +198,13 @@ int PS4_SYSV_ABI sceAjmModuleRegister(u32 context_id, AjmCodecType codec_type, s if (reserved != 0) { return ORBIS_AJM_ERROR_INVALID_PARAMETER; } - return context->ModuleRegister(codec_type); + + auto it = contexts.find(context_id); + if (it == contexts.end()) { + return ORBIS_AJM_ERROR_INVALID_CONTEXT; + } + + return it->second->ModuleRegister(codec_type); } int PS4_SYSV_ABI sceAjmModuleUnregister() { diff --git a/src/core/libraries/ajm/ajm_context.cpp b/src/core/libraries/ajm/ajm_context.cpp index e30e1c478..09255110c 100644 --- a/src/core/libraries/ajm/ajm_context.cpp +++ b/src/core/libraries/ajm/ajm_context.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "common/thread.h" #include "core/libraries/ajm/ajm.h" #include "core/libraries/ajm/ajm_at9.h" #include "core/libraries/ajm/ajm_context.h" @@ -53,6 +54,7 @@ s32 
AjmContext::ModuleRegister(AjmCodecType type) { } void AjmContext::WorkerThread(std::stop_token stop) { + Common::SetCurrentThreadName("shadPS4:AjmWorker"); while (!stop.stop_requested()) { auto batch = batch_queue.PopWait(stop); if (batch != nullptr) { diff --git a/src/core/libraries/ajm/ajm_instance.cpp b/src/core/libraries/ajm/ajm_instance.cpp index 4e04eea74..ea7fd5617 100644 --- a/src/core/libraries/ajm/ajm_instance.cpp +++ b/src/core/libraries/ajm/ajm_instance.cpp @@ -5,7 +5,7 @@ #include "core/libraries/ajm/ajm_instance.h" #include "core/libraries/ajm/ajm_mp3.h" -#include +#include namespace Libraries::Ajm { diff --git a/src/core/libraries/ajm/ajm_mp3.cpp b/src/core/libraries/ajm/ajm_mp3.cpp index 3b464238d..2c572a01b 100644 --- a/src/core/libraries/ajm/ajm_mp3.cpp +++ b/src/core/libraries/ajm/ajm_mp3.cpp @@ -12,6 +12,8 @@ extern "C" { #include } +#include "common/support/avdec.h" + namespace Libraries::Ajm { // Following tables have been reversed from AJM library diff --git a/src/core/libraries/audio/audioout.cpp b/src/core/libraries/audio/audioout.cpp index b92c75a8f..78b04cc90 100644 --- a/src/core/libraries/audio/audioout.cpp +++ b/src/core/libraries/audio/audioout.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include +#include #include "common/assert.h" #include "common/logging/log.h" diff --git a/src/core/libraries/avplayer/avplayer_file_streamer.cpp b/src/core/libraries/avplayer/avplayer_file_streamer.cpp index 3323ee9b6..19faeb273 100644 --- a/src/core/libraries/avplayer/avplayer_file_streamer.cpp +++ b/src/core/libraries/avplayer/avplayer_file_streamer.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include // std::max, std::min -#include +#include #include "core/libraries/avplayer/avplayer_file_streamer.h" extern "C" { diff --git a/src/core/libraries/avplayer/avplayer_source.cpp b/src/core/libraries/avplayer/avplayer_source.cpp index 8e43e7277..cf783403c 100644 --- 
a/src/core/libraries/avplayer/avplayer_source.cpp +++ b/src/core/libraries/avplayer/avplayer_source.cpp @@ -8,7 +8,7 @@ #include "core/libraries/avplayer/avplayer_file_streamer.h" #include "core/libraries/avplayer/avplayer_source.h" -#include +#include extern "C" { #include @@ -18,16 +18,7 @@ extern "C" { #include } -// The av_err2str macro in libavutil/error.h does not play nice with C++ -#ifdef av_err2str -#undef av_err2str -#include -av_always_inline std::string av_err2string(int errnum) { - char errbuf[AV_ERROR_MAX_STRING_SIZE]; - return av_make_error_string(errbuf, AV_ERROR_MAX_STRING_SIZE, errnum); -} -#define av_err2str(err) av_err2string(err).c_str() -#endif // av_err2str +#include "common/support/avdec.h" namespace Libraries::AvPlayer { diff --git a/src/core/libraries/avplayer/avplayer_state.cpp b/src/core/libraries/avplayer/avplayer_state.cpp index c3694eec0..143df749c 100644 --- a/src/core/libraries/avplayer/avplayer_state.cpp +++ b/src/core/libraries/avplayer/avplayer_state.cpp @@ -8,7 +8,7 @@ #include "core/libraries/avplayer/avplayer_state.h" #include "core/tls.h" -#include +#include namespace Libraries::AvPlayer { diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 10d121afe..18035e6ce 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -544,15 +544,15 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { .base_addr = base_addr, }); } - liverpool->SubmitAsc(vqid, acb_span); + liverpool->SubmitAsc(gnm_vqid, acb_span); *asc_queue.read_addr += acb_size; *asc_queue.read_addr %= asc_queue.ring_size_dw * 4; } -int PS4_SYSV_ABI sceGnmDingDongForWorkload() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id) { + LOG_DEBUG(Lib_GnmDriver, "called, redirecting to sceGnmDingDong"); + sceGnmDingDong(gnm_vqid, next_offs_dw); } int 
PS4_SYSV_ABI sceGnmDisableMipStatsReport() { diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 5307b3baa..017dbe3ad 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -34,7 +34,7 @@ int PS4_SYSV_ABI sceGnmDebugHardwareStatus(); s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id); int PS4_SYSV_ABI sceGnmDestroyWorkloadStream(); void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw); -int PS4_SYSV_ABI sceGnmDingDongForWorkload(); +void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id); int PS4_SYSV_ABI sceGnmDisableMipStatsReport(); s32 PS4_SYSV_ABI sceGnmDispatchDirect(u32* cmdbuf, u32 size, u32 threads_x, u32 threads_y, u32 threads_z, u32 flags); diff --git a/src/core/libraries/ime/error_dialog.cpp b/src/core/libraries/ime/error_dialog.cpp index 811f2cb99..07580fe1d 100644 --- a/src/core/libraries/ime/error_dialog.cpp +++ b/src/core/libraries/ime/error_dialog.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include #include "common/assert.h" #include "common/logging/log.h" diff --git a/src/core/libraries/ime/ime_dialog.cpp b/src/core/libraries/ime/ime_dialog.cpp index d6d027885..9151aa64e 100644 --- a/src/core/libraries/ime/ime_dialog.cpp +++ b/src/core/libraries/ime/ime_dialog.cpp @@ -2,7 +2,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include +#include + #include "common/logging/log.h" #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" diff --git a/src/core/libraries/ime/ime_dialog_ui.cpp b/src/core/libraries/ime/ime_dialog_ui.cpp index 5957606eb..51183c79b 100644 --- a/src/core/libraries/ime/ime_dialog_ui.cpp +++ b/src/core/libraries/ime/ime_dialog_ui.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "common/assert.h" #include "common/logging/log.h" diff --git a/src/core/libraries/jpeg/jpegenc.cpp b/src/core/libraries/jpeg/jpegenc.cpp 
index b664a2334..b9c88d094 100644 --- a/src/core/libraries/jpeg/jpegenc.cpp +++ b/src/core/libraries/jpeg/jpegenc.cpp @@ -1,7 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include +#include + #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 1b95e5270..5ba9976c6 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -1,16 +1,49 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include + #include "common/assert.h" #include "common/logging/log.h" #include "common/scope_exit.h" #include "common/singleton.h" +#include "core/devices/logger.h" +#include "core/devices/nop_device.h" #include "core/file_sys/fs.h" #include "core/libraries/kernel/file_system.h" #include "core/libraries/kernel/orbis_error.h" #include "core/libraries/libs.h" +#include "core/memory.h" #include "kernel.h" +namespace D = Core::Devices; +using FactoryDevice = std::function(u32, const char*, int, u16)>; + +#define GET_DEVICE_FD(fd) \ + [](u32, const char*, int, u16) { \ + return Common::Singleton::Instance()->GetFile(fd)->device; \ + } + +// prefix path, only dev devices +static std::map available_device = { + // clang-format off + {"/dev/stdin", GET_DEVICE_FD(0)}, + {"/dev/stdout", GET_DEVICE_FD(1)}, + {"/dev/stderr", GET_DEVICE_FD(2)}, + + {"/dev/fd/0", GET_DEVICE_FD(0)}, + {"/dev/fd/1", GET_DEVICE_FD(1)}, + {"/dev/fd/2", GET_DEVICE_FD(2)}, + + {"/dev/deci_stdin", GET_DEVICE_FD(0)}, + {"/dev/deci_stdout", GET_DEVICE_FD(1)}, + {"/dev/deci_stderr", GET_DEVICE_FD(2)}, + + {"/dev/null", GET_DEVICE_FD(0)}, // fd0 (stdin) is a nop device + // clang-format on +}; + namespace Libraries::Kernel { auto GetDirectoryEntries(const std::filesystem::path& path) { @@ 
-24,8 +57,8 @@ auto GetDirectoryEntries(const std::filesystem::path& path) { return files; } -int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) { - LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode); +int PS4_SYSV_ABI sceKernelOpen(const char* raw_path, int flags, u16 mode) { + LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", raw_path, flags, mode); auto* h = Common::Singleton::Instance(); auto* mnt = Common::Singleton::Instance(); @@ -44,22 +77,35 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) { bool direct = (flags & ORBIS_KERNEL_O_DIRECT) != 0; bool directory = (flags & ORBIS_KERNEL_O_DIRECTORY) != 0; - if (std::string_view{path} == "/dev/console") { + std::string_view path{raw_path}; + + if (path == "/dev/console") { return 2000; } - if (std::string_view{path} == "/dev/deci_tty6") { + if (path == "/dev/deci_tty6") { return 2001; } - if (std::string_view{path} == "/dev/stdout") { - return 2002; - } - if (std::string_view{path} == "/dev/urandom") { + if (path == "/dev/urandom") { return 2003; } + u32 handle = h->CreateHandle(); auto* file = h->GetFile(handle); + + if (path.starts_with("/dev/")) { + for (const auto& [prefix, factory] : available_device) { + if (path.starts_with(prefix)) { + file->is_opened = true; + file->type = Core::FileSys::FileType::Device; + file->m_guest_name = path; + file->device = factory(handle, path.data(), flags, mode); + return handle; + } + } + } + if (directory) { - file->is_directory = true; + file->type = Core::FileSys::FileType::Directory; file->m_guest_name = path; file->m_host_name = mnt->GetHostPath(file->m_guest_name); if (!std::filesystem::is_directory(file->m_host_name)) { // directory doesn't exist @@ -135,11 +181,12 @@ int PS4_SYSV_ABI sceKernelClose(int d) { if (file == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; } - if (!file->is_directory) { + if (file->type == Core::FileSys::FileType::Regular) { file->f.Close(); } file->is_opened = false; 
LOG_INFO(Kernel_Fs, "Closing {}", file->m_guest_name); + // FIXME: Lock file mutex before deleting it? h->DeleteHandle(d); return ORBIS_OK; } @@ -154,15 +201,7 @@ int PS4_SYSV_ABI posix_close(int d) { return result; } -size_t PS4_SYSV_ABI sceKernelWrite(int d, const void* buf, size_t nbytes) { - if (d <= 2) { // stdin,stdout,stderr - char* str = strdup((const char*)buf); - if (str[nbytes - 1] == '\n') - str[nbytes - 1] = 0; - LOG_INFO(Tty, "{}", str); - free(str); - return nbytes; - } +s64 PS4_SYSV_ABI sceKernelWrite(int d, const void* buf, size_t nbytes) { auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); if (file == nullptr) { @@ -170,6 +209,9 @@ size_t PS4_SYSV_ABI sceKernelWrite(int d, const void* buf, size_t nbytes) { } std::scoped_lock lk{file->m_mutex}; + if (file->type == Core::FileSys::FileType::Device) { + return file->device->write(buf, nbytes); + } return file->f.WriteRaw(buf, nbytes); } @@ -204,20 +246,75 @@ int PS4_SYSV_ABI sceKernelUnlink(const char* path) { return ORBIS_OK; } +size_t ReadFile(Common::FS::IOFile& file, void* buf, size_t nbytes) { + const auto* memory = Core::Memory::Instance(); + // Invalidate up to the actual number of bytes that could be read. 
+ const auto remaining = file.GetSize() - file.Tell(); + memory->InvalidateMemory(reinterpret_cast(buf), std::min(nbytes, remaining)); + + return file.ReadRaw(buf, nbytes); +} + size_t PS4_SYSV_ABI _readv(int d, const SceKernelIovec* iov, int iovcnt) { auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); - size_t total_read = 0; + if (file == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + std::scoped_lock lk{file->m_mutex}; + if (file->type == Core::FileSys::FileType::Device) { + int r = file->device->readv(iov, iovcnt); + if (r < 0) { + ErrSceToPosix(r); + return -1; + } + return r; + } + size_t total_read = 0; for (int i = 0; i < iovcnt; i++) { - total_read += file->f.ReadRaw(iov[i].iov_base, iov[i].iov_len); + total_read += ReadFile(file->f, iov[i].iov_base, iov[i].iov_len); } return total_read; } +size_t PS4_SYSV_ABI _writev(int fd, const SceKernelIovec* iov, int iovcn) { + if (fd == 1) { + size_t total_written = 0; + for (int i = 0; i < iovcn; i++) { + total_written += ::fwrite(iov[i].iov_base, 1, iov[i].iov_len, stdout); + } + return total_written; + } + auto* h = Common::Singleton::Instance(); + auto* file = h->GetFile(fd); + if (file == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + std::scoped_lock lk{file->m_mutex}; + + if (file->type == Core::FileSys::FileType::Device) { + return file->device->writev(iov, iovcn); + } + size_t total_written = 0; + for (int i = 0; i < iovcn; i++) { + total_written += file->f.WriteRaw(iov[i].iov_base, iov[i].iov_len); + } + return total_written; +} + s64 PS4_SYSV_ABI sceKernelLseek(int d, s64 offset, int whence) { auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); + if (file == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + std::scoped_lock lk{file->m_mutex}; + if (file->type == Core::FileSys::FileType::Device) { + return file->device->lseek(offset, whence); + } Common::FS::SeekOrigin origin{}; if (whence == 0) { @@ -228,7 +325,6 @@ s64 PS4_SYSV_ABI sceKernelLseek(int d, 
s64 offset, int whence) { origin = Common::FS::SeekOrigin::End; } - std::scoped_lock lk{file->m_mutex}; if (!file->f.Seek(offset, origin)) { LOG_CRITICAL(Kernel_Fs, "sceKernelLseek: failed to seek"); return ORBIS_KERNEL_ERROR_EINVAL; @@ -261,7 +357,10 @@ s64 PS4_SYSV_ABI sceKernelRead(int d, void* buf, size_t nbytes) { } std::scoped_lock lk{file->m_mutex}; - return file->f.ReadRaw(buf, nbytes); + if (file->type == Core::FileSys::FileType::Device) { + return file->device->read(buf, nbytes); + } + return ReadFile(file->f, buf, nbytes); } int PS4_SYSV_ABI posix_read(int d, void* buf, size_t nbytes) { @@ -409,7 +508,13 @@ int PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) { int PS4_SYSV_ABI sceKernelCheckReachability(const char* path) { auto* mnt = Common::Singleton::Instance(); - const auto path_name = mnt->GetHostPath(path); + std::string_view guest_path{path}; + for (const auto& prefix : available_device | std::views::keys) { + if (guest_path.starts_with(prefix)) { + return ORBIS_OK; + } + } + const auto path_name = mnt->GetHostPath(guest_path); if (!std::filesystem::exists(path_name)) { return ORBIS_KERNEL_ERROR_ENOENT; } @@ -431,6 +536,10 @@ s64 PS4_SYSV_ABI sceKernelPreadv(int d, SceKernelIovec* iov, int iovcnt, s64 off } std::scoped_lock lk{file->m_mutex}; + if (file->type == Core::FileSys::FileType::Device) { + return file->device->preadv(iov, iovcnt, offset); + } + const s64 pos = file->f.Tell(); SCOPE_EXIT { file->f.Seek(pos); @@ -441,7 +550,7 @@ s64 PS4_SYSV_ABI sceKernelPreadv(int d, SceKernelIovec* iov, int iovcnt, s64 off } size_t total_read = 0; for (int i = 0; i < iovcnt; i++) { - total_read += file->f.ReadRaw(iov[i].iov_base, iov[i].iov_len); + total_read += ReadFile(file->f, iov[i].iov_base, iov[i].iov_len); } return total_read; } @@ -466,18 +575,25 @@ int PS4_SYSV_ABI sceKernelFStat(int fd, OrbisKernelStat* sb) { } std::memset(sb, 0, sizeof(OrbisKernelStat)); - if (file->is_directory) { - sb->st_mode = 0000777u | 0040000u; - 
sb->st_size = 0; - sb->st_blksize = 512; - sb->st_blocks = 0; - // TODO incomplete - } else { + switch (file->type) { + case Core::FileSys::FileType::Device: + return file->device->fstat(sb); + case Core::FileSys::FileType::Regular: sb->st_mode = 0000777u | 0100000u; sb->st_size = file->f.GetSize(); sb->st_blksize = 512; sb->st_blocks = (sb->st_size + 511) / 512; // TODO incomplete + break; + case Core::FileSys::FileType::Directory: + sb->st_mode = 0000777u | 0040000u; + sb->st_size = 0; + sb->st_blksize = 512; + sb->st_blocks = 0; + // TODO incomplete + break; + default: + UNREACHABLE(); } return ORBIS_OK; } @@ -495,6 +611,13 @@ int PS4_SYSV_ABI posix_fstat(int fd, OrbisKernelStat* sb) { s32 PS4_SYSV_ABI sceKernelFsync(int fd) { auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(fd); + if (file == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + if (file->type == Core::FileSys::FileType::Device) { + return file->device->fsync(); + } file->f.Flush(); return ORBIS_OK; } @@ -517,6 +640,10 @@ int PS4_SYSV_ABI sceKernelFtruncate(int fd, s64 length) { return ORBIS_KERNEL_ERROR_EBADF; } + if (file->type == Core::FileSys::FileType::Device) { + return file->device->ftruncate(length); + } + if (file->m_host_name.empty()) { return ORBIS_KERNEL_ERROR_EACCES; } @@ -538,10 +665,15 @@ static int GetDents(int fd, char* buf, int nbytes, s64* basep) { if (file == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; } + if (file->type == Core::FileSys::FileType::Device) { + return file->device->getdents(buf, nbytes, basep); + } + if (file->dirents_index == file->dirents.size()) { return ORBIS_OK; } - if (!file->is_directory || nbytes < 512 || file->dirents_index > file->dirents.size()) { + if (file->type != Core::FileSys::FileType::Directory || nbytes < 512 || + file->dirents_index > file->dirents.size()) { return ORBIS_KERNEL_ERROR_EINVAL; } const auto& entry = file->dirents.at(file->dirents_index++); @@ -586,6 +718,10 @@ s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, 
size_t nbytes, s64 offset) { } std::scoped_lock lk{file->m_mutex}; + + if (file->type == Core::FileSys::FileType::Device) { + return file->device->pwrite(buf, nbytes, offset); + } const s64 pos = file->f.Tell(); SCOPE_EXIT { file->f.Seek(pos); @@ -637,6 +773,7 @@ void RegisterFileSystem(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("4wSze92BhLI", "libkernel", 1, "libkernel", 1, 1, sceKernelWrite); LIB_FUNCTION("+WRlkKjZvag", "libkernel", 1, "libkernel", 1, 1, _readv); + LIB_FUNCTION("YSHRBRLn2pI", "libkernel", 1, "libkernel", 1, 1, _writev); LIB_FUNCTION("Oy6IpwgtYOk", "libkernel", 1, "libkernel", 1, 1, posix_lseek); LIB_FUNCTION("Oy6IpwgtYOk", "libScePosix", 1, "libkernel", 1, 1, posix_lseek); LIB_FUNCTION("oib76F-12fk", "libkernel", 1, "libkernel", 1, 1, sceKernelLseek); diff --git a/src/core/libraries/kernel/file_system.h b/src/core/libraries/kernel/file_system.h index dcbb3957d..6443962ff 100644 --- a/src/core/libraries/kernel/file_system.h +++ b/src/core/libraries/kernel/file_system.h @@ -65,6 +65,9 @@ constexpr int ORBIS_KERNEL_O_DSYNC = 0x1000; constexpr int ORBIS_KERNEL_O_DIRECT = 0x00010000; constexpr int ORBIS_KERNEL_O_DIRECTORY = 0x00020000; +s64 PS4_SYSV_ABI sceKernelWrite(int d, const void* buf, size_t nbytes); +s64 PS4_SYSV_ABI sceKernelRead(int d, void* buf, size_t nbytes); + void RegisterFileSystem(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/kernel.cpp b/src/core/libraries/kernel/kernel.cpp index 4028116ef..b05c96fad 100644 --- a/src/core/libraries/kernel/kernel.cpp +++ b/src/core/libraries/kernel/kernel.cpp @@ -9,6 +9,9 @@ #include "common/logging/log.h" #include "common/polyfill_thread.h" #include "common/thread.h" +#include "common/va_ctx.h" +#include "core/file_sys/fs.h" +#include "core/libraries/error_codes.h" #include "core/libraries/kernel/equeue.h" #include "core/libraries/kernel/file_system.h" #include "core/libraries/kernel/kernel.h" @@ -24,6 +27,7 @@ #ifdef _WIN64 
#include #endif +#include namespace Libraries::Kernel { @@ -42,7 +46,7 @@ void KernelSignalRequest() { } static void KernelServiceThread(std::stop_token stoken) { - Common::SetCurrentThreadName("shadPS4:Kernel_ServiceThread"); + Common::SetCurrentThreadName("shadPS4:KernelServiceThread"); while (!stoken.stop_requested()) { HLE_TRACE; @@ -65,19 +69,6 @@ static PS4_SYSV_ABI void stack_chk_fail() { UNREACHABLE(); } -struct iovec { - void* iov_base; /* Base address. */ - size_t iov_len; /* Length. */ -}; - -size_t PS4_SYSV_ABI _writev(int fd, const struct iovec* iov, int iovcn) { - size_t total_written = 0; - for (int i = 0; i < iovcn; i++) { - total_written += ::fwrite(iov[i].iov_base, 1, iov[i].iov_len, stdout); - } - return total_written; -} - static thread_local int g_posix_errno = 0; int* PS4_SYSV_ABI __Error() { @@ -142,24 +133,11 @@ void PS4_SYSV_ABI sceLibcHeapGetTraceInfo(HeapInfoInfo* info) { } s64 PS4_SYSV_ABI ps4__write(int d, const char* buf, std::size_t nbytes) { - if (d <= 2) { // stdin,stdout,stderr - std::string_view str{buf}; - if (str[nbytes - 1] == '\n') { - str = str.substr(0, nbytes - 1); - } - LOG_INFO(Tty, "{}", str); - return nbytes; - } - LOG_ERROR(Kernel, "(STUBBED) called d = {} nbytes = {} ", d, nbytes); - UNREACHABLE(); - return ORBIS_OK; + return sceKernelWrite(d, buf, nbytes); } s64 PS4_SYSV_ABI ps4__read(int d, void* buf, u64 nbytes) { - ASSERT_MSG(d == 0, "d is not 0!"); - - return static_cast( - strlen(std::fgets(static_cast(buf), static_cast(nbytes), stdin))); + return sceKernelRead(d, buf, nbytes); } struct OrbisKernelUuid { @@ -189,6 +167,29 @@ int PS4_SYSV_ABI sceKernelUuidCreate(OrbisKernelUuid* orbisUuid) { return 0; } +int PS4_SYSV_ABI kernel_ioctl(int fd, u64 cmd, VA_ARGS) { + auto* h = Common::Singleton::Instance(); + auto* file = h->GetFile(fd); + if (file == nullptr) { + LOG_INFO(Lib_Kernel, "ioctl: fd = {:X} cmd = {:X} file == nullptr", fd, cmd); + g_posix_errno = POSIX_EBADF; + return -1; + } + if (file->type != 
Core::FileSys::FileType::Device) { + LOG_WARNING(Lib_Kernel, "ioctl: fd = {:X} cmd = {:X} file->type != Device", fd, cmd); + g_posix_errno = ENOTTY; + return -1; + } + VA_CTX(ctx); + int result = file->device->ioctl(cmd, &ctx); + LOG_TRACE(Lib_Kernel, "ioctl: fd = {:X} cmd = {:X} result = {}", fd, cmd, result); + if (result < 0) { + ErrSceToPosix(result); + return -1; + } + return result; +} + const char* PS4_SYSV_ABI sceKernelGetFsSandboxRandomWord() { const char* path = "sys"; return path; @@ -219,19 +220,20 @@ void RegisterKernel(Core::Loader::SymbolsResolver* sym) { Libraries::Kernel::RegisterException(sym); LIB_OBJ("f7uOxY9mM1U", "libkernel", 1, "libkernel", 1, 1, &g_stack_chk_guard); + LIB_FUNCTION("PfccT7qURYE", "libkernel", 1, "libkernel", 1, 1, kernel_ioctl); LIB_FUNCTION("JGfTMBOdUJo", "libkernel", 1, "libkernel", 1, 1, sceKernelGetFsSandboxRandomWord); LIB_FUNCTION("XVL8So3QJUk", "libkernel", 1, "libkernel", 1, 1, posix_connect); LIB_FUNCTION("6xVpy0Fdq+I", "libkernel", 1, "libkernel", 1, 1, _sigprocmask); LIB_FUNCTION("Xjoosiw+XPI", "libkernel", 1, "libkernel", 1, 1, sceKernelUuidCreate); LIB_FUNCTION("Ou3iL1abvng", "libkernel", 1, "libkernel", 1, 1, stack_chk_fail); LIB_FUNCTION("9BcDykPmo1I", "libkernel", 1, "libkernel", 1, 1, __Error); - LIB_FUNCTION("YSHRBRLn2pI", "libkernel", 1, "libkernel", 1, 1, _writev); LIB_FUNCTION("DRuBt2pvICk", "libkernel", 1, "libkernel", 1, 1, ps4__read); LIB_FUNCTION("k+AXqu2-eBc", "libkernel", 1, "libkernel", 1, 1, posix_getpagesize); LIB_FUNCTION("k+AXqu2-eBc", "libScePosix", 1, "libkernel", 1, 1, posix_getpagesize); LIB_FUNCTION("NWtTN10cJzE", "libSceLibcInternalExt", 1, "libSceLibcInternal", 1, 1, sceLibcHeapGetTraceInfo); LIB_FUNCTION("FxVZqBAA7ks", "libkernel", 1, "libkernel", 1, 1, ps4__write); + LIB_FUNCTION("FN4gaPmuFV8", "libScePosix", 1, "libkernel", 1, 1, ps4__write); } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 
606c5c185..7d326cbbf 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -492,8 +492,7 @@ int PS4_SYSV_ABI sceKernelMunmap(void* addr, size_t len) { return ORBIS_OK; } auto* memory = Core::Memory::Instance(); - memory->UnmapMemory(std::bit_cast(addr), len); - return ORBIS_OK; + return memory->UnmapMemory(std::bit_cast(addr), len); } int PS4_SYSV_ABI posix_munmap(void* addr, size_t len) { diff --git a/src/core/libraries/kernel/process.cpp b/src/core/libraries/kernel/process.cpp index 15e4ff820..6c29d9305 100644 --- a/src/core/libraries/kernel/process.cpp +++ b/src/core/libraries/kernel/process.cpp @@ -20,7 +20,7 @@ int PS4_SYSV_ABI sceKernelIsNeoMode() { int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) { int version = Common::ElfInfo::Instance().RawFirmwareVer(); *ver = version; - return (version > 0) ? ORBIS_OK : ORBIS_KERNEL_ERROR_EINVAL; + return (version >= 0) ? ORBIS_OK : ORBIS_KERNEL_ERROR_EINVAL; } int PS4_SYSV_ABI sceKernelGetCpumode() { @@ -45,10 +45,11 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg // Load PRX module and relocate any modules that import it. auto* linker = Common::Singleton::Instance(); - u32 handle = linker->LoadModule(path, true); - if (handle == -1) { - return ORBIS_KERNEL_ERROR_EINVAL; + u32 handle = linker->FindByName(path); + if (handle != -1) { + return handle; } + handle = linker->LoadModule(path, true); auto* module = linker->GetModule(handle); linker->RelocateAnyImports(module); @@ -60,7 +61,10 @@ s32 PS4_SYSV_ABI sceKernelLoadStartModule(const char* moduleFileName, size_t arg // Retrieve and verify proc param according to libkernel. 
u64* param = module->GetProcParam(); ASSERT_MSG(!param || param[0] >= 0x18, "Invalid module param size: {}", param[0]); - module->Start(args, argp, param); + s32 ret = module->Start(args, argp, param); + if (pRes) { + *pRes = ret; + } return handle; } @@ -104,6 +108,9 @@ s32 PS4_SYSV_ABI sceKernelGetModuleInfoForUnwind(VAddr addr, int flags, LOG_INFO(Lib_Kernel, "called addr = {:#x}, flags = {:#x}", addr, flags); auto* linker = Common::Singleton::Instance(); auto* module = linker->FindByAddress(addr); + if (!module) { + return ORBIS_KERNEL_ERROR_EFAULT; + } const auto mod_info = module->GetModuleInfoEx(); // Fill in module info. diff --git a/src/core/libraries/kernel/sync/mutex.cpp b/src/core/libraries/kernel/sync/mutex.cpp new file mode 100644 index 000000000..c5e3eba1d --- /dev/null +++ b/src/core/libraries/kernel/sync/mutex.cpp @@ -0,0 +1,52 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "mutex.h" + +#include "common/assert.h" + +namespace Libraries::Kernel { + +TimedMutex::TimedMutex() { +#ifdef _WIN64 + mtx = CreateMutex(nullptr, false, nullptr); + ASSERT(mtx); +#endif +} + +TimedMutex::~TimedMutex() { +#ifdef _WIN64 + CloseHandle(mtx); +#endif +} + +void TimedMutex::lock() { +#ifdef _WIN64 + for (;;) { + u64 res = WaitForSingleObjectEx(mtx, INFINITE, true); + if (res == WAIT_OBJECT_0) { + return; + } + } +#else + mtx.lock(); +#endif +} + +bool TimedMutex::try_lock() { +#ifdef _WIN64 + return WaitForSingleObjectEx(mtx, 0, true) == WAIT_OBJECT_0; +#else + return mtx.try_lock(); +#endif +} + +void TimedMutex::unlock() { +#ifdef _WIN64 + ReleaseMutex(mtx); +#else + mtx.unlock(); +#endif +} + +} // namespace Libraries::Kernel \ No newline at end of file diff --git a/src/core/libraries/kernel/sync/mutex.h b/src/core/libraries/kernel/sync/mutex.h new file mode 100644 index 000000000..f14a920b4 --- /dev/null +++ b/src/core/libraries/kernel/sync/mutex.h @@ -0,0 +1,80 @@ +// 
SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "common/types.h" + +#ifdef _WIN64 +#include +#else +#include +#endif + +namespace Libraries::Kernel { + +class TimedMutex { +public: + TimedMutex(); + ~TimedMutex(); + + void lock(); + bool try_lock(); + + void unlock(); + + template + bool try_lock_for(const std::chrono::duration& rel_time) { +#ifdef _WIN64 + constexpr auto zero = std::chrono::duration::zero(); + const auto now = std::chrono::steady_clock::now(); + + std::chrono::steady_clock::time_point abs_time = now; + if (rel_time > zero) { + constexpr auto max = (std::chrono::steady_clock::time_point::max)(); + if (abs_time < max - rel_time) { + abs_time += rel_time; + } else { + abs_time = max; + } + } + + return try_lock_until(abs_time); +#else + return mtx.try_lock_for(rel_time); +#endif + } + + template + bool try_lock_until(const std::chrono::time_point& abs_time) { +#ifdef _WIN64 + for (;;) { + const auto now = Clock::now(); + if (abs_time <= now) { + return false; + } + + const auto rel_ms = std::chrono::ceil(abs_time - now); + u64 res = WaitForSingleObjectEx(mtx, static_cast(rel_ms.count()), true); + if (res == WAIT_OBJECT_0) { + return true; + } else if (res == WAIT_TIMEOUT) { + return false; + } + } +#else + return mtx.try_lock_until(abs_time); +#endif + } + +private: +#ifdef _WIN64 + HANDLE mtx; +#else + std::timed_mutex mtx; +#endif +}; + +} // namespace Libraries::Kernel \ No newline at end of file diff --git a/src/core/libraries/kernel/sync/semaphore.h b/src/core/libraries/kernel/sync/semaphore.h new file mode 100644 index 000000000..48a5dc0d8 --- /dev/null +++ b/src/core/libraries/kernel/sync/semaphore.h @@ -0,0 +1,167 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "common/assert.h" +#include "common/types.h" + +#ifdef _WIN64 
+#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + +namespace Libraries::Kernel { + +template +class Semaphore { +public: + Semaphore(s32 initialCount) +#if !defined(_WIN64) && !defined(__APPLE__) + : sem{initialCount} +#endif + { +#ifdef _WIN64 + sem = CreateSemaphore(nullptr, initialCount, max, nullptr); + ASSERT(sem); +#elif defined(__APPLE__) + sem = dispatch_semaphore_create(initialCount); + ASSERT(sem); +#endif + } + + ~Semaphore() { +#ifdef _WIN64 + CloseHandle(sem); +#elif defined(__APPLE__) + dispatch_release(sem); +#endif + } + + void release() { +#ifdef _WIN64 + ReleaseSemaphore(sem, 1, nullptr); +#elif defined(__APPLE__) + dispatch_semaphore_signal(sem); +#else + sem.release(); +#endif + } + + void acquire() { +#ifdef _WIN64 + for (;;) { + u64 res = WaitForSingleObjectEx(sem, INFINITE, true); + if (res == WAIT_OBJECT_0) { + return; + } + } +#elif defined(__APPLE__) + for (;;) { + const auto res = dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); + if (res == 0) { + return; + } + } +#else + sem.acquire(); +#endif + } + + bool try_acquire() { +#ifdef _WIN64 + return WaitForSingleObjectEx(sem, 0, true) == WAIT_OBJECT_0; +#elif defined(__APPLE__) + return dispatch_semaphore_wait(sem, DISPATCH_TIME_NOW) == 0; +#else + return sem.try_acquire(); +#endif + } + + template + bool try_acquire_for(const std::chrono::duration& rel_time) { +#ifdef _WIN64 + const auto start_time = std::chrono::high_resolution_clock::now(); + auto rel_time_ms = std::chrono::ceil(rel_time); + + while (rel_time_ms.count() > 0) { + u64 timeout_ms = static_cast(rel_time_ms.count()); + u64 res = WaitForSingleObjectEx(sem, timeout_ms, true); + if (res == WAIT_OBJECT_0) { + return true; + } else if (res == WAIT_IO_COMPLETION) { + auto elapsed_time = std::chrono::high_resolution_clock::now() - start_time; + rel_time_ms -= std::chrono::duration_cast(elapsed_time); + } else { + return false; + } + } + + return false; +#elif defined(__APPLE__) + const auto rel_time_ns = 
std::chrono::ceil(rel_time).count(); + const auto timeout = dispatch_time(DISPATCH_TIME_NOW, rel_time_ns); + return dispatch_semaphore_wait(sem, timeout) == 0; +#else + return sem.try_acquire_for(rel_time); +#endif + } + + template + bool try_acquire_until(const std::chrono::time_point& abs_time) { +#ifdef _WIN64 + const auto start_time = Clock::now(); + if (start_time >= abs_time) { + return false; + } + + auto rel_time = std::chrono::ceil(abs_time - start_time); + while (rel_time.count() > 0) { + u64 timeout_ms = static_cast(rel_time.count()); + u64 res = WaitForSingleObjectEx(sem, timeout_ms, true); + if (res == WAIT_OBJECT_0) { + return true; + } else if (res == WAIT_IO_COMPLETION) { + auto elapsed_time = Clock::now() - start_time; + rel_time -= std::chrono::duration_cast(elapsed_time); + } else { + return false; + } + } + + return false; +#elif defined(__APPLE__) + auto abs_s = std::chrono::time_point_cast(abs_time); + auto abs_ns = std::chrono::time_point_cast(abs_time) - + std::chrono::time_point_cast(abs_s); + const timespec abs_timespec = { + .tv_sec = abs_s.time_since_epoch().count(), + .tv_nsec = abs_ns.count(), + }; + const auto timeout = dispatch_walltime(&abs_timespec, 0); + return dispatch_semaphore_wait(sem, timeout) == 0; +#else + return sem.try_acquire_until(abs_time); +#endif + } + +private: +#ifdef _WIN64 + HANDLE sem; +#elif defined(__APPLE__) + dispatch_semaphore_t sem; +#else + std::counting_semaphore sem; +#endif +}; + +using BinarySemaphore = Semaphore<1>; +using CountingSemaphore = Semaphore<0x7FFFFFFF /*ORBIS_KERNEL_SEM_VALUE_MAX*/>; + +} // namespace Libraries::Kernel \ No newline at end of file diff --git a/src/core/libraries/kernel/threads/condvar.cpp b/src/core/libraries/kernel/threads/condvar.cpp index cbe8f6ca7..853526559 100644 --- a/src/core/libraries/kernel/threads/condvar.cpp +++ b/src/core/libraries/kernel/threads/condvar.cpp @@ -177,7 +177,7 @@ int PS4_SYSV_ABI posix_pthread_cond_reltimedwait_np(PthreadCondT* cond, PthreadM 
return cvp->Wait(mutex, THR_RELTIME, usec); } -int PthreadCond::Signal() { +int PthreadCond::Signal(Pthread* thread) { Pthread* curthread = g_curthread; SleepqLock(this); @@ -187,11 +187,12 @@ int PthreadCond::Signal() { return 0; } - Pthread* td = sq->sq_blocked.front(); + Pthread* td = thread ? thread : sq->sq_blocked.front(); + PthreadMutex* mp = td->mutex_obj; has_user_waiters = SleepqRemove(sq, td); - std::binary_semaphore* waddr = nullptr; + BinarySemaphore* waddr = nullptr; if (mp->m_owner == curthread) { if (curthread->nwaiter_defer >= Pthread::MaxDeferWaiters) { curthread->WakeAll(); @@ -211,7 +212,7 @@ int PthreadCond::Signal() { struct BroadcastArg { Pthread* curthread; - std::binary_semaphore* waddrs[Pthread::MaxDeferWaiters]; + BinarySemaphore* waddrs[Pthread::MaxDeferWaiters]; int count; }; @@ -262,7 +263,13 @@ int PthreadCond::Broadcast() { int PS4_SYSV_ABI posix_pthread_cond_signal(PthreadCondT* cond) { PthreadCond* cvp{}; CHECK_AND_INIT_COND - return cvp->Signal(); + return cvp->Signal(nullptr); +} + +int PS4_SYSV_ABI posix_pthread_cond_signalto_np(PthreadCondT* cond, Pthread* thread) { + PthreadCond* cvp{}; + CHECK_AND_INIT_COND + return cvp->Signal(thread); } int PS4_SYSV_ABI posix_pthread_cond_broadcast(PthreadCondT* cond) { @@ -358,6 +365,8 @@ void RegisterCond(Core::Loader::SymbolsResolver* sym) { ORBIS(posix_pthread_cond_reltimedwait_np)); LIB_FUNCTION("g+PZd2hiacg", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_cond_destroy)); + LIB_FUNCTION("o69RpYO-Mu0", "libkernel", 1, "libkernel", 1, 1, + ORBIS(posix_pthread_cond_signalto_np)); } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/threads/event_flag.cpp b/src/core/libraries/kernel/threads/event_flag.cpp index 39925153c..24ddcb927 100644 --- a/src/core/libraries/kernel/threads/event_flag.cpp +++ b/src/core/libraries/kernel/threads/event_flag.cpp @@ -118,7 +118,6 @@ public: } m_bits |= bits; - m_cond_var.notify_all(); } @@ -133,6 +132,33 @@ public: m_bits &= 
bits; } + void Cancel(u64 setPattern, int* numWaitThreads) { + std::unique_lock lock{m_mutex}; + + while (m_status != Status::Set) { + m_mutex.unlock(); + std::this_thread::sleep_for(std::chrono::microseconds(10)); + m_mutex.lock(); + } + + if (numWaitThreads) { + *numWaitThreads = m_waiting_threads; + } + + m_status = Status::Canceled; + m_bits = setPattern; + + m_cond_var.notify_all(); + + while (m_waiting_threads > 0) { + m_mutex.unlock(); + std::this_thread::sleep_for(std::chrono::microseconds(10)); + m_mutex.lock(); + } + + m_status = Status::Set; + } + private: enum class Status { Set, Canceled, Deleted }; @@ -233,7 +259,8 @@ int PS4_SYSV_ABI sceKernelClearEventFlag(OrbisKernelEventFlag ef, u64 bitPattern int PS4_SYSV_ABI sceKernelCancelEventFlag(OrbisKernelEventFlag ef, u64 setPattern, int* pNumWaitThreads) { - LOG_ERROR(Kernel_Event, "(STUBBED) called"); + LOG_DEBUG(Kernel_Event, "called"); + ef->Cancel(setPattern, pNumWaitThreads); return ORBIS_OK; } diff --git a/src/core/libraries/kernel/threads/exception.cpp b/src/core/libraries/kernel/threads/exception.cpp index b6d89aae4..017984e0d 100644 --- a/src/core/libraries/kernel/threads/exception.cpp +++ b/src/core/libraries/kernel/threads/exception.cpp @@ -7,6 +7,7 @@ #include "core/libraries/libs.h" #ifdef _WIN64 +#include "common/ntapi.h" #else #include #endif @@ -64,6 +65,34 @@ void SigactionHandler(int signum, siginfo_t* inf, ucontext_t* raw_context) { handler(POSIX_SIGUSR1, &ctx); } } +#else +void ExceptionHandler(void* arg1, void* arg2, void* arg3, PCONTEXT context) { + const char* thrName = (char*)arg1; + LOG_INFO(Lib_Kernel, "Exception raised successfully on thread '{}'", thrName); + const auto handler = Handlers[POSIX_SIGUSR1]; + if (handler) { + auto ctx = Ucontext{}; + ctx.uc_mcontext.mc_r8 = context->R8; + ctx.uc_mcontext.mc_r9 = context->R9; + ctx.uc_mcontext.mc_r10 = context->R10; + ctx.uc_mcontext.mc_r11 = context->R11; + ctx.uc_mcontext.mc_r12 = context->R12; + ctx.uc_mcontext.mc_r13 = 
context->R13; + ctx.uc_mcontext.mc_r14 = context->R14; + ctx.uc_mcontext.mc_r15 = context->R15; + ctx.uc_mcontext.mc_rdi = context->Rdi; + ctx.uc_mcontext.mc_rsi = context->Rsi; + ctx.uc_mcontext.mc_rbp = context->Rbp; + ctx.uc_mcontext.mc_rbx = context->Rbx; + ctx.uc_mcontext.mc_rdx = context->Rdx; + ctx.uc_mcontext.mc_rax = context->Rax; + ctx.uc_mcontext.mc_rcx = context->Rcx; + ctx.uc_mcontext.mc_rsp = context->Rsp; + ctx.uc_mcontext.mc_fs = context->SegFs; + ctx.uc_mcontext.mc_gs = context->SegGs; + handler(POSIX_SIGUSR1, &ctx); + } +} #endif int PS4_SYSV_ABI sceKernelInstallExceptionHandler(s32 signum, SceKernelExceptionHandler handler) { @@ -73,9 +102,7 @@ int PS4_SYSV_ABI sceKernelInstallExceptionHandler(s32 signum, SceKernelException } ASSERT_MSG(!Handlers[POSIX_SIGUSR1], "Invalid parameters"); Handlers[POSIX_SIGUSR1] = handler; -#ifdef _WIN64 - UNREACHABLE_MSG("Missing exception implementation"); -#else +#ifndef _WIN64 struct sigaction act = {}; act.sa_flags = SA_SIGINFO | SA_RESTART; act.sa_sigaction = reinterpret_cast(SigactionHandler); @@ -91,9 +118,7 @@ int PS4_SYSV_ABI sceKernelRemoveExceptionHandler(s32 signum) { } ASSERT_MSG(Handlers[POSIX_SIGUSR1], "Invalid parameters"); Handlers[POSIX_SIGUSR1] = nullptr; -#ifdef _WIN64 - UNREACHABLE_MSG("Missing exception implementation"); -#else +#ifndef _WIN64 struct sigaction act = {}; act.sa_flags = SA_SIGINFO | SA_RESTART; act.sa_sigaction = nullptr; @@ -103,13 +128,18 @@ int PS4_SYSV_ABI sceKernelRemoveExceptionHandler(s32 signum) { } int PS4_SYSV_ABI sceKernelRaiseException(PthreadT thread, int signum) { - LOG_ERROR(Lib_Kernel, "Raising exception"); + LOG_WARNING(Lib_Kernel, "Raising exception on thread '{}'", thread->name); ASSERT_MSG(signum == POSIX_SIGUSR1, "Attempting to raise non user defined signal!"); -#ifdef _WIN64 - UNREACHABLE_MSG("Missing exception implementation"); -#else +#ifndef _WIN64 pthread_t pthr = *reinterpret_cast(thread->native_thr.GetHandle()); pthread_kill(pthr, SIGUSR2); +#else + 
USER_APC_OPTION option; + option.UserApcFlags = QueueUserApcFlagsSpecialUserApc; + + u64 res = NtQueueApcThreadEx(reinterpret_cast(thread->native_thr.GetHandle()), option, + ExceptionHandler, (void*)thread->name.c_str(), nullptr, nullptr); + ASSERT(res == 0); #endif return 0; } diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 793ddd1fe..08886c6eb 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -206,6 +206,7 @@ static void RunThread(void* arg) { DebugState.AddCurrentThreadToGuestList(); /* Run the current thread's start routine with argument: */ + curthread->native_thr.Initialize(); void* ret = Core::ExecuteGuest(curthread->start_routine, curthread->arg); /* Remove thread from tracking */ @@ -242,6 +243,13 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt static int TidCounter = 1; new_thread->tid = ++TidCounter; + if (new_thread->attr.stackaddr_attr == 0) { + /* Enforce minimum stack size of 64 KB */ + static constexpr size_t MinimumStack = 64_KB; + auto& stacksize = new_thread->attr.stacksize_attr; + stacksize = std::max(stacksize, MinimumStack); + } + if (thread_state->CreateStack(&new_thread->attr) != 0) { /* Insufficient memory to create a stack: */ thread_state->Free(curthread, new_thread); @@ -280,7 +288,7 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt (*thread) = new_thread; /* Create thread */ - new_thread->native_thr = Core::Thread(); + new_thread->native_thr = Core::NativeThread(); int ret = new_thread->native_thr.Create(RunThread, new_thread, &new_thread->attr); ASSERT_MSG(ret == 0, "Failed to create thread with error {}", ret); if (ret) { @@ -379,6 +387,7 @@ int PS4_SYSV_ABI posix_sched_get_priority_min() { int PS4_SYSV_ABI posix_pthread_rename_np(PthreadT thread, const char* name) { LOG_INFO(Kernel_Pthread, "name = {}", name); + 
Common::SetThreadName(reinterpret_cast(thread->native_thr.GetHandle()), name); thread->name = name; return ORBIS_OK; } @@ -412,6 +421,33 @@ int PS4_SYSV_ABI posix_pthread_getschedparam(PthreadT pthread, SchedPolicy* poli return 0; } +int PS4_SYSV_ABI posix_pthread_setschedparam(PthreadT pthread, SchedPolicy policy, + const SchedParam* param) { + if (pthread == nullptr || param == nullptr) { + return POSIX_EINVAL; + } + + auto* thread_state = ThrState::Instance(); + if (pthread == g_curthread) { + g_curthread->lock.lock(); + } else if (int ret = thread_state->FindThread(pthread, /*include dead*/ 0); ret != 0) { + return ret; + } + + if (pthread->attr.sched_policy == policy && + (policy == SchedPolicy::Other || pthread->attr.prio == param->sched_priority)) { + pthread->attr.prio = param->sched_priority; + pthread->lock.unlock(); + return 0; + } + + // TODO: _thr_setscheduler + pthread->attr.sched_policy = policy; + pthread->attr.prio = param->sched_priority; + pthread->lock.unlock(); + return 0; +} + int PS4_SYSV_ABI scePthreadGetprio(PthreadT thread, int* priority) { SchedParam param; SchedPolicy policy; @@ -495,6 +531,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("lZzFeSxPl08", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setcancelstate); LIB_FUNCTION("a2P9wYGeZvc", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setprio); LIB_FUNCTION("FIs3-UQT9sg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_getschedparam); + LIB_FUNCTION("Xs9hdiD7sAA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setschedparam); LIB_FUNCTION("6XG4B33N09g", "libScePosix", 1, "libkernel", 1, 1, sched_yield); // Posix-Kernel @@ -510,6 +547,8 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("onNY9Byn-W8", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_join)); LIB_FUNCTION("P41kTWUS3EI", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_getschedparam)); + LIB_FUNCTION("oIRFTjoILbg", "libkernel", 1, "libkernel", 
1, 1, + ORBIS(posix_pthread_setschedparam)); LIB_FUNCTION("How7B8Oet6k", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_getname_np)); LIB_FUNCTION("3kg7rT0NQIs", "libkernel", 1, "libkernel", 1, 1, posix_pthread_exit); LIB_FUNCTION("aI+OeCz8xrQ", "libkernel", 1, "libkernel", 1, 1, posix_pthread_self); diff --git a/src/core/libraries/kernel/threads/pthread.h b/src/core/libraries/kernel/threads/pthread.h index b41ca2abd..089156776 100644 --- a/src/core/libraries/kernel/threads/pthread.h +++ b/src/core/libraries/kernel/threads/pthread.h @@ -11,6 +11,8 @@ #include #include "common/enum.h" +#include "core/libraries/kernel/sync/mutex.h" +#include "core/libraries/kernel/sync/semaphore.h" #include "core/libraries/kernel/time.h" #include "core/thread.h" #include "core/tls.h" @@ -44,7 +46,7 @@ enum class PthreadMutexProt : u32 { }; struct PthreadMutex { - std::timed_mutex m_lock; + TimedMutex m_lock; PthreadMutexFlags m_flags; Pthread* m_owner; int m_count; @@ -121,7 +123,7 @@ struct PthreadCond { int Wait(PthreadMutexT* mutex, const OrbisKernelTimespec* abstime, u64 usec = 0); - int Signal(); + int Signal(Pthread* thread); int Broadcast(); }; using PthreadCondT = PthreadCond*; @@ -259,7 +261,7 @@ struct Pthread { int refcount; PthreadEntryFunc start_routine; void* arg; - Core::Thread native_thr; + Core::NativeThread native_thr; PthreadAttr attr; bool cancel_enable; bool cancel_pending; @@ -288,14 +290,14 @@ struct Pthread { int report_events; int event_mask; std::string name; - std::binary_semaphore wake_sema{0}; + BinarySemaphore wake_sema{0}; SleepQueue* sleepqueue; void* wchan; PthreadMutex* mutex_obj; bool will_sleep; bool has_user_waiters; int nwaiter_defer; - std::binary_semaphore* defer_waiters[MaxDeferWaiters]; + BinarySemaphore* defer_waiters[MaxDeferWaiters]; bool InCritical() const noexcept { return locklevel > 0 || critical_count > 0; diff --git a/src/core/libraries/kernel/threads/rwlock.cpp b/src/core/libraries/kernel/threads/rwlock.cpp index 
affaaf994..ff211e48c 100644 --- a/src/core/libraries/kernel/threads/rwlock.cpp +++ b/src/core/libraries/kernel/threads/rwlock.cpp @@ -177,13 +177,13 @@ int PS4_SYSV_ABI posix_pthread_rwlock_unlock(PthreadRwlockT* rwlock) { } if (prwlock->owner == curthread) { - prwlock->lock.unlock(); prwlock->owner = nullptr; + prwlock->lock.unlock(); } else { - prwlock->lock.unlock_shared(); if (prwlock->owner == nullptr) { curthread->rdlock_count--; } + prwlock->lock.unlock_shared(); } return 0; diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp index e3c7e9092..f25a76c2b 100644 --- a/src/core/libraries/kernel/threads/semaphore.cpp +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -6,7 +6,10 @@ #include #include +#include "core/libraries/kernel/sync/semaphore.h" + #include "common/logging/log.h" +#include "common/slot_vector.h" #include "core/libraries/kernel/kernel.h" #include "core/libraries/kernel/orbis_error.h" #include "core/libraries/kernel/posix_error.h" @@ -21,7 +24,7 @@ constexpr int ORBIS_KERNEL_SEM_VALUE_MAX = 0x7FFFFFFF; struct PthreadSem { explicit PthreadSem(s32 value_) : semaphore{value_}, value{value_} {} - std::counting_semaphore semaphore; + CountingSemaphore semaphore; std::atomic value; }; @@ -75,7 +78,7 @@ public: it = wait_list.erase(it); token_count -= waiter->need_count; waiter->was_signaled = true; - waiter->cv.notify_one(); + waiter->sem.release(); } return true; @@ -88,7 +91,7 @@ public: } for (auto* waiter : wait_list) { waiter->was_cancled = true; - waiter->cv.notify_one(); + waiter->sem.release(); } wait_list.clear(); token_count = set_count < 0 ? 
init_count : set_count; @@ -99,25 +102,29 @@ public: std::scoped_lock lk{mutex}; for (auto* waiter : wait_list) { waiter->was_deleted = true; - waiter->cv.notify_one(); + waiter->sem.release(); } wait_list.clear(); } public: struct WaitingThread { - std::condition_variable cv; + BinarySemaphore sem; u32 priority; s32 need_count; + std::string thr_name; bool was_signaled{}; bool was_deleted{}; bool was_cancled{}; - explicit WaitingThread(s32 need_count, bool is_fifo) : need_count{need_count} { + explicit WaitingThread(s32 need_count, bool is_fifo) + : sem{0}, priority{0}, need_count{need_count} { // Retrieve calling thread priority for sorting into waiting threads list. if (!is_fifo) { priority = g_curthread->attr.prio; } + + thr_name = g_curthread->name; } int GetResult(bool timed_out) { @@ -134,24 +141,26 @@ public: } int Wait(std::unique_lock& lk, u32* timeout) { + lk.unlock(); if (!timeout) { // Wait indefinitely until we are woken up. - cv.wait(lk); + sem.acquire(); + lk.lock(); return GetResult(false); } // Wait until timeout runs out, recording how much remaining time there was. 
const auto start = std::chrono::high_resolution_clock::now(); - const auto signaled = cv.wait_for(lk, std::chrono::microseconds(*timeout), - [this] { return was_signaled; }); + sem.try_acquire_for(std::chrono::microseconds(*timeout)); const auto end = std::chrono::high_resolution_clock::now(); const auto time = std::chrono::duration_cast(end - start).count(); - if (signaled) { + lk.lock(); + if (was_signaled) { *timeout -= time; } else { *timeout = 0; } - return GetResult(!signaled); + return GetResult(!was_signaled); } }; @@ -180,7 +189,9 @@ public: bool is_fifo; }; -using OrbisKernelSema = OrbisSem*; +using OrbisKernelSema = Common::SlotId; + +static Common::SlotVector> orbis_sems; s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u32 attr, s32 initCount, s32 maxCount, const void* pOptParam) { @@ -188,46 +199,48 @@ s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u3 LOG_ERROR(Lib_Kernel, "Semaphore creation parameters are invalid!"); return ORBIS_KERNEL_ERROR_EINVAL; } - *sem = new OrbisSem(initCount, maxCount, pName, attr == 1); + *sem = orbis_sems.insert( + std::move(std::make_unique(initCount, maxCount, pName, attr == 1))); return ORBIS_OK; } s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u32* pTimeout) { - if (!sem) { + if (!orbis_sems.is_allocated(sem)) { return ORBIS_KERNEL_ERROR_ESRCH; } - return sem->Wait(true, needCount, pTimeout); + return orbis_sems[sem]->Wait(true, needCount, pTimeout); } s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { - if (!sem) { + if (!orbis_sems.is_allocated(sem)) { return ORBIS_KERNEL_ERROR_ESRCH; } - if (!sem->Signal(signalCount)) { + if (!orbis_sems[sem]->Signal(signalCount)) { return ORBIS_KERNEL_ERROR_EINVAL; } return ORBIS_OK; } s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) { - if (!sem) { + if (!orbis_sems.is_allocated(sem)) { return ORBIS_KERNEL_ERROR_ESRCH; } - return sem->Wait(false, 
needCount, nullptr); + return orbis_sems[sem]->Wait(false, needCount, nullptr); } int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32* pNumWaitThreads) { - if (!sem) { + if (!orbis_sems.is_allocated(sem)) { return ORBIS_KERNEL_ERROR_ESRCH; } - return sem->Cancel(setCount, pNumWaitThreads); + return orbis_sems[sem]->Cancel(setCount, pNumWaitThreads); } int PS4_SYSV_ABI sceKernelDeleteSema(OrbisKernelSema sem) { - if (!sem) { + if (!orbis_sems.is_allocated(sem)) { return ORBIS_KERNEL_ERROR_ESRCH; } - sem->Delete(); + orbis_sems[sem]->Delete(); + orbis_sems.erase(sem); return ORBIS_OK; } @@ -242,6 +255,16 @@ int PS4_SYSV_ABI posix_sem_init(PthreadSem** sem, int pshared, u32 value) { return 0; } +int PS4_SYSV_ABI posix_sem_destroy(PthreadSem** sem) { + if (sem == nullptr || *sem == nullptr) { + *__Error() = POSIX_EINVAL; + return -1; + } + delete *sem; + *sem = nullptr; + return 0; +} + int PS4_SYSV_ABI posix_sem_wait(PthreadSem** sem) { if (sem == nullptr || *sem == nullptr) { *__Error() = POSIX_EINVAL; @@ -292,16 +315,6 @@ int PS4_SYSV_ABI posix_sem_post(PthreadSem** sem) { return 0; } -int PS4_SYSV_ABI posix_sem_destroy(PthreadSem** sem) { - if (sem == nullptr || *sem == nullptr) { - *__Error() = POSIX_EINVAL; - return -1; - } - delete *sem; - *sem = nullptr; - return 0; -} - int PS4_SYSV_ABI posix_sem_getvalue(PthreadSem** sem, int* sval) { if (sem == nullptr || *sem == nullptr) { *__Error() = POSIX_EINVAL; @@ -313,6 +326,77 @@ int PS4_SYSV_ABI posix_sem_getvalue(PthreadSem** sem, int* sval) { return 0; } +s32 PS4_SYSV_ABI scePthreadSemInit(PthreadSem** sem, int flag, u32 value, const char* name) { + if (flag != 0) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + s32 ret = posix_sem_init(sem, 0, value); + if (ret != 0) { + return ErrnoToSceKernelError(*__Error()); + } + + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI scePthreadSemDestroy(PthreadSem** sem) { + s32 ret = posix_sem_destroy(sem); + if (ret != 0) { + return 
ErrnoToSceKernelError(*__Error()); + } + + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI scePthreadSemWait(PthreadSem** sem) { + s32 ret = posix_sem_wait(sem); + if (ret != 0) { + return ErrnoToSceKernelError(*__Error()); + } + + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI scePthreadSemTrywait(PthreadSem** sem) { + s32 ret = posix_sem_trywait(sem); + if (ret != 0) { + return ErrnoToSceKernelError(*__Error()); + } + + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI scePthreadSemTimedwait(PthreadSem** sem, u32 usec) { + OrbisKernelTimespec time{}; + time.tv_sec = usec / 1000000; + time.tv_nsec = (usec % 1000000) * 1000; + + s32 ret = posix_sem_timedwait(sem, &time); + if (ret != 0) { + return ErrnoToSceKernelError(*__Error()); + } + + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI scePthreadSemPost(PthreadSem** sem) { + s32 ret = posix_sem_post(sem); + if (ret != 0) { + return ErrnoToSceKernelError(*__Error()); + } + + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI scePthreadSemGetvalue(PthreadSem** sem, int* sval) { + s32 ret = posix_sem_getvalue(sem, sval); + if (ret != 0) { + return ErrnoToSceKernelError(*__Error()); + } + + return ORBIS_OK; +} + void RegisterSemaphore(Core::Loader::SymbolsResolver* sym) { // Orbis LIB_FUNCTION("188x57JYp0g", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateSema); @@ -324,12 +408,20 @@ void RegisterSemaphore(Core::Loader::SymbolsResolver* sym) { // Posix LIB_FUNCTION("pDuPEf3m4fI", "libScePosix", 1, "libkernel", 1, 1, posix_sem_init); + LIB_FUNCTION("cDW233RAwWo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_destroy); LIB_FUNCTION("YCV5dGGBcCo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_wait); LIB_FUNCTION("WBWzsRifCEA", "libScePosix", 1, "libkernel", 1, 1, posix_sem_trywait); LIB_FUNCTION("w5IHyvahg-o", "libScePosix", 1, "libkernel", 1, 1, posix_sem_timedwait); LIB_FUNCTION("IKP8typ0QUk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_post); - LIB_FUNCTION("cDW233RAwWo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_destroy); 
LIB_FUNCTION("Bq+LRV-N6Hk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_getvalue); + + LIB_FUNCTION("GEnUkDZoUwY", "libkernel", 1, "libkernel", 1, 1, scePthreadSemInit); + LIB_FUNCTION("Vwc+L05e6oE", "libkernel", 1, "libkernel", 1, 1, scePthreadSemDestroy); + LIB_FUNCTION("C36iRE0F5sE", "libkernel", 1, "libkernel", 1, 1, scePthreadSemWait); + LIB_FUNCTION("H2a+IN9TP0E", "libkernel", 1, "libkernel", 1, 1, scePthreadSemTrywait); + LIB_FUNCTION("fjN6NQHhK8k", "libkernel", 1, "libkernel", 1, 1, scePthreadSemTimedwait); + LIB_FUNCTION("aishVAiFaYM", "libkernel", 1, "libkernel", 1, 1, scePthreadSemPost); + LIB_FUNCTION("DjpBvGlaWbQ", "libkernel", 1, "libkernel", 1, 1, scePthreadSemGetvalue); } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/time.cpp b/src/core/libraries/kernel/time.cpp index b586431ab..2565b8078 100644 --- a/src/core/libraries/kernel/time.cpp +++ b/src/core/libraries/kernel/time.cpp @@ -52,7 +52,22 @@ u64 PS4_SYSV_ABI sceKernelReadTsc() { int PS4_SYSV_ABI sceKernelUsleep(u32 microseconds) { #ifdef _WIN64 - std::this_thread::sleep_for(std::chrono::microseconds(microseconds)); + const auto start_time = std::chrono::high_resolution_clock::now(); + auto total_wait_time = std::chrono::microseconds(microseconds); + + while (total_wait_time.count() > 0) { + auto wait_time = std::chrono::ceil(total_wait_time).count(); + u64 res = SleepEx(static_cast(wait_time), true); + if (res == WAIT_IO_COMPLETION) { + auto elapsedTime = std::chrono::high_resolution_clock::now() - start_time; + auto elapsedMicroseconds = + std::chrono::duration_cast(elapsedTime).count(); + total_wait_time = std::chrono::microseconds(microseconds - elapsedMicroseconds); + } else { + break; + } + } + return 0; #else timespec start; diff --git a/src/core/libraries/save_data/dialog/savedatadialog.cpp b/src/core/libraries/save_data/dialog/savedatadialog.cpp index 0ad7d7dc0..2f0619165 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog.cpp +++ 
b/src/core/libraries/save_data/dialog/savedatadialog.cpp @@ -1,11 +1,12 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include + #include "common/elf_info.h" #include "common/logging/log.h" #include "core/libraries/libs.h" #include "core/libraries/system/commondialog.h" -#include "magic_enum.hpp" #include "savedatadialog.h" #include "savedatadialog_ui.h" diff --git a/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp b/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp index 4e0d801a6..a6ca8744d 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp +++ b/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include #include "common/elf_info.h" #include "common/singleton.h" diff --git a/src/core/libraries/save_data/save_backup.cpp b/src/core/libraries/save_data/save_backup.cpp index da5172b15..5261cdb11 100644 --- a/src/core/libraries/save_data/save_backup.cpp +++ b/src/core/libraries/save_data/save_backup.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include "save_backup.h" #include "save_instance.h" @@ -79,7 +79,7 @@ static void backup(const std::filesystem::path& dir_name) { } static void BackupThreadBody() { - Common::SetCurrentThreadName("shadPS4:SaveData_BackupThread"); + Common::SetCurrentThreadName("shadPS4:SaveData:BackupThread"); while (g_backup_status != WorkerStatus::Stopping) { g_backup_status = WorkerStatus::Waiting; diff --git a/src/core/libraries/save_data/save_instance.cpp b/src/core/libraries/save_data/save_instance.cpp index 0d6c5173c..99daf83cc 100644 --- a/src/core/libraries/save_data/save_instance.cpp +++ b/src/core/libraries/save_data/save_instance.cpp @@ -3,7 +3,7 @@ #include -#include +#include #include "common/assert.h" #include "common/config.h" diff --git a/src/core/libraries/save_data/save_memory.cpp b/src/core/libraries/save_data/save_memory.cpp index e9ef53761..84179bc27 
100644 --- a/src/core/libraries/save_data/save_memory.cpp +++ b/src/core/libraries/save_data/save_memory.cpp @@ -66,7 +66,7 @@ static void SaveFileSafe(void* buf, size_t count, const std::filesystem::path& p } [[noreturn]] void SaveThreadLoop() { - Common::SetCurrentThreadName("shadPS4:SaveData_SaveDataMemoryThread"); + Common::SetCurrentThreadName("shadPS4:SaveData:SaveDataMemoryThread"); std::mutex mtx; while (true) { { diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp index c515ebcbf..66899fb34 100644 --- a/src/core/libraries/save_data/savedata.cpp +++ b/src/core/libraries/save_data/savedata.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include "common/assert.h" #include "common/cstring.h" diff --git a/src/core/libraries/system/msgdialog.cpp b/src/core/libraries/system/msgdialog.cpp index 7d924e4ad..8a01f429f 100644 --- a/src/core/libraries/system/msgdialog.cpp +++ b/src/core/libraries/system/msgdialog.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include +#include #include "common/assert.h" #include "common/logging/log.h" diff --git a/src/core/libraries/system/sysmodule.cpp b/src/core/libraries/system/sysmodule.cpp index 9bed4ef31..350f1317b 100644 --- a/src/core/libraries/system/sysmodule.cpp +++ b/src/core/libraries/system/sysmodule.cpp @@ -3,7 +3,7 @@ #define MAGIC_ENUM_RANGE_MIN 0 #define MAGIC_ENUM_RANGE_MAX 300 -#include +#include #include "common/logging/log.h" #include "core/libraries/error_codes.h" diff --git a/src/core/libraries/videodec/videodec2_impl.cpp b/src/core/libraries/videodec/videodec2_impl.cpp index 8daa48828..138d78af3 100644 --- a/src/core/libraries/videodec/videodec2_impl.cpp +++ b/src/core/libraries/videodec/videodec2_impl.cpp @@ -7,16 +7,7 @@ #include "common/logging/log.h" #include "core/libraries/videodec/videodec_error.h" -// The av_err2str macro in libavutil/error.h does not play nice with C++ -#ifdef av_err2str -#undef av_err2str -#include 
-av_always_inline std::string av_err2string(int errnum) { - char errbuf[AV_ERROR_MAX_STRING_SIZE]; - return av_make_error_string(errbuf, AV_ERROR_MAX_STRING_SIZE, errnum); -} -#define av_err2str(err) av_err2string(err).c_str() -#endif // av_err2str +#include "common/support/avdec.h" namespace Libraries::Vdec2 { diff --git a/src/core/libraries/videodec/videodec_impl.cpp b/src/core/libraries/videodec/videodec_impl.cpp index cf4846971..b5f72e9ce 100644 --- a/src/core/libraries/videodec/videodec_impl.cpp +++ b/src/core/libraries/videodec/videodec_impl.cpp @@ -8,16 +8,7 @@ #include "common/logging/log.h" #include "core/libraries/videodec/videodec_error.h" -// The av_err2str macro in libavutil/error.h does not play nice with C++ -#ifdef av_err2str -#undef av_err2str -#include -av_always_inline std::string av_err2string(int errnum) { - char errbuf[AV_ERROR_MAX_STRING_SIZE]; - return av_make_error_string(errbuf, AV_ERROR_MAX_STRING_SIZE, errnum); -} -#define av_err2str(err) av_err2string(err).c_str() -#endif // av_err2str +#include "common/support/avdec.h" namespace Libraries::Videodec { diff --git a/src/core/linker.h b/src/core/linker.h index 3a1aeb960..d6b5d648a 100644 --- a/src/core/linker.h +++ b/src/core/linker.h @@ -85,6 +85,15 @@ public: return m_modules.at(index).get(); } + u32 FindByName(const std::filesystem::path& name) const { + for (u32 i = 0; i < m_modules.size(); i++) { + if (name == m_modules[i]->file) { + return i; + } + } + return -1; + } + u32 MaxTlsIndex() const { return max_tls_index; } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 15fde2a57..41db7df4b 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -96,12 +96,12 @@ PAddr MemoryManager::PoolExpand(PAddr search_start, PAddr search_end, size_t siz PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment, int memory_type) { std::scoped_lock lk{mutex}; + alignment = alignment > 0 ? 
alignment : 16_KB; auto dmem_area = FindDmemArea(search_start); const auto is_suitable = [&] { - const auto aligned_base = alignment > 0 ? Common::AlignUp(dmem_area->second.base, alignment) - : dmem_area->second.base; + const auto aligned_base = Common::AlignUp(dmem_area->second.base, alignment); const auto alignment_size = aligned_base - dmem_area->second.base; const auto remaining_size = dmem_area->second.size >= alignment_size ? dmem_area->second.size - alignment_size : 0; @@ -114,7 +114,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, // Align free position PAddr free_addr = dmem_area->second.base; - free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr; + free_addr = Common::AlignUp(free_addr, alignment); // Add the allocated region to the list and commit its pages. auto& area = CarveDmemArea(free_addr, size)->second; @@ -328,7 +328,7 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem } // Map the file. 
- impl.MapFile(mapped_addr, size, offset, std::bit_cast(prot), fd); + impl.MapFile(mapped_addr, size_aligned, offset, std::bit_cast(prot), fd); // Add virtual memory area auto& new_vma = CarveVMA(mapped_addr, size_aligned)->second; @@ -375,12 +375,12 @@ void MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) { TRACK_FREE(virtual_addr, "VMEM"); } -void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { +s32 MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { std::scoped_lock lk{mutex}; - UnmapMemoryImpl(virtual_addr, size); + return UnmapMemoryImpl(virtual_addr, size); } -void MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, size_t size) { +s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, size_t size) { const auto it = FindVMA(virtual_addr); const auto& vma_base = it->second; ASSERT_MSG(vma_base.Contains(virtual_addr, size), @@ -415,6 +415,8 @@ void MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, size_t size) { impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base, is_exec, has_backing, readonly_file); TRACK_FREE(virtual_addr, "VMEM"); + + return ORBIS_OK; } int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* prot) { @@ -512,9 +514,8 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, info->is_flexible.Assign(vma.type == VMAType::Flexible); info->is_direct.Assign(vma.type == VMAType::Direct); info->is_stack.Assign(vma.type == VMAType::Stack); - info->is_pooled.Assign(vma.type == VMAType::Pooled); - info->is_committed.Assign(vma.type != VMAType::Free && vma.type != VMAType::Reserved && - vma.type != VMAType::PoolReserved); + info->is_pooled.Assign(vma.type == VMAType::PoolReserved); + info->is_committed.Assign(vma.type == VMAType::Pooled); vma.name.copy(info->name.data(), std::min(info->name.size(), vma.name.size())); if (vma.type == VMAType::Direct) { const auto dmem_it = FindDmemArea(vma.phys_base); @@ -585,6 +586,13 @@ void 
MemoryManager::NameVirtualRange(VAddr virtual_addr, size_t size, std::strin "Range provided is not fully contained in vma"); it->second.name = name; } + +void MemoryManager::InvalidateMemory(const VAddr addr, const u64 size) const { + if (rasterizer) { + rasterizer->InvalidateMemory(addr, size); + } +} + VAddr MemoryManager::SearchFree(VAddr virtual_addr, size_t size, u32 alignment) { // If the requested address is below the mapped range, start search from the lowest address auto min_search_address = impl.SystemManagedVirtualBase(); @@ -691,7 +699,7 @@ MemoryManager::DMemHandle MemoryManager::Split(DMemHandle dmem_handle, size_t of new_area.size -= offset_in_area; return dmem_map.emplace_hint(std::next(dmem_handle), new_area.base, new_area); -}; +} int MemoryManager::GetDirectMemoryType(PAddr addr, int* directMemoryTypeOut, void** directMemoryStartOut, void** directMemoryEndOut) { diff --git a/src/core/memory.h b/src/core/memory.h index 2efa02763..a9f2df322 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -192,7 +192,7 @@ public: void PoolDecommit(VAddr virtual_addr, size_t size); - void UnmapMemory(VAddr virtual_addr, size_t size); + s32 UnmapMemory(VAddr virtual_addr, size_t size); int QueryProtection(VAddr addr, void** start, void** end, u32* prot); @@ -211,6 +211,8 @@ public: void NameVirtualRange(VAddr virtual_addr, size_t size, std::string_view name); + void InvalidateMemory(VAddr addr, u64 size) const; + private: VMAHandle FindVMA(VAddr target) { return std::prev(vma_map.upper_bound(target)); @@ -250,7 +252,7 @@ private: DMemHandle Split(DMemHandle dmem_handle, size_t offset_in_area); - void UnmapMemoryImpl(VAddr virtual_addr, size_t size); + s32 UnmapMemoryImpl(VAddr virtual_addr, size_t size); private: AddressSpace impl; diff --git a/src/core/module.cpp b/src/core/module.cpp index ef34f25c1..70afb932c 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -470,8 +470,8 @@ OrbisKernelModuleInfoEx Module::GetModuleInfoEx() const { .tls_align 
= tls.align, .init_proc_addr = base_virtual_addr + dynamic_info.init_virtual_addr, .fini_proc_addr = base_virtual_addr + dynamic_info.fini_virtual_addr, - .eh_frame_hdr_addr = eh_frame_hdr_addr, - .eh_frame_addr = eh_frame_addr, + .eh_frame_hdr_addr = base_virtual_addr + eh_frame_hdr_addr, + .eh_frame_addr = base_virtual_addr + eh_frame_addr, .eh_frame_hdr_size = eh_frame_hdr_size, .eh_frame_size = eh_frame_size, .segments = info.segments, diff --git a/src/core/platform.h b/src/core/platform.h index 03bd79e86..bdb50701b 100644 --- a/src/core/platform.h +++ b/src/core/platform.h @@ -7,7 +7,8 @@ #include "common/logging/log.h" #include "common/singleton.h" #include "common/types.h" -#include "magic_enum.hpp" + +#include #include #include diff --git a/src/core/thread.cpp b/src/core/thread.cpp index a93f16c8d..07681e6b9 100644 --- a/src/core/thread.cpp +++ b/src/core/thread.cpp @@ -1,48 +1,151 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "libraries/kernel/threads/pthread.h" +#include "common/alignment.h" +#include "core/libraries/kernel/threads/pthread.h" #include "thread.h" #ifdef _WIN64 #include +#include "common/ntapi.h" #else +#include #include #endif namespace Core { -Thread::Thread() : native_handle{0} {} - -Thread::~Thread() {} - -int Thread::Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr) { #ifdef _WIN64 - native_handle = CreateThread(nullptr, 0, (LPTHREAD_START_ROUTINE)func, arg, 0, nullptr); - return native_handle ? 
0 : -1; -#else +#define KGDT64_R3_DATA (0x28) +#define KGDT64_R3_CODE (0x30) +#define KGDT64_R3_CMTEB (0x50) +#define RPL_MASK (0x03) + +#define INITIAL_FPUCW (0x037f) +#define INITIAL_MXCSR_MASK (0xffbf) +#define EFLAGS_INTERRUPT_MASK (0x200) + +void InitializeTeb(INITIAL_TEB* teb, const ::Libraries::Kernel::PthreadAttr* attr) { + teb->StackBase = (void*)((u64)attr->stackaddr_attr + attr->stacksize_attr); + teb->StackLimit = nullptr; + teb->StackAllocationBase = attr->stackaddr_attr; +} + +void InitializeContext(CONTEXT* ctx, ThreadFunc func, void* arg, + const ::Libraries::Kernel::PthreadAttr* attr) { + /* Note: The stack has to be reversed */ + ctx->Rsp = (u64)attr->stackaddr_attr + attr->stacksize_attr; + ctx->Rbp = (u64)attr->stackaddr_attr + attr->stacksize_attr; + ctx->Rcx = (u64)arg; + ctx->Rip = (u64)func; + + ctx->SegGs = KGDT64_R3_DATA | RPL_MASK; + ctx->SegEs = KGDT64_R3_DATA | RPL_MASK; + ctx->SegDs = KGDT64_R3_DATA | RPL_MASK; + ctx->SegCs = KGDT64_R3_CODE | RPL_MASK; + ctx->SegSs = KGDT64_R3_DATA | RPL_MASK; + ctx->SegFs = KGDT64_R3_CMTEB | RPL_MASK; + + ctx->EFlags = 0x3000 | EFLAGS_INTERRUPT_MASK; + ctx->MxCsr = INITIAL_MXCSR; + + ctx->FltSave.ControlWord = INITIAL_FPUCW; + ctx->FltSave.MxCsr = INITIAL_MXCSR; + ctx->FltSave.MxCsr_Mask = INITIAL_MXCSR_MASK; + + ctx->ContextFlags = + CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | CONTEXT_FLOATING_POINT; +} +#endif + +NativeThread::NativeThread() : native_handle{0} {} + +NativeThread::~NativeThread() {} + +int NativeThread::Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr) { +#ifndef _WIN64 pthread_t* pthr = reinterpret_cast(&native_handle); pthread_attr_t pattr; pthread_attr_init(&pattr); pthread_attr_setstack(&pattr, attr->stackaddr_attr, attr->stacksize_attr); return pthread_create(pthr, &pattr, (PthreadFunc)func, arg); +#else + CLIENT_ID clientId{}; + INITIAL_TEB teb{}; + CONTEXT ctx{}; + + clientId.UniqueProcess = GetCurrentProcess(); + clientId.UniqueThread 
= GetCurrentThread(); + + InitializeTeb(&teb, attr); + InitializeContext(&ctx, func, arg, attr); + + return NtCreateThread(&native_handle, THREAD_ALL_ACCESS, nullptr, GetCurrentProcess(), + &clientId, &ctx, &teb, false); #endif } -void Thread::Exit() { +void NativeThread::Exit() { if (!native_handle) { return; } + tid = 0; + #ifdef _WIN64 - CloseHandle(native_handle); + NtClose(native_handle); native_handle = nullptr; - // We call this assuming the thread has finished execution. - ExitThread(0); + /* The Windows kernel will free the stack + given at thread creation via INITIAL_TEB + (StackAllocationBase) upon thread termination. + + In earlier Windows versions (NT4 to Windows Server 2003), + you could get around this via disabling FreeStackOnTermination + on the TEB. This has been removed since then. + + To avoid this, we must forcefully set the TEB + deallocation stack pointer to NULL so ZwFreeVirtualMemory fails + in the kernel and our stack is not freed. + */ + auto* teb = reinterpret_cast(NtCurrentTeb()); + teb->DeallocationStack = nullptr; + + NtTerminateThread(nullptr, 0); #else + // Disable and free the signal stack. + constexpr stack_t sig_stack = { + .ss_flags = SS_DISABLE, + }; + sigaltstack(&sig_stack, nullptr); + + if (sig_stack_ptr) { + free(sig_stack_ptr); + sig_stack_ptr = nullptr; + } + pthread_exit(nullptr); #endif } -} // namespace Core \ No newline at end of file +void NativeThread::Initialize() { +#if _WIN64 + tid = GetCurrentThreadId(); +#else + tid = (u64)pthread_self(); + + // Set up an alternate signal handler stack to avoid overflowing small thread stacks. 
+ const size_t page_size = getpagesize(); + const size_t sig_stack_size = Common::AlignUp(std::max(64_KB, MINSIGSTKSZ), page_size); + ASSERT_MSG(posix_memalign(&sig_stack_ptr, page_size, sig_stack_size) == 0, + "Failed to allocate signal stack: {}", errno); + + stack_t sig_stack; + sig_stack.ss_sp = sig_stack_ptr; + sig_stack.ss_size = sig_stack_size; + sig_stack.ss_flags = 0; + ASSERT_MSG(sigaltstack(&sig_stack, nullptr) == 0, "Failed to set signal stack: {}", errno); +#endif +} + +} // namespace Core diff --git a/src/core/thread.h b/src/core/thread.h index cfb8b8309..bd777a2e6 100644 --- a/src/core/thread.h +++ b/src/core/thread.h @@ -11,27 +11,35 @@ struct PthreadAttr; namespace Core { -class Thread { -public: - using ThreadFunc = void (*)(void*); - using PthreadFunc = void* (*)(void*); +using ThreadFunc = void (*)(void*); +using PthreadFunc = void* (*)(void*); - Thread(); - ~Thread(); +class NativeThread { +public: + NativeThread(); + ~NativeThread(); int Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr); void Exit(); + void Initialize(); + uintptr_t GetHandle() { return reinterpret_cast(native_handle); } + u64 GetTid() { + return tid; + } + private: -#if _WIN64 +#ifdef _WIN64 void* native_handle; #else uintptr_t native_handle; + void* sig_stack_ptr; #endif + u64 tid; }; } // namespace Core \ No newline at end of file diff --git a/src/emulator.cpp b/src/emulator.cpp index 1d2542d2b..60d6e18d7 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -75,6 +75,9 @@ Emulator::Emulator() { LOG_INFO(Config, "Vulkan rdocMarkersEnable: {}", Config::vkMarkersEnabled()); LOG_INFO(Config, "Vulkan crashDiagnostics: {}", Config::vkCrashDiagnosticEnabled()); + // Create stdin/stdout/stderr + Common::Singleton::Instance()->CreateStdHandles(); + // Defer until after logging is initialized. 
memory = Core::Memory::Instance(); controller = Common::Singleton::Instance(); diff --git a/src/images/discord.png b/src/images/discord.png new file mode 100644 index 000000000..2fa455fd1 Binary files /dev/null and b/src/images/discord.png differ diff --git a/src/images/github.png b/src/images/github.png new file mode 100644 index 000000000..22b101798 Binary files /dev/null and b/src/images/github.png differ diff --git a/src/images/ko-fi.png b/src/images/ko-fi.png new file mode 100644 index 000000000..d19991b5f Binary files /dev/null and b/src/images/ko-fi.png differ diff --git a/src/images/website.png b/src/images/website.png new file mode 100644 index 000000000..9584f6b82 Binary files /dev/null and b/src/images/website.png differ diff --git a/src/images/youtube.png b/src/images/youtube.png new file mode 100644 index 000000000..362ac5781 Binary files /dev/null and b/src/images/youtube.png differ diff --git a/src/imgui/renderer/texture_manager.cpp b/src/imgui/renderer/texture_manager.cpp index 7f9c69d49..f13c995be 100644 --- a/src/imgui/renderer/texture_manager.cpp +++ b/src/imgui/renderer/texture_manager.cpp @@ -4,12 +4,12 @@ #include #include -#include - #include "common/assert.h" #include "common/config.h" #include "common/io_file.h" #include "common/polyfill_thread.h" +#include "common/stb.h" +#include "common/thread.h" #include "imgui_impl_vulkan.h" #include "texture_manager.h" @@ -82,6 +82,7 @@ RefCountedTexture::~RefCountedTexture() { } } } + RefCountedTexture::Image RefCountedTexture::GetTexture() const { if (inner == nullptr) { return {}; @@ -92,6 +93,7 @@ RefCountedTexture::Image RefCountedTexture::GetTexture() const { .height = inner->height, }; } + RefCountedTexture::operator bool() const { return inner != nullptr && inner->texture_id != nullptr; } @@ -131,6 +133,7 @@ Inner::~Inner() { } void WorkerLoop() { + Common::SetCurrentThreadName("shadPS4:ImGuiTextureManager"); std::mutex mtx; while (g_is_worker_running) { std::unique_lock lk{mtx}; diff --git 
a/src/qt_gui/about_dialog.cpp b/src/qt_gui/about_dialog.cpp index a932d65a0..90fb14236 100644 --- a/src/qt_gui/about_dialog.cpp +++ b/src/qt_gui/about_dialog.cpp @@ -1,13 +1,194 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include +#include +#include +#include +#include #include "about_dialog.h" +#include "main_window_themes.h" #include "ui_about_dialog.h" AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) { ui->setupUi(this); + preloadImages(); + + ui->image_1->setAttribute(Qt::WA_Hover, true); + ui->image_2->setAttribute(Qt::WA_Hover, true); + ui->image_3->setAttribute(Qt::WA_Hover, true); + ui->image_4->setAttribute(Qt::WA_Hover, true); + ui->image_5->setAttribute(Qt::WA_Hover, true); + + ui->image_1->installEventFilter(this); + ui->image_2->installEventFilter(this); + ui->image_3->installEventFilter(this); + ui->image_4->installEventFilter(this); + ui->image_5->installEventFilter(this); } AboutDialog::~AboutDialog() { delete ui; } + +void AboutDialog::preloadImages() { + originalImages[0] = ui->image_1->pixmap().copy(); + originalImages[1] = ui->image_2->pixmap().copy(); + originalImages[2] = ui->image_3->pixmap().copy(); + originalImages[3] = ui->image_4->pixmap().copy(); + originalImages[4] = ui->image_5->pixmap().copy(); + + for (int i = 0; i < 5; ++i) { + QImage image = originalImages[i].toImage(); + for (int y = 0; y < image.height(); ++y) { + for (int x = 0; x < image.width(); ++x) { + QColor color = image.pixelColor(x, y); + color.setRed(255 - color.red()); + color.setGreen(255 - color.green()); + color.setBlue(255 - color.blue()); + image.setPixelColor(x, y, color); + } + } + invertedImages[i] = QPixmap::fromImage(image); + } + updateImagesForCurrentTheme(); +} + +void AboutDialog::updateImagesForCurrentTheme() { + Theme currentTheme = static_cast(Config::getMainWindowTheme()); + bool isDarkTheme = (currentTheme == Theme::Dark || 
currentTheme == Theme::Green || + currentTheme == Theme::Blue || currentTheme == Theme::Violet); + if (isDarkTheme) { + ui->image_1->setPixmap(invertedImages[0]); + ui->image_2->setPixmap(invertedImages[1]); + ui->image_3->setPixmap(invertedImages[2]); + ui->image_4->setPixmap(invertedImages[3]); + ui->image_5->setPixmap(invertedImages[4]); + } else { + ui->image_1->setPixmap(originalImages[0]); + ui->image_2->setPixmap(originalImages[1]); + ui->image_3->setPixmap(originalImages[2]); + ui->image_4->setPixmap(originalImages[3]); + ui->image_5->setPixmap(originalImages[4]); + } +} + +bool AboutDialog::eventFilter(QObject* obj, QEvent* event) { + if (event->type() == QEvent::Enter) { + if (obj == ui->image_1) { + if (isDarkTheme()) { + ui->image_1->setPixmap(originalImages[0]); + } else { + ui->image_1->setPixmap(invertedImages[0]); + } + applyHoverEffect(ui->image_1); + } else if (obj == ui->image_2) { + if (isDarkTheme()) { + ui->image_2->setPixmap(originalImages[1]); + } else { + ui->image_2->setPixmap(invertedImages[1]); + } + applyHoverEffect(ui->image_2); + } else if (obj == ui->image_3) { + if (isDarkTheme()) { + ui->image_3->setPixmap(originalImages[2]); + } else { + ui->image_3->setPixmap(invertedImages[2]); + } + applyHoverEffect(ui->image_3); + } else if (obj == ui->image_4) { + if (isDarkTheme()) { + ui->image_4->setPixmap(originalImages[3]); + } else { + ui->image_4->setPixmap(invertedImages[3]); + } + applyHoverEffect(ui->image_4); + } else if (obj == ui->image_5) { + if (isDarkTheme()) { + ui->image_5->setPixmap(originalImages[4]); + } else { + ui->image_5->setPixmap(invertedImages[4]); + } + applyHoverEffect(ui->image_5); + } + } else if (event->type() == QEvent::Leave) { + if (obj == ui->image_1) { + if (isDarkTheme()) { + ui->image_1->setPixmap(invertedImages[0]); + } else { + ui->image_1->setPixmap(originalImages[0]); + } + removeHoverEffect(ui->image_1); + } else if (obj == ui->image_2) { + if (isDarkTheme()) { + 
ui->image_2->setPixmap(invertedImages[1]); + } else { + ui->image_2->setPixmap(originalImages[1]); + } + removeHoverEffect(ui->image_2); + } else if (obj == ui->image_3) { + if (isDarkTheme()) { + ui->image_3->setPixmap(invertedImages[2]); + } else { + ui->image_3->setPixmap(originalImages[2]); + } + removeHoverEffect(ui->image_3); + } else if (obj == ui->image_4) { + if (isDarkTheme()) { + ui->image_4->setPixmap(invertedImages[3]); + } else { + ui->image_4->setPixmap(originalImages[3]); + } + removeHoverEffect(ui->image_4); + } else if (obj == ui->image_5) { + if (isDarkTheme()) { + ui->image_5->setPixmap(invertedImages[4]); + } else { + ui->image_5->setPixmap(originalImages[4]); + } + removeHoverEffect(ui->image_5); + } + } else if (event->type() == QEvent::MouseButtonPress) { + if (obj == ui->image_1) { + QDesktopServices::openUrl(QUrl("https://github.com/shadps4-emu/shadPS4")); + } else if (obj == ui->image_2) { + QDesktopServices::openUrl(QUrl("https://discord.gg/bFJxfftGW6")); + } else if (obj == ui->image_3) { + QDesktopServices::openUrl(QUrl("https://www.youtube.com/@shadPS4/videos")); + } else if (obj == ui->image_4) { + QDesktopServices::openUrl(QUrl("https://ko-fi.com/shadps4")); + } else if (obj == ui->image_5) { + QDesktopServices::openUrl(QUrl("https://shadps4.net")); + } + return true; + } + return QDialog::eventFilter(obj, event); +} + +void AboutDialog::applyHoverEffect(QLabel* label) { + QColor shadowColor = isDarkTheme() ? QColor(0, 0, 0) : QColor(169, 169, 169); + QGraphicsDropShadowEffect* shadow = new QGraphicsDropShadowEffect; + shadow->setBlurRadius(5); + shadow->setXOffset(2); + shadow->setYOffset(2); + shadow->setColor(shadowColor); + label->setGraphicsEffect(shadow); +} + +void AboutDialog::removeHoverEffect(QLabel* label) { + QColor shadowColor = isDarkTheme() ? 
QColor(50, 50, 50) : QColor(169, 169, 169); + QGraphicsDropShadowEffect* shadow = new QGraphicsDropShadowEffect; + shadow->setBlurRadius(3); + shadow->setXOffset(0); + shadow->setYOffset(0); + shadow->setColor(shadowColor); + label->setGraphicsEffect(shadow); +} + +bool AboutDialog::isDarkTheme() const { + Theme currentTheme = static_cast(Config::getMainWindowTheme()); + return currentTheme == Theme::Dark || currentTheme == Theme::Green || + currentTheme == Theme::Blue || currentTheme == Theme::Violet; +} diff --git a/src/qt_gui/about_dialog.h b/src/qt_gui/about_dialog.h index 8c802221b..42e8d557a 100644 --- a/src/qt_gui/about_dialog.h +++ b/src/qt_gui/about_dialog.h @@ -3,7 +3,11 @@ #pragma once +#include #include +#include +#include +#include namespace Ui { class AboutDialog; @@ -15,7 +19,18 @@ class AboutDialog : public QDialog { public: explicit AboutDialog(QWidget* parent = nullptr); ~AboutDialog(); + bool eventFilter(QObject* obj, QEvent* event); private: Ui::AboutDialog* ui; -}; \ No newline at end of file + + void preloadImages(); + void updateImagesForCurrentTheme(); + void applyHoverEffect(QLabel* label); + void removeHoverEffect(QLabel* label); + + bool isDarkTheme() const; + + QPixmap originalImages[5]; + QPixmap invertedImages[5]; +}; diff --git a/src/qt_gui/about_dialog.ui b/src/qt_gui/about_dialog.ui index e2e76f4c4..19840e452 100644 --- a/src/qt_gui/about_dialog.ui +++ b/src/qt_gui/about_dialog.ui @@ -9,7 +9,7 @@ 0 0 780 - 320 + 310 @@ -22,14 +22,14 @@ - 10 - 30 + 15 + 15 271 - 261 + 271 - QFrame::Shape::NoFrame + QFrame::NoFrame @@ -45,7 +45,7 @@ 310 - 40 + 15 171 41 @@ -64,9 +64,9 @@ 310 - 90 + 60 451 - 101 + 70 @@ -85,9 +85,9 @@ 310 - 180 + 130 451 - 101 + 70 @@ -102,6 +102,131 @@ true + + + + 310 + 210 + 80 + 80 + + + + ArrowCursor + + + QFrame::NoFrame + + + + + + :/images/github.png + + + true + + + + + + 400 + 210 + 80 + 80 + + + + ArrowCursor + + + QFrame::NoFrame + + + + + + :/images/discord.png + + + true + + + + + + 490 + 210 + 80 + 80 + 
+ + + ArrowCursor + + + QFrame::NoFrame + + + + + + :/images/youtube.png + + + true + + + + + + 580 + 210 + 80 + 80 + + + + ArrowCursor + + + QFrame::NoFrame + + + + + + :/images/ko-fi.png + + + true + + + + + + 670 + 210 + 80 + 80 + + + + ArrowCursor + + + QFrame::NoFrame + + + + + + :/images/website.png + + + true + + diff --git a/src/qt_gui/cheats_patches.cpp b/src/qt_gui/cheats_patches.cpp index a35136f12..2fea0b6ea 100644 --- a/src/qt_gui/cheats_patches.cpp +++ b/src/qt_gui/cheats_patches.cpp @@ -39,7 +39,7 @@ CheatsPatches::CheatsPatches(const QString& gameName, const QString& gameSerial, m_gameSize(gameSize), m_gameImage(gameImage), manager(new QNetworkAccessManager(this)) { setupUI(); resize(500, 400); - setWindowTitle(tr("Cheats / Patches")); + setWindowTitle(tr("Cheats / Patches for ") + m_gameName); } CheatsPatches::~CheatsPatches() {} @@ -51,6 +51,9 @@ void CheatsPatches::setupUI() { QString CHEATS_DIR_QString; Common::FS::PathToQString(CHEATS_DIR_QString, Common::FS::GetUserPath(Common::FS::PathType::CheatsDir)); + QString PATCHS_DIR_QString; + Common::FS::PathToQString(PATCHS_DIR_QString, + Common::FS::GetUserPath(Common::FS::PathType::PatchesDir)); QString NameCheatJson = m_gameSerial + "_" + m_gameVersion + ".json"; m_cheatFilePath = CHEATS_DIR_QString + "/" + NameCheatJson; @@ -237,9 +240,45 @@ void CheatsPatches::setupUI() { }); patchesControlLayout->addWidget(patchesButton); + QPushButton* deletePatchButton = new QPushButton(tr("Delete File")); + connect(deletePatchButton, &QPushButton::clicked, [this, PATCHS_DIR_QString]() { + QStringListModel* model = qobject_cast(patchesListView->model()); + if (!model) { + return; + } + QItemSelectionModel* selectionModel = patchesListView->selectionModel(); + if (!selectionModel) { + return; + } + QModelIndexList selectedIndexes = selectionModel->selectedIndexes(); + if (selectedIndexes.isEmpty()) { + QMessageBox::warning(this, tr("Delete File"), tr("No files selected.")); + return; + } + QModelIndex 
selectedIndex = selectedIndexes.first(); + QString selectedFileName = model->data(selectedIndex).toString(); + + int ret = QMessageBox::warning( + this, tr("Delete File"), + QString(tr("Do you want to delete the selected file?\\n%1").replace("\\n", "\n")) + .arg(selectedFileName), + QMessageBox::Yes | QMessageBox::No); + + if (ret == QMessageBox::Yes) { + QString fileName = selectedFileName.split('|').first().trimmed(); + QString directoryName = selectedFileName.split('|').last().trimmed(); + QString filePath = PATCHS_DIR_QString + "/" + directoryName + "/" + fileName; + + QFile::remove(filePath); + createFilesJson(directoryName); + populateFileListPatches(); + } + }); + QPushButton* saveButton = new QPushButton(tr("Save")); connect(saveButton, &QPushButton::clicked, this, &CheatsPatches::onSaveButtonClicked); + patchesControlLayout->addWidget(deletePatchButton); patchesControlLayout->addWidget(saveButton); patchesLayout->addLayout(patchesControlLayout); @@ -916,15 +955,33 @@ void CheatsPatches::createFilesJson(const QString& repository) { jsonFile.close(); } -void CheatsPatches::addCheatsToLayout(const QJsonArray& modsArray, const QJsonArray& creditsArray) { +void CheatsPatches::clearListCheats() { QLayoutItem* item; while ((item = rightLayout->takeAt(0)) != nullptr) { - delete item->widget(); - delete item; + QWidget* widget = item->widget(); + if (widget) { + delete widget; + } else { + QLayout* layout = item->layout(); + if (layout) { + QLayoutItem* innerItem; + while ((innerItem = layout->takeAt(0)) != nullptr) { + QWidget* innerWidget = innerItem->widget(); + if (innerWidget) { + delete innerWidget; + } + delete innerItem; + } + delete layout; + } + } } m_cheats.clear(); m_cheatCheckBoxes.clear(); +} +void CheatsPatches::addCheatsToLayout(const QJsonArray& modsArray, const QJsonArray& creditsArray) { + clearListCheats(); int maxWidthButton = 0; for (const QJsonValue& modValue : modsArray) { @@ -1017,6 +1074,8 @@ void CheatsPatches::addCheatsToLayout(const 
QJsonArray& modsArray, const QJsonAr } void CheatsPatches::populateFileListCheats() { + clearListCheats(); + QString cheatsDir; Common::FS::PathToQString(cheatsDir, Common::FS::GetUserPath(Common::FS::PathType::CheatsDir)); diff --git a/src/qt_gui/cheats_patches.h b/src/qt_gui/cheats_patches.h index b07e828c2..4217436f6 100644 --- a/src/qt_gui/cheats_patches.h +++ b/src/qt_gui/cheats_patches.h @@ -36,6 +36,7 @@ public: const QString& m_gameVersion, bool showMessageBox); void downloadPatches(const QString repository, const bool showMessageBox); void createFilesJson(const QString& repository); + void clearListCheats(); void compatibleVersionNotice(const QString repository); signals: diff --git a/src/qt_gui/game_list_frame.cpp b/src/qt_gui/game_list_frame.cpp index 3dcc38025..8be7c9101 100644 --- a/src/qt_gui/game_list_frame.cpp +++ b/src/qt_gui/game_list_frame.cpp @@ -37,7 +37,7 @@ GameListFrame::GameListFrame(std::shared_ptr game_info_get, this->setColumnWidth(5, 90); // Firmware this->setColumnWidth(6, 90); // Size this->setColumnWidth(7, 90); // Version - this->setColumnWidth(8, 100); // Play Time + this->setColumnWidth(8, 120); // Play Time QStringList headers; headers << tr("Icon") << tr("Name") << tr("Compatibility") << tr("Serial") << tr("Region") << tr("Firmware") << tr("Size") << tr("Version") << tr("Play Time") << tr("Path"); @@ -121,7 +121,7 @@ void GameListFrame::PopulateGameList() { QString playTime = GetPlayTime(m_game_info->m_games[i].serial); if (playTime.isEmpty()) { m_game_info->m_games[i].play_time = "0:00:00"; - SetTableItem(i, 8, "0"); + SetTableItem(i, 8, tr("Never Played")); } else { QStringList timeParts = playTime.split(':'); int hours = timeParts[0].toInt(); diff --git a/src/qt_gui/gui_context_menus.h b/src/qt_gui/gui_context_menus.h index 7da7341da..6eef1230c 100644 --- a/src/qt_gui/gui_context_menus.h +++ b/src/qt_gui/gui_context_menus.h @@ -360,6 +360,7 @@ public: QMessageBox::Yes | QMessageBox::No); if (reply == QMessageBox::Yes) { 
dir.removeRecursively(); + widget->removeRow(itemID); } } } diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index abbd39edd..1fd4b6e8b 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -6,6 +6,9 @@ #include #include +#ifdef ENABLE_DISCORD_RPC +#include "common/discord_rpc_handler.h" +#endif #ifdef ENABLE_UPDATER #include "check_update.h" #endif diff --git a/src/qt_gui/translations/ar.ts b/src/qt_gui/translations/ar.ts index 25e215183..3f861187e 100644 --- a/src/qt_gui/translations/ar.ts +++ b/src/qt_gui/translations/ar.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - الغش / التصحيحات + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time وقت اللعب + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/da_DK.ts b/src/qt_gui/translations/da_DK.ts index 14c42f1d9..3539159e2 100644 --- a/src/qt_gui/translations/da_DK.ts +++ b/src/qt_gui/translations/da_DK.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Snyd / Patches + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Spilletid + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/de.ts b/src/qt_gui/translations/de.ts index 64a6c6480..f34402ac9 100644 --- a/src/qt_gui/translations/de.ts +++ b/src/qt_gui/translations/de.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Cheats / Patches + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Spielzeit + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/el.ts b/src/qt_gui/translations/el.ts index e064f8c26..65cee641a 100644 --- a/src/qt_gui/translations/el.ts +++ b/src/qt_gui/translations/el.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Cheats / Patches + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Χρόνος παιχνιδιού + + + Never Played + Never Played + CheckUpdate diff --git 
a/src/qt_gui/translations/en.ts b/src/qt_gui/translations/en.ts index 9bf7c7188..7ae583040 100644 --- a/src/qt_gui/translations/en.ts +++ b/src/qt_gui/translations/en.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Cheats / Patches + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Play Time + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/es_ES.ts b/src/qt_gui/translations/es_ES.ts index 5d637249e..3d1f291a6 100644 --- a/src/qt_gui/translations/es_ES.ts +++ b/src/qt_gui/translations/es_ES.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Trucos / Parches + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Tiempo de Juego + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/fa_IR.ts b/src/qt_gui/translations/fa_IR.ts index 55a2fdf53..58de03346 100644 --- a/src/qt_gui/translations/fa_IR.ts +++ b/src/qt_gui/translations/fa_IR.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - چیت / پچ ها + Cheats / Patches for + Cheats / Patches for ا @@ -1359,6 +1359,11 @@ Play Time زمان بازی + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/fi.ts b/src/qt_gui/translations/fi.ts index 4d160bf6b..0a7f2b250 100644 --- a/src/qt_gui/translations/fi.ts +++ b/src/qt_gui/translations/fi.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Huijaukset / Korjaukset + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Peliaika + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/fr.ts b/src/qt_gui/translations/fr.ts index 39cd11bf6..fad90622a 100644 --- a/src/qt_gui/translations/fr.ts +++ b/src/qt_gui/translations/fr.ts @@ -62,7 +62,7 @@ Select which directory you want to install to. - Select which directory you want to install to. + Sélectionnez le répertoire où vous souhaitez effectuer l'installation. @@ -158,22 +158,22 @@ Delete... - Delete... 
+ Supprimer... Delete Game - Delete Game + Supprimer jeu Delete Update - Delete Update + Supprimer MÀJ Delete DLC - Delete DLC + Supprimer DLC @@ -203,7 +203,7 @@ Game - Game + Jeu @@ -213,17 +213,17 @@ This game has no update to delete! - This game has no update to delete! + Ce jeu n'a pas de mise à jour à supprimer! Update - Update + Mise à jour This game has no DLC to delete! - This game has no DLC to delete! + Ce jeu n'a pas de DLC à supprimer! @@ -233,12 +233,12 @@ Delete %1 - Delete %1 + Supprime %1 Are you sure you want to delete %1's %2 directory? - Are you sure you want to delete %1's %2 directory? + Êtes vous sûr de vouloir supprimer le répertoire %1 %2 ? @@ -495,7 +495,7 @@ Enable Separate Update Folder - Enable Separate Update Folder + Dossier séparé pour les mises à jours @@ -510,7 +510,7 @@ Enable Discord Rich Presence - Activer Discord Rich Presence + Activer la présence Discord @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Cheats/Patches + Cheats / Patches for + Cheats/Patchs pour @@ -1159,7 +1159,7 @@ separateUpdatesCheckBox - Enable Separate Update Folder:\nEnables installing game updates into a separate folder for easy management. + Dossier séparé pour les mises à jours:\nInstalle les mises à jours des jeux dans un dossier séparé pour une gestion plus facile. @@ -1169,7 +1169,7 @@ ps4proCheckBox - Est-ce un PS4 Pro:\nFait en sorte que l'émulateur se comporte comme un PS4 PRO, ce qui peut activer des fonctionnalités spéciales dans les jeux qui le prennent en charge. + Mode PS4 Pro:\nFait en sorte que l'émulateur se comporte comme un PS4 PRO, ce qui peut activer des fonctionnalités spéciales dans les jeux qui le prennent en charge. 
@@ -1359,6 +1359,11 @@ Play Time Temps de jeu + + + Never Played + Jamais joué + CheckUpdate diff --git a/src/qt_gui/translations/hu_HU.ts b/src/qt_gui/translations/hu_HU.ts index a43b8d371..937e3f188 100644 --- a/src/qt_gui/translations/hu_HU.ts +++ b/src/qt_gui/translations/hu_HU.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Csalások / Javítások + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Játékidő + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/id.ts b/src/qt_gui/translations/id.ts index d616f1cf3..80873daa9 100644 --- a/src/qt_gui/translations/id.ts +++ b/src/qt_gui/translations/id.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Cheat / Patch + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Waktu Bermain + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/it.ts b/src/qt_gui/translations/it.ts index c59289314..9094a7ed5 100644 --- a/src/qt_gui/translations/it.ts +++ b/src/qt_gui/translations/it.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Trucchi / Patch + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Tempo di Gioco + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/ja_JP.ts b/src/qt_gui/translations/ja_JP.ts index f4a4b15ad..ad1f383fe 100644 --- a/src/qt_gui/translations/ja_JP.ts +++ b/src/qt_gui/translations/ja_JP.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - チート / パッチ + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time プレイ時間 + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/ko_KR.ts b/src/qt_gui/translations/ko_KR.ts index 2fa3ee153..a528db295 100644 --- a/src/qt_gui/translations/ko_KR.ts +++ b/src/qt_gui/translations/ko_KR.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Cheats / Patches + Cheats / Patches for + Cheats / Patches for @@ -1359,6 
+1359,11 @@ Play Time Play Time + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/lt_LT.ts b/src/qt_gui/translations/lt_LT.ts index 16aaf5d86..4a2820399 100644 --- a/src/qt_gui/translations/lt_LT.ts +++ b/src/qt_gui/translations/lt_LT.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Sukčiavimai / Pataisos + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Žaidimo laikas + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/nb_NO.ts b/src/qt_gui/translations/nb.ts similarity index 94% rename from src/qt_gui/translations/nb_NO.ts rename to src/qt_gui/translations/nb.ts index e02f24182..028646740 100644 --- a/src/qt_gui/translations/nb_NO.ts +++ b/src/qt_gui/translations/nb.ts @@ -90,7 +90,7 @@ The value for location to install games is not valid. - Verdien for mappen for å installere spill er ikke gyldig. + Stien for å installere spillet er ikke gyldig. @@ -123,7 +123,7 @@ Open Game Folder - Åpne Spillmappe + Åpne Spillmappen @@ -208,7 +208,7 @@ requiresEnableSeparateUpdateFolder_MSG - Denne funksjonen krever 'Aktiver seperat oppdateringsmappe' konfigurasjonsalternativet. Hvis du vil bruke denne funksjonen, vennligst aktiver den. + Denne funksjonen krever 'Aktiver seperat oppdateringsmappe' konfigurasjonsalternativet. Hvis du vil bruke denne funksjonen, må du aktiver den. @@ -261,7 +261,7 @@ Check for Updates - Sjekk etter oppdateringer + Se etter oppdateringer @@ -500,7 +500,7 @@ Show Splash - Vis Velkomst + Vis Velkomstbilde @@ -655,7 +655,7 @@ Check for Updates at Startup - Sjekk etter oppdateringer ved oppstart + Se etter oppdateringer ved oppstart @@ -665,7 +665,7 @@ Check for Updates - Sjekk for oppdateringer + Se etter oppdateringer @@ -718,7 +718,7 @@ Patches Downloaded Successfully! - Programrettelser lastet ned vellykket! + Programrettelser ble lastet ned! 
@@ -828,7 +828,7 @@ Game successfully installed at %1 - Spillet ble installert vellykket på %1 + Spillet ble installert i %1 @@ -840,13 +840,13 @@ CheatsPatches - Cheats / Patches - Juks / Programrettelse + Cheats / Patches for + Juks / Programrettelser for defaultTextEdit_MSG - Juks/programrettelse er eksperimentelle.\nBruk med forsiktighet.\n\nLast ned juks individuelt ved å velge pakkebrønn og klikke på nedlastingsknappen.\nPå fanen programrettelse kan du laste ned alle programrettelser samtidig, velge hvilke du ønsker å bruke, og lagre valget ditt.\n\nSiden vi ikke utvikler Juksene/Programrettelsene,\nvær vennlig å rapportere problemer til juks-utvikleren.\n\nHar du laget en ny juks? Besøk:\nhttps://github.com/shadps4-emu/ps4_cheats + Juks/programrettelse er eksperimentelle.\nBruk med forsiktighet.\n\nLast ned juks individuelt ved å velge pakkebrønn og klikke på nedlastingsknappen.\nPå fanen programrettelse kan du laste ned alle programrettelser samtidig, velge hvilke du ønsker å bruke, og lagre valget ditt.\n\nSiden vi ikke utvikler Juksene/Programrettelsene,\nvær vennlig å rapportere problemer til jukse/programrettelse utvikleren.\n\nHar du laget en ny juks? Besøk:\nhttps://github.com/shadps4-emu/ps4_cheats @@ -871,7 +871,7 @@ Select Cheat File: - Velg juksfil: + Velg juksefil: @@ -896,7 +896,7 @@ You can delete the cheats you don't want after downloading them. - Du kan slette jukser du ikke ønsker etter å ha lastet dem ned. + Du kan slette juksene du ikke ønsker etter å ha lastet dem ned. @@ -971,12 +971,12 @@ Options saved successfully. - Alternativer lagret vellykket. + Alternativer ble lagret. 
Invalid Source - Ugyldig kilde + Ugyldig Kilde @@ -986,7 +986,7 @@ File Exists - Filen eksisterer + Filen Eksisterer @@ -996,17 +996,17 @@ Failed to save file: - Kunne ikke lagre fil: + Kunne ikke lagre filen: Failed to download file: - Kunne ikke laste ned fil: + Kunne ikke laste ned filen: Cheats Not Found - Jukser ikke funnet + Fant ikke juksene @@ -1016,12 +1016,12 @@ Cheats Downloaded Successfully - Jukser lastet ned vellykket + Juksene ble lastet ned CheatsDownloadedSuccessfully_MSG - Du har lastet ned jukser vellykket for denne versjonen av spillet fra den valgte pakkebrønnen. Du kan prøve å laste ned fra en annen pakkebrønn, hvis det er tilgjengelig, vil det også være mulig å bruke det ved å velge filen fra listen. + Du har lastet ned jukser for denne versjonen av spillet fra den valgte pakkebrønnen. Du kan prøve å laste ned fra en annen pakkebrønn, hvis det er tilgjengelig, vil det også være mulig å bruke det ved å velge filen fra listen. @@ -1041,7 +1041,7 @@ DownloadComplete_MSG - Oppdateringer lastet ned vellykket! Alle programrettelsene tilgjengelige for alle spill har blitt lastet ned, det er ikke nødvendig å laste dem ned individuelt for hvert spill som skjer med jukser. Hvis programrettelsen ikke vises, kan det hende at den ikke finnes for den spesifikke serienummeret og versjonen av spillet. + Programrettelser ble lastet ned! Alle programrettelsene tilgjengelige for alle spill har blitt lastet ned, det er ikke nødvendig å laste dem ned individuelt for hvert spill som skjer med jukser. Hvis programrettelsen ikke vises, kan det hende at den ikke finnes for den spesifikke serienummeret og versjonen av spillet. @@ -1076,7 +1076,7 @@ Failed to open file: - Kunne ikke åpne fil: + Kunne ikke åpne filen: @@ -1111,7 +1111,7 @@ Can't apply cheats before the game is started - Kan ikke bruke juksetriks før spillet er startet. + Kan ikke bruke juksene før spillet er startet. 
@@ -1154,7 +1154,7 @@ fullscreenCheckBox - Aktiver fullskjerm:\nSetter automatisk spillvinduet i fullskjermmodus.\nDette kan slås av ved å trykke på F11-tasten. + Aktiver fullskjerm:\nSetter spillvinduet automatisk i fullskjermmodus.\nDette kan slås av ved å trykke på F11-tasten. @@ -1164,12 +1164,12 @@ showSplashCheckBox - Vis startskjerm:\nViser spillets startskjerm (et spesialbilde) når spillet starter. + Vis Velkomstbilde:\nViser spillets velkomstbilde (et spesialbilde) når spillet starter. ps4proCheckBox - Er PS4 Pro:\nFår emulatoren til å fungere som en PS4 PRO, noe som kan aktivere spesielle funksjoner i spill som støtter dette. + Er PS4 Pro:\nFår etterligneren til å fungere som en PS4 PRO, noe som kan aktivere spesielle funksjoner i spill som støtter dette. @@ -1199,7 +1199,7 @@ GUIgroupBox - Spille tittelmusikk:\nHvis et spill støtter det, aktiverer spesiell musikk når du velger spillet i menyen. + Spille tittelmusikk:\nHvis et spill støtter det, så aktiveres det spesiell musikk når du velger spillet i menyen. @@ -1254,7 +1254,7 @@ graphicsAdapterGroupBox - Grafikkenhet:\nI systemer med flere GPU-er, velg GPU-en etterligneren skal bruke fra rullegardinlisten,\neller velg "Auto Select" for å bestemme det automatisk. + Grafikkenhet:\nI systemer med flere GPU-er, velg GPU-en etterligneren skal bruke fra rullegardinlisten,\neller velg "Auto Select" for å bestemme den automatisk. @@ -1264,7 +1264,7 @@ heightDivider - Vblank Skillelinje:\nBildehastigheten som etterligneren oppdaterer ved, multipliseres med dette tallet. Endring av dette kan ha negative effekter, som å øke hastigheten på spillet, eller ødelegge kritisk spillfunksjonalitet som ikke forventer at dette endres! + Vblank Skillelinje:\nBildehastigheten som etterligneren oppdaterer ved, multipliseres med dette tallet. Endring av dette kan ha negative effekter, som å øke hastigheten av spillet, eller ødelegge kritisk spillfunksjonalitet som ikke forventer at dette endres! 
@@ -1274,12 +1274,12 @@ nullGpuCheckBox - Aktiver Null GPU:\nFor teknisk feilsøking deaktiverer spillgjengivelse som om det ikke var noe grafikkort. + Aktiver Null GPU:\nFor teknisk feilsøking deaktiverer spillets-gjengivelse som om det ikke var noe grafikkort. gameFoldersBox - Spillmapper:\nListen over mapper for å sjekke installerte spill. + Spillmapper:\nListen over mapper som brukes for å se etter installerte spill. @@ -1299,12 +1299,12 @@ vkValidationCheckBox - Aktiver Vulkan valideringslag:\nAktiverer et system som validerer tilstanden til Vulkan-gjengiveren og logger informasjon om dens indre tilstand. Dette vil redusere ytelsen og sannsynligvis endre etterlignerens oppførsel. + Aktiver Vulkan valideringslag:\nAktiverer et system som validerer tilstanden til Vulkan-gjengiveren og logger informasjon om dens indre tilstand. Dette vil redusere ytelsen og sannsynligvis endre etterlignerens atferd. vkSyncValidationCheckBox - Aktiver Vulkan synkronisering validering:\nAktiverer et system som validerer frekvens tiden av Vulkan-gjengivelsensoppgaver. Dette vil redusere ytelsen og sannsynligvis endre etterlignerens oppførsel. + Aktiver Vulkan synkronisering validering:\nAktiverer et system som validerer frekvens tiden av Vulkan-gjengivelsensoppgaver. Dette vil redusere ytelsen og sannsynligvis endre etterlignerens atferd. 
@@ -1359,13 +1359,18 @@ Play Time Spilletid + + + Never Played + Never Played + CheckUpdate Auto Updater - Automatisk oppdaterer + Automatisk oppdaterering @@ -1435,7 +1440,7 @@ Check for Updates at Startup - Sjekk etter oppdateringer ved oppstart + Se etter oppdateringer ved oppstart diff --git a/src/qt_gui/translations/nl.ts b/src/qt_gui/translations/nl.ts index b0cfaff5e..b66cb94e4 100644 --- a/src/qt_gui/translations/nl.ts +++ b/src/qt_gui/translations/nl.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Cheats / Patches + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Speeltijd + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/pl_PL.ts b/src/qt_gui/translations/pl_PL.ts index 4d11c13f6..8236cf720 100644 --- a/src/qt_gui/translations/pl_PL.ts +++ b/src/qt_gui/translations/pl_PL.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Kody / poprawki + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Czas gry + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/pt_BR.ts b/src/qt_gui/translations/pt_BR.ts index f1d3631d8..5faccf6c5 100644 --- a/src/qt_gui/translations/pt_BR.ts +++ b/src/qt_gui/translations/pt_BR.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Cheats / Patches + Cheats / Patches for + Cheats / Patches para @@ -1359,6 +1359,11 @@ Play Time Tempo Jogado + + + Never Played + Nunca jogado + CheckUpdate diff --git a/src/qt_gui/translations/ro_RO.ts b/src/qt_gui/translations/ro_RO.ts index fff0bcddb..2439e69e2 100644 --- a/src/qt_gui/translations/ro_RO.ts +++ b/src/qt_gui/translations/ro_RO.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Cheats / Patches + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Timp de Joacă + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/ru_RU.ts b/src/qt_gui/translations/ru_RU.ts index 052623235..ccee34517 
100644 --- a/src/qt_gui/translations/ru_RU.ts +++ b/src/qt_gui/translations/ru_RU.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Читы и патчи + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Времени в игре + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/sq.ts b/src/qt_gui/translations/sq.ts index f7144a001..4a02298e8 100644 --- a/src/qt_gui/translations/sq.ts +++ b/src/qt_gui/translations/sq.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Mashtrime / Arna + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Koha e luajtjes + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/tr_TR.ts b/src/qt_gui/translations/tr_TR.ts index 335465778..4c77bc16a 100644 --- a/src/qt_gui/translations/tr_TR.ts +++ b/src/qt_gui/translations/tr_TR.ts @@ -351,7 +351,7 @@ Download Cheats/Patches - Hileler / Yamanlar İndir + Hileleri/Yamaları İndir @@ -505,7 +505,7 @@ Is PS4 Pro - PS4 Pro mu + PS4 Pro @@ -545,12 +545,12 @@ Hide Cursor - İmleci gizle + İmleci Gizle Hide Cursor Idle Timeout - İmleç için hareketsizlik zaman aşımı + İmleç İçin Hareketsizlik Zaman Aşımı @@ -665,7 +665,7 @@ Check for Updates - Güncellemeleri kontrol et + Güncellemeleri Kontrol Et @@ -703,7 +703,7 @@ Download Patches For All Games - Tüm Oyunlar İçin Yamanları İndir + Tüm Oyunlar İçin Yamaları İndir @@ -758,7 +758,7 @@ Patch detected! - Yamanın tespit edildi! + Yama tespit edildi! @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Hileler / Yamalar + Cheats / Patches for + Cheats / Patches for @@ -941,7 +941,7 @@ Unable to open files.json for reading. - files.json dosyasını okumak için açılamadı. + files.json dosyası okumak için açılamadı. @@ -1169,7 +1169,7 @@ ps4proCheckBox - PS4 Pro Mu:\nEmülatörü bir PS4 PRO gibi çalıştırır; bu, bunu destekleyen oyunlarda özel özellikleri etkinleştirebilir. 
+ PS4 Pro:\nEmülatörü bir PS4 PRO gibi çalıştırır; bu, bunu destekleyen oyunlarda özel özellikleri etkinleştirebilir. @@ -1359,6 +1359,11 @@ Play Time Oynama Süresi + + + Never Played + Never Played + CheckUpdate @@ -1488,4 +1493,4 @@ Güncelleme betiği dosyası oluşturulamadı - \ No newline at end of file + diff --git a/src/qt_gui/translations/uk_UA.ts b/src/qt_gui/translations/uk_UA.ts index 31bfe9dba..805fff151 100644 --- a/src/qt_gui/translations/uk_UA.ts +++ b/src/qt_gui/translations/uk_UA.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Чити та Патчі + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Час у грі + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/vi_VN.ts b/src/qt_gui/translations/vi_VN.ts index 223cb9ed0..1ac3d042d 100644 --- a/src/qt_gui/translations/vi_VN.ts +++ b/src/qt_gui/translations/vi_VN.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - Cheat / Bản vá + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time Thời gian chơi + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/zh_CN.ts b/src/qt_gui/translations/zh_CN.ts index 4fe1f7c42..19fb8edff 100644 --- a/src/qt_gui/translations/zh_CN.ts +++ b/src/qt_gui/translations/zh_CN.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - 作弊码 / 补丁 + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time 游戏时间 + + + Never Played + Never Played + CheckUpdate diff --git a/src/qt_gui/translations/zh_TW.ts b/src/qt_gui/translations/zh_TW.ts index 4db00775d..fbd6d624d 100644 --- a/src/qt_gui/translations/zh_TW.ts +++ b/src/qt_gui/translations/zh_TW.ts @@ -840,8 +840,8 @@ CheatsPatches - Cheats / Patches - 作弊碼 / 修補檔 + Cheats / Patches for + Cheats / Patches for @@ -1359,6 +1359,11 @@ Play Time 遊玩時間 + + + Never Played + Never Played + CheckUpdate diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index d95e8d634..f6b57436f 100644 --- 
a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -168,6 +168,21 @@ void WindowSDL::InitTimers() { SDL_AddTimer(100, &PollController, controller); } +void WindowSDL::RequestKeyboard() { + if (keyboard_grab == 0) { + SDL_StartTextInput(window); + } + keyboard_grab++; +} + +void WindowSDL::ReleaseKeyboard() { + ASSERT(keyboard_grab > 0); + keyboard_grab--; + if (keyboard_grab == 0) { + SDL_StopTextInput(window); + } +} + void WindowSDL::OnResize() { SDL_GetWindowSizeInPixels(window, &width, &height); ImGui::Core::OnResize(); diff --git a/src/sdl_window.h b/src/sdl_window.h index 78d0e582f..78d4bbc39 100644 --- a/src/sdl_window.h +++ b/src/sdl_window.h @@ -41,6 +41,8 @@ struct WindowSystemInfo { }; class WindowSDL { + int keyboard_grab = 0; + public: explicit WindowSDL(s32 width, s32 height, Input::GameController* controller, std::string_view window_title); @@ -69,6 +71,9 @@ public: void WaitEvent(); void InitTimers(); + void RequestKeyboard(); + void ReleaseKeyboard(); + private: void OnResize(); void OnKeyPress(const SDL_Event* event); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 1e7032f10..23800fc49 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -284,7 +284,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } - if (info.stores.Get(IR::Attribute::Depth)) { + if (info.stores.GetAny(IR::Attribute::Depth)) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } break; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d8c0a17bd..d005169c4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ 
b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -5,7 +5,7 @@ #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" -#include +#include namespace Shader::Backend::SPIRV { namespace { @@ -326,7 +326,9 @@ Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { const auto& buffer = ctx.texture_buffers[handle]; const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id); - const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset); + const Id coord = + ctx.OpIAdd(ctx.U32[1], ctx.OpShiftLeftLogical(ctx.U32[1], address, buffer.coord_shift), + buffer.coord_offset); Id texel = buffer.is_storage ? ctx.OpImageRead(buffer.result_type, tex_buffer, coord) : ctx.OpImageFetch(buffer.result_type, tex_buffer, coord); if (buffer.is_integer) { @@ -372,7 +374,9 @@ void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { const auto& buffer = ctx.texture_buffers[handle]; const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id); - const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset); + const Id coord = + ctx.OpIAdd(ctx.U32[1], ctx.OpShiftLeftLogical(ctx.U32[1], address, buffer.coord_shift), + buffer.coord_offset); if (buffer.is_integer) { value = ctx.OpBitcast(buffer.result_type, value); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 4ce9f4221..5c7278c6b 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -207,6 +207,8 @@ void EmitContext::DefineBufferOffsets() { push_data_block, ConstU32(half), ConstU32(comp))}; const Id value{OpLoad(U32[1], 
ptr)}; tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(6U)); + tex_buffer.coord_shift = + OpBitFieldUExtract(U32[1], value, ConstU32(offset + 6U), ConstU32(2U)); Name(tex_buffer.coord_offset, fmt::format("texbuf{}_off", binding)); } } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 1c5da946d..4e5e7dd3b 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -223,6 +223,7 @@ public: struct TextureBufferDefinition { Id id; Id coord_offset; + Id coord_shift; u32 binding; Id image_type; Id result_type; diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 354196d31..8c3122b28 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -47,6 +47,15 @@ static IR::Condition MakeCondition(const GcnInst& inst) { } } +static bool IgnoresExecMask(Opcode opcode) { + switch (opcode) { + case Opcode::V_WRITELANE_B32: + return true; + default: + return false; + } +} + static constexpr size_t LabelReserveSize = 32; CFG::CFG(Common::ObjectPool& block_pool_, std::span inst_list_) @@ -133,20 +142,26 @@ void CFG::EmitDivergenceLabels() { curr_begin = -1; continue; } - // Add a label to the instruction right after the open scope call. - // It is the start of a new basic block. - const auto& save_inst = inst_list[curr_begin]; - const Label label = index_to_pc[curr_begin] + save_inst.length; - AddLabel(label); - // Add a label to the close scope instruction. - // There are 3 cases where we need to close a scope. 
- // * Close scope instruction inside the block - // * Close scope instruction at the end of the block (cbranch or endpgm) - // * Normal instruction at the end of the block - // For the last case we must NOT add a label as that would cause - // the instruction to be separated into its own basic block. - if (is_close) { - AddLabel(index_to_pc[index]); + // If all instructions in the scope ignore exec masking, we shouldn't insert a + // scope. + const auto start = inst_list.begin() + curr_begin + 1; + if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask, + &GcnInst::opcode)) { + // Add a label to the instruction right after the open scope call. + // It is the start of a new basic block. + const auto& save_inst = inst_list[curr_begin]; + const Label label = index_to_pc[curr_begin] + save_inst.length; + AddLabel(label); + // Add a label to the close scope instruction. + // There are 3 cases where we need to close a scope. + // * Close scope instruction inside the block + // * Close scope instruction at the end of the block (cbranch or endpgm) + // * Normal instruction at the end of the block + // For the last case we must NOT add a label as that would cause + // the instruction to be separated into its own basic block. + if (is_close) { + AddLabel(index_to_pc[index]); + } } // Reset scope begin. 
curr_begin = -1; diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp index 796bed127..a5187aebd 100644 --- a/src/shader_recompiler/frontend/decode.cpp +++ b/src/shader_recompiler/frontend/decode.cpp @@ -5,7 +5,7 @@ #include "common/assert.h" #include "shader_recompiler/frontend/decode.h" -#include "magic_enum.hpp" +#include namespace Shader::Gcn { diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index ee9f5c805..080b0eb22 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -58,19 +58,6 @@ struct FetchShaderData { }) != attributes.end(); } - [[nodiscard]] std::pair GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs, - const Info& info) const { - u32 vertex_offset = regs.index_offset; - u32 instance_offset = 0; - if (vertex_offset == 0 && vertex_offset_sgpr != -1) { - vertex_offset = info.user_data[vertex_offset_sgpr]; - } - if (instance_offset_sgpr != -1) { - instance_offset = info.user_data[instance_offset_sgpr]; - } - return {vertex_offset, instance_offset}; - } - bool operator==(const FetchShaderData& other) const { return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr && instance_offset_sgpr == other.instance_offset_sgpr; diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index de8b9da87..5b411d83e 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -50,6 +50,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_OR_B64(NegateMode::None, false, inst); case Opcode::S_XOR_B32: return S_XOR_B32(inst); + case Opcode::S_NOT_B32: + return S_NOT_B32(inst); case Opcode::S_XOR_B64: return S_OR_B64(NegateMode::None, true, inst); case Opcode::S_ANDN2_B32: @@ -94,10 +96,14 @@ void 
Translator::EmitScalarAlu(const GcnInst& inst) { return S_BREV_B32(inst); case Opcode::S_BCNT1_I32_B64: return S_BCNT1_I32_B64(inst); + case Opcode::S_FF1_I32_B32: + return S_FF1_I32_B32(inst); case Opcode::S_AND_SAVEEXEC_B64: return S_SAVEEXEC_B64(NegateMode::None, false, inst); case Opcode::S_ORN2_SAVEEXEC_B64: return S_SAVEEXEC_B64(NegateMode::Src1, true, inst); + case Opcode::S_ABS_I32: + return S_ABS_I32(inst); default: LogMissingOpcode(inst); } @@ -301,6 +307,10 @@ void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) { ASSERT_MSG(-s32(operand.code) + SignedConstIntNegMin - 1 == -1, "SignedConstIntNeg must be -1"); return ir.Imm1(true); + case OperandField::LiteralConst: + ASSERT_MSG(operand.code == 0 || operand.code == std::numeric_limits::max(), + "Unsupported literal {:#x}", operand.code); + return ir.Imm1(operand.code & 1); default: UNREACHABLE(); } @@ -382,6 +392,13 @@ void Translator::S_XOR_B32(const GcnInst& inst) { ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } +void Translator::S_NOT_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 result{ir.BitwiseNot(src0)}; + SetDst(inst.dst[0], result); + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + void Translator::S_LSHL_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; @@ -560,6 +577,12 @@ void Translator::S_BCNT1_I32_B64(const GcnInst& inst) { ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } +void Translator::S_FF1_I32_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 result{ir.Select(ir.IEqual(src0, ir.Imm32(0U)), ir.Imm32(-1), ir.FindILsb(src0))}; + SetDst(inst.dst[0], result); +} + void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) { // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs) // However here we flatten it to 1-bit EXEC and 1-bit VCC. 
For the destination @@ -599,6 +622,12 @@ void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& in ir.SetScc(result); } +void Translator::S_ABS_I32(const GcnInst& inst) { + const auto result = ir.IAbs(GetSrc(inst.src[0])); + SetDst(inst.dst[0], result); + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + // SOPC void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 68625a12b..97978ff6b 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -14,7 +14,7 @@ #define MAGIC_ENUM_RANGE_MIN 0 #define MAGIC_ENUM_RANGE_MAX 1515 -#include "magic_enum.hpp" +#include namespace Shader::Gcn { @@ -53,15 +53,74 @@ void Translator::EmitPrologue() { } break; case Stage::Fragment: - // https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L258 - // The first two VGPRs are used for i/j barycentric coordinates. In the vast majority of - // cases it will be only those two, but if shader is using both e.g linear and perspective - // inputs it can be more For now assume that this isn't the case. 
- dst_vreg = IR::VectorReg::V2; - for (u32 i = 0; i < 4; i++) { - ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, i)); + dst_vreg = IR::VectorReg::V0; + if (runtime_info.fs_info.addr_flags.persp_sample_ena) { + ++dst_vreg; // I + ++dst_vreg; // J + } + if (runtime_info.fs_info.addr_flags.persp_center_ena) { + ++dst_vreg; // I + ++dst_vreg; // J + } + if (runtime_info.fs_info.addr_flags.persp_centroid_ena) { + ++dst_vreg; // I + ++dst_vreg; // J + } + if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) { + ++dst_vreg; // I/W + ++dst_vreg; // J/W + ++dst_vreg; // 1/W + } + if (runtime_info.fs_info.addr_flags.linear_sample_ena) { + ++dst_vreg; // I + ++dst_vreg; // J + } + if (runtime_info.fs_info.addr_flags.linear_center_ena) { + ++dst_vreg; // I + ++dst_vreg; // J + } + if (runtime_info.fs_info.addr_flags.linear_centroid_ena) { + ++dst_vreg; // I + ++dst_vreg; // J + } + if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) { + ++dst_vreg; + } + if (runtime_info.fs_info.addr_flags.pos_x_float_ena) { + if (runtime_info.fs_info.en_flags.pos_x_float_ena) { + ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0)); + } else { + ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f)); + } + } + if (runtime_info.fs_info.addr_flags.pos_y_float_ena) { + if (runtime_info.fs_info.en_flags.pos_y_float_ena) { + ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 1)); + } else { + ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f)); + } + } + if (runtime_info.fs_info.addr_flags.pos_z_float_ena) { + if (runtime_info.fs_info.en_flags.pos_z_float_ena) { + ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 2)); + } else { + ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f)); + } + } + if (runtime_info.fs_info.addr_flags.pos_w_float_ena) { + if (runtime_info.fs_info.en_flags.pos_w_float_ena) { + ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 3)); + } else { + ir.SetVectorReg(dst_vreg++, 
ir.Imm32(0.0f)); + } + } + if (runtime_info.fs_info.addr_flags.front_face_ena) { + if (runtime_info.fs_info.en_flags.front_face_ena) { + ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::IsFrontFace)); + } else { + ir.SetVectorReg(dst_vreg++, ir.Imm32(0)); + } } - ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::IsFrontFace)); break; case Stage::Compute: ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0)); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 3b89372bd..43f3ccef2 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -96,6 +96,7 @@ public: void S_MUL_I32(const GcnInst& inst); void S_BFE_U32(const GcnInst& inst); void S_ABSDIFF_I32(const GcnInst& inst); + void S_NOT_B32(const GcnInst& inst); // SOPK void S_MOVK(const GcnInst& inst); @@ -109,8 +110,10 @@ public: void S_NOT_B64(const GcnInst& inst); void S_BREV_B32(const GcnInst& inst); void S_BCNT1_I32_B64(const GcnInst& inst); + void S_FF1_I32_B32(const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); + void S_ABS_I32(const GcnInst& inst); // SOPC void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index eb90c256e..8149230db 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -1155,15 +1155,23 @@ void Translator::V_LSHL_B64(const GcnInst& inst) { const IR::U64 src0{GetSrc64(inst.src[0])}; const IR::U64 src1{GetSrc64(inst.src[1])}; const IR::VectorReg dst_reg{inst.dst[0].code}; - if (src0.IsImmediate() && src0.U64() == -1) { - ir.SetVectorReg(dst_reg, ir.Imm32(0xFFFFFFFF)); - 
ir.SetVectorReg(dst_reg + 1, ir.Imm32(0xFFFFFFFF)); - return; + if (src0.IsImmediate()) { + if (src0.U64() == -1) { + // If src0 is a fixed -1, the result will always be -1. + ir.SetVectorReg(dst_reg, ir.Imm32(0xFFFFFFFF)); + ir.SetVectorReg(dst_reg + 1, ir.Imm32(0xFFFFFFFF)); + return; + } + if (src1.IsImmediate()) { + // If both src0 and src1 are immediates, we can calculate the result now. + // Note that according to the manual, only bits 4:0 are used from src1. + const u64 result = src0.U64() << (src1.U64() & 0x1F); + ir.SetVectorReg(dst_reg, ir.Imm32(static_cast(result))); + ir.SetVectorReg(dst_reg + 1, ir.Imm32(static_cast(result >> 32))); + return; + } } - ASSERT_MSG(src0.IsImmediate() && src0.U64() == 0 && src1.IsImmediate() && src1.U64() == 0, - "V_LSHL_B64 with non-zero src0 or src1 is not supported"); - ir.SetVectorReg(dst_reg, ir.Imm32(0)); - ir.SetVectorReg(dst_reg + 1, ir.Imm32(0)); + UNREACHABLE_MSG("Unimplemented V_LSHL_B64 arguments"); } void Translator::V_MUL_F64(const GcnInst& inst) { diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index d382d0e7c..494bbb4bb 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -105,6 +105,11 @@ struct PushData { ASSERT(offset < 256 && binding < buf_offsets.size()); buf_offsets[binding] = offset; } + + void AddTexelOffset(u32 binding, u32 multiplier, u32 texel_offset) { + ASSERT(texel_offset < 64 && multiplier < 16); + buf_offsets[binding] = texel_offset | ((std::bit_width(multiplier) - 1) << 6); + } }; static_assert(sizeof(PushData) <= 128, "PushData size is greater than minimum size guaranteed by Vulkan spec"); diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index 426acb2b8..b4d1a78c7 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -19,12 +19,14 @@ void Block::AppendNewInst(Opcode op, std::initializer_list args) { Block::iterator 
Block::PrependNewInst(iterator insertion_point, const Inst& base_inst) { Inst* const inst{inst_pool->Create(base_inst)}; + inst->SetParent(this); return instructions.insert(insertion_point, *inst); } Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args, u32 flags) { Inst* const inst{inst_pool->Create(op, flags)}; + inst->SetParent(this); const auto result_it{instructions.insert(insertion_point, *inst)}; if (inst->NumArgs() != args.size()) { diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index abd31a728..9b4ad63d2 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include "shader_recompiler/exception.h" @@ -119,10 +120,10 @@ void Inst::SetArg(size_t index, Value value) { } const IR::Value arg{Arg(index)}; if (!arg.IsImmediate()) { - UndoUse(arg); + UndoUse(arg.Inst(), index); } if (!value.IsImmediate()) { - Use(value); + Use(value.Inst(), index); } if (op == Opcode::Phi) { phi_args[index].second = value; @@ -143,7 +144,7 @@ Block* Inst::PhiBlock(size_t index) const { void Inst::AddPhiOperand(Block* predecessor, const Value& value) { if (!value.IsImmediate()) { - Use(value); + Use(value.Inst(), phi_args.size()); } phi_args.emplace_back(predecessor, value); } @@ -155,17 +156,19 @@ void Inst::Invalidate() { void Inst::ClearArgs() { if (op == Opcode::Phi) { - for (auto& pair : phi_args) { + for (auto i = 0; i < phi_args.size(); i++) { + auto& pair = phi_args[i]; IR::Value& value{pair.second}; if (!value.IsImmediate()) { - UndoUse(value); + UndoUse(value.Inst(), i); } } phi_args.clear(); } else { - for (auto& value : args) { + for (auto i = 0; i < args.size(); i++) { + auto& value = args[i]; if (!value.IsImmediate()) { - UndoUse(value); + UndoUse(value.Inst(), i); } } // Reset arguments to null @@ -174,13 +177,21 @@ 
void Inst::ClearArgs() { } } -void Inst::ReplaceUsesWith(Value replacement) { - Invalidate(); - ReplaceOpcode(Opcode::Identity); - if (!replacement.IsImmediate()) { - Use(replacement); +void Inst::ReplaceUsesWith(Value replacement, bool preserve) { + // Copy since user->SetArg will mutate this->uses + // Could also do temp_uses = std::move(uses) but more readable + const auto temp_uses = uses; + for (const auto& [user, operand] : temp_uses) { + DEBUG_ASSERT(user->Arg(operand).Inst() == this); + user->SetArg(operand, replacement); + } + Invalidate(); + if (preserve) { + // Still useful to have Identity for indirection. + // SSA pass would be more complicated without it + ReplaceOpcode(Opcode::Identity); + SetArg(0, replacement); } - args[0] = replacement; } void Inst::ReplaceOpcode(IR::Opcode opcode) { @@ -195,14 +206,15 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } -void Inst::Use(const Value& value) { - Inst* const inst{value.Inst()}; - ++inst->use_count; +void Inst::Use(Inst* used, u32 operand) { + DEBUG_ASSERT(0 == std::count(used->uses.begin(), used->uses.end(), IR::Use(this, operand))); + used->uses.emplace_front(this, operand); } -void Inst::UndoUse(const Value& value) { - Inst* const inst{value.Inst()}; - --inst->use_count; +void Inst::UndoUse(Inst* used, u32 operand) { + IR::Use use(this, operand); + DEBUG_ASSERT(1 == std::count(used->uses.begin(), used->uses.end(), use)); + used->uses.remove(use); } } // namespace Shader::IR diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index a03fe051c..9624ce6a5 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -43,7 +43,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { if (is_lhs_immediate && is_rhs_immediate) { const auto result{imm_fn(Arg(lhs), Arg(rhs))}; - inst.ReplaceUsesWith(IR::Value{result}); + 
inst.ReplaceUsesWithAndRemove(IR::Value{result}); return false; } if (is_lhs_immediate && !is_rhs_immediate) { @@ -75,7 +75,7 @@ bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { return false; } using Indices = std::make_index_sequence::NUM_ARGS>; - inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); + inst.ReplaceUsesWithAndRemove(EvalImmediates(inst, func, Indices{})); return true; } @@ -83,12 +83,12 @@ template void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { const IR::Value value{inst.Arg(0)}; if (value.IsImmediate()) { - inst.ReplaceUsesWith(IR::Value{std::bit_cast(Arg(value))}); + inst.ReplaceUsesWithAndRemove(IR::Value{std::bit_cast(Arg(value))}); return; } IR::Inst* const arg_inst{value.InstRecursive()}; if (arg_inst->GetOpcode() == reverse) { - inst.ReplaceUsesWith(arg_inst->Arg(0)); + inst.ReplaceUsesWithAndRemove(arg_inst->Arg(0)); return; } } @@ -131,7 +131,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser if (!result) { return; } - inst.ReplaceUsesWith(*result); + inst.ReplaceUsesWithAndRemove(*result); } void FoldConvert(IR::Inst& inst, IR::Opcode opposite) { @@ -141,7 +141,7 @@ void FoldConvert(IR::Inst& inst, IR::Opcode opposite) { } IR::Inst* const producer{value.InstRecursive()}; if (producer->GetOpcode() == opposite) { - inst.ReplaceUsesWith(producer->Arg(0)); + inst.ReplaceUsesWithAndRemove(producer->Arg(0)); } } @@ -152,9 +152,9 @@ void FoldLogicalAnd(IR::Inst& inst) { const IR::Value rhs{inst.Arg(1)}; if (rhs.IsImmediate()) { if (rhs.U1()) { - inst.ReplaceUsesWith(inst.Arg(0)); + inst.ReplaceUsesWithAndRemove(inst.Arg(0)); } else { - inst.ReplaceUsesWith(IR::Value{false}); + inst.ReplaceUsesWithAndRemove(IR::Value{false}); } } } @@ -162,7 +162,7 @@ void FoldLogicalAnd(IR::Inst& inst) { void FoldSelect(IR::Inst& inst) { const IR::Value cond{inst.Arg(0)}; if (cond.IsImmediate()) { - inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2)); + inst.ReplaceUsesWithAndRemove(cond.U1() ? 
inst.Arg(1) : inst.Arg(2)); } } @@ -173,9 +173,9 @@ void FoldLogicalOr(IR::Inst& inst) { const IR::Value rhs{inst.Arg(1)}; if (rhs.IsImmediate()) { if (rhs.U1()) { - inst.ReplaceUsesWith(IR::Value{true}); + inst.ReplaceUsesWithAndRemove(IR::Value{true}); } else { - inst.ReplaceUsesWith(inst.Arg(0)); + inst.ReplaceUsesWithAndRemove(inst.Arg(0)); } } } @@ -183,12 +183,12 @@ void FoldLogicalOr(IR::Inst& inst) { void FoldLogicalNot(IR::Inst& inst) { const IR::U1 value{inst.Arg(0)}; if (value.IsImmediate()) { - inst.ReplaceUsesWith(IR::Value{!value.U1()}); + inst.ReplaceUsesWithAndRemove(IR::Value{!value.U1()}); return; } IR::Inst* const arg{value.InstRecursive()}; if (arg->GetOpcode() == IR::Opcode::LogicalNot) { - inst.ReplaceUsesWith(arg->Arg(0)); + inst.ReplaceUsesWithAndRemove(arg->Arg(0)); } } @@ -199,7 +199,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { } IR::Inst* const arg_inst{value.InstRecursive()}; if (arg_inst->GetOpcode() == reverse) { - inst.ReplaceUsesWith(arg_inst->Arg(0)); + inst.ReplaceUsesWithAndRemove(arg_inst->Arg(0)); return; } } @@ -211,7 +211,7 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) { } const IR::Value rhs{inst.Arg(1)}; if (rhs.IsImmediate() && Arg(rhs) == 0) { - inst.ReplaceUsesWith(inst.Arg(0)); + inst.ReplaceUsesWithAndRemove(inst.Arg(0)); return; } } @@ -226,21 +226,58 @@ void FoldCmpClass(IR::Block& block, IR::Inst& inst) { } else if ((class_mask & IR::FloatClassFunc::Finite) == IR::FloatClassFunc::Finite) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::F32 value = IR::F32{inst.Arg(0)}; - inst.ReplaceUsesWith(ir.LogicalNot(ir.LogicalOr(ir.FPIsInf(value), ir.FPIsInf(value)))); + inst.ReplaceUsesWithAndRemove( + ir.LogicalNot(ir.LogicalOr(ir.FPIsInf(value), ir.FPIsInf(value)))); } else { UNREACHABLE(); } } -void FoldReadLane(IR::Inst& inst) { +void FoldReadLane(IR::Block& block, IR::Inst& inst) { const u32 lane = inst.Arg(1).U32(); IR::Inst* prod = inst.Arg(0).InstRecursive(); 
- while (prod->GetOpcode() == IR::Opcode::WriteLane) { - if (prod->Arg(2).U32() == lane) { - inst.ReplaceUsesWith(prod->Arg(1)); + + const auto search_chain = [lane](const IR::Inst* prod) -> IR::Value { + while (prod->GetOpcode() == IR::Opcode::WriteLane) { + if (prod->Arg(2).U32() == lane) { + return prod->Arg(1); + } + prod = prod->Arg(0).InstRecursive(); + } + return {}; + }; + + if (prod->GetOpcode() == IR::Opcode::WriteLane) { + if (const IR::Value value = search_chain(prod); !value.IsEmpty()) { + inst.ReplaceUsesWith(value); + } + return; + } + + if (prod->GetOpcode() == IR::Opcode::Phi) { + boost::container::small_vector phi_args; + for (size_t arg_index = 0; arg_index < prod->NumArgs(); ++arg_index) { + const IR::Inst* arg{prod->Arg(arg_index).InstRecursive()}; + if (arg->GetOpcode() != IR::Opcode::WriteLane) { + return; + } + const IR::Value value = search_chain(arg); + if (value.IsEmpty()) { + continue; + } + phi_args.emplace_back(value); + } + if (std::ranges::all_of(phi_args, [&](IR::Value value) { return value == phi_args[0]; })) { + inst.ReplaceUsesWith(phi_args[0]); return; } - prod = prod->Arg(0).InstRecursive(); + const auto insert_point = IR::Block::InstructionList::s_iterator_to(*prod); + IR::Inst* const new_phi{&*block.PrependNewInst(insert_point, IR::Opcode::Phi)}; + new_phi->SetFlags(IR::Type::U32); + for (size_t arg_index = 0; arg_index < phi_args.size(); arg_index++) { + new_phi->AddPhiOperand(prod->PhiBlock(arg_index), phi_args[arg_index]); + } + inst.ReplaceUsesWith(IR::Value{new_phi}); } } @@ -290,7 +327,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF64: return FoldSelect(inst); case IR::Opcode::ReadLane: - return FoldReadLane(inst); + return FoldReadLane(block, inst); case IR::Opcode::FPNeg32: FoldWhenAllImmediates(inst, [](f32 a) { return -a; }); return; diff --git a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp 
b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp index 76bfcf911..c109f3595 100644 --- a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp +++ b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp @@ -25,7 +25,7 @@ void LowerSharedMemToRegisters(IR::Program& program) { }); ASSERT(it != ds_writes.end()); // Replace data read with value written. - inst.ReplaceUsesWith((*it)->Arg(1)); + inst.ReplaceUsesWithAndRemove((*it)->Arg(1)); } } } diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index c1ff3d2f2..89c5c78a0 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -596,7 +596,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, } return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info); }(); - inst.ReplaceUsesWith(new_inst); + inst.ReplaceUsesWithAndRemove(new_inst); } void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index df73c1bc8..1d252bee1 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -164,7 +164,6 @@ IR::Opcode UndefOpcode(const FlagTag) noexcept { enum class Status { Start, SetValue, - PreparePhiArgument, PushPhiArgument, }; @@ -253,12 +252,10 @@ public: IR::Inst* const phi{stack.back().phi}; phi->AddPhiOperand(*stack.back().pred_it, stack.back().result); ++stack.back().pred_it; - } - [[fallthrough]]; - case Status::PreparePhiArgument: prepare_phi_operand(); break; } + } } while (stack.size() > 1); return stack.back().result; } @@ -266,9 +263,7 @@ public: void SealBlock(IR::Block* block) { const auto it{incomplete_phis.find(block)}; if (it != 
incomplete_phis.end()) { - for (auto& pair : it->second) { - auto& variant{pair.first}; - auto& phi{pair.second}; + for (auto& [variant, phi] : it->second) { std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); } } @@ -289,7 +284,7 @@ private: const size_t num_args{phi.NumArgs()}; for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { const IR::Value& op{phi.Arg(arg_index)}; - if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { + if (op.Resolve() == same.Resolve() || op.Resolve() == IR::Value{&phi}) { // Unique value or self-reference continue; } @@ -314,9 +309,15 @@ private: ++reinsert_point; } // Reinsert the phi node and reroute all its uses to the "same" value + const auto users = phi.Uses(); list.insert(reinsert_point, phi); phi.ReplaceUsesWith(same); - // TODO: Try to recursively remove all phi users, which might have become trivial + // Try to recursively remove all phi users, which might have become trivial + for (const auto& [user, arg_index] : users) { + if (user->GetOpcode() == IR::Opcode::Phi) { + TryRemoveTrivialPhi(*user, user->GetParent(), undef_opcode); + } + } return same; } diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index 7e46747b9..dbe8b5cc4 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,16 @@ public: explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} }; +struct Use { + Inst* user; + u32 operand; + + Use() = default; + Use(Inst* user_, u32 operand_) : user(user_), operand(operand_) {} + Use(const Use&) = default; + bool operator==(const Use&) const noexcept = default; +}; + class Inst : public boost::intrusive::list_base_hook<> { public: explicit Inst(IR::Opcode op_, u32 flags_) noexcept; @@ -118,14 +129,22 @@ public: Inst& operator=(Inst&&) = delete; Inst(Inst&&) = delete; + IR::Block* GetParent() const { + 
ASSERT(parent); + return parent; + } + void SetParent(IR::Block* block) { + parent = block; + } + /// Get the number of uses this instruction has. [[nodiscard]] int UseCount() const noexcept { - return use_count; + return uses.size(); } /// Determines whether this instruction has uses or not. [[nodiscard]] bool HasUses() const noexcept { - return use_count > 0; + return uses.size() > 0; } /// Get the opcode this microinstruction represents. @@ -167,7 +186,13 @@ public: void Invalidate(); void ClearArgs(); - void ReplaceUsesWith(Value replacement); + void ReplaceUsesWithAndRemove(Value replacement) { + ReplaceUsesWith(replacement, false); + } + + void ReplaceUsesWith(Value replacement) { + ReplaceUsesWith(replacement, true); + } void ReplaceOpcode(IR::Opcode opcode); @@ -197,25 +222,32 @@ public: return std::bit_cast(definition); } + const auto Uses() const { + return uses; + } + private: struct NonTriviallyDummy { NonTriviallyDummy() noexcept {} }; - void Use(const Value& value); - void UndoUse(const Value& value); + void Use(Inst* used, u32 operand); + void UndoUse(Inst* used, u32 operand); + void ReplaceUsesWith(Value replacement, bool preserve); IR::Opcode op{}; - int use_count{}; u32 flags{}; u32 definition{}; + IR::Block* parent{}; union { NonTriviallyDummy dummy{}; boost::container::small_vector, 2> phi_args; std::array args; }; + + boost::container::list uses; }; -static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); +static_assert(sizeof(Inst) <= 160, "Inst size unintentionally increased"); using U1 = TypedValue; using U8 = TypedValue; @@ -373,4 +405,4 @@ template <> struct hash { std::size_t operator()(const Shader::IR::Value& v) const; }; -} // namespace std \ No newline at end of file +} // namespace std diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 4662def93..4c779a368 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -7,6 +7,7 @@ #include 
#include #include "common/types.h" +#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/types.h" namespace Shader { @@ -105,6 +106,8 @@ struct FragmentRuntimeInfo { auto operator<=>(const PsInput&) const noexcept = default; }; + AmdGpu::Liverpool::PsInput en_flags; + AmdGpu::Liverpool::PsInput addr_flags; u32 num_inputs; std::array inputs; struct PsColorBuffer { @@ -117,6 +120,7 @@ struct FragmentRuntimeInfo { bool operator==(const FragmentRuntimeInfo& other) const noexcept { return std::ranges::equal(color_buffers, other.color_buffers) && + en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw && num_inputs == other.num_inputs && std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(), other.inputs.begin() + num_inputs); diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 740b89dda..2a3bd62f4 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -9,7 +9,6 @@ #include "frontend/fetch_shader.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/info.h" -#include "shader_recompiler/ir/passes/srt.h" namespace Shader { @@ -22,8 +21,12 @@ struct VsAttribSpecialization { struct BufferSpecialization { u16 stride : 14; u16 is_storage : 1; + u32 size = 0; - auto operator<=>(const BufferSpecialization&) const = default; + bool operator==(const BufferSpecialization& other) const { + return stride == other.stride && is_storage == other.is_storage && + (size >= other.is_storage || is_storage); + } }; struct TextureBufferSpecialization { @@ -57,7 +60,7 @@ struct StageSpecialization { const Shader::Info* info; RuntimeInfo runtime_info; - Gcn::FetchShaderData fetch_shader_data{}; + std::optional fetch_shader_data{}; boost::container::small_vector vs_attribs; std::bitset bitset{}; boost::container::small_vector buffers; @@ -69,15 +72,14 @@ struct StageSpecialization { explicit 
StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, const Profile& profile_, Backend::Bindings start_) : info{&info_}, runtime_info{runtime_info_}, start{start_} { - if (const auto fetch_shader = Gcn::ParseFetchShader(info_)) { - fetch_shader_data = *fetch_shader; - if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) { - // Specialize shader on VS input number types to follow spec. - ForEachSharp(vs_attribs, fetch_shader_data.attributes, - [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { - spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt()); - }); - } + fetch_shader_data = Gcn::ParseFetchShader(info_); + if (info_.stage == Stage::Vertex && fetch_shader_data && + !profile_.support_legacy_vertex_attributes) { + // Specialize shader on VS input number types to follow spec. + ForEachSharp(vs_attribs, fetch_shader_data->attributes, + [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt()); + }); } u32 binding{}; if (info->has_readconst) { @@ -87,6 +89,9 @@ struct StageSpecialization { [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { spec.stride = sharp.GetStride(); spec.is_storage = desc.IsStorage(sharp); + if (!spec.is_storage) { + spec.size = sharp.GetSize(); + } }); ForEachSharp(binding, tex_buffers, info->texture_buffers, [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { diff --git a/src/shadps4.qrc b/src/shadps4.qrc index e328f2c42..30f234ed8 100644 --- a/src/shadps4.qrc +++ b/src/shadps4.qrc @@ -25,5 +25,10 @@ images/flag_us.png images/flag_world.png images/flag_china.png + images/github.png + images/discord.png + images/ko-fi.png + images/youtube.png + images/website.png diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index f7b710edd..8db2d63c4 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -46,7 +46,7 @@ Liverpool::~Liverpool() { } void 
Liverpool::Process(std::stop_token stoken) { - Common::SetCurrentThreadName("shadPS4:GPU_CommandProcessor"); + Common::SetCurrentThreadName("shadPS4:GpuCommandProcessor"); while (!stoken.stop_requested()) { { @@ -161,6 +161,19 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { } break; } + case PM4ItOpcode::IndirectBufferConst: { + const auto* indirect_buffer = reinterpret_cast(header); + auto task = + ProcessCeUpdate({indirect_buffer->Address(), indirect_buffer->ib_size}); + while (!task.handle.done()) { + task.handle.resume(); + + TracyFiberLeave; + co_yield {}; + TracyFiberEnter(ccb_task_name); + }; + break; + } default: const u32 count = header->type3.NumWords(); UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}", @@ -552,7 +565,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - if (dma_data->dst_addr_lo == 0x3022C) { + if (dma_data->dst_addr_lo == 0x3022C || !rasterizer) { break; } if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) { @@ -597,6 +610,17 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); break; } + case PM4ItOpcode::Rewind: { + const PM4CmdRewind* rewind = reinterpret_cast(header); + while (!rewind->Valid()) { + mapped_queues[GfxQueueId].cs_state = regs.cs_program; + TracyFiberLeave; + co_yield {}; + TracyFiberEnter(dcb_task_name); + regs.cs_program = mapped_queues[GfxQueueId].cs_state; + } + break; + } case PM4ItOpcode::WaitRegMem: { const auto* wait_reg_mem = reinterpret_cast(header); // ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); @@ -617,6 +641,19 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + auto task = ProcessGraphics( + {indirect_buffer->Address(), indirect_buffer->ib_size}, {}); + while (!task.handle.done()) { + task.handle.resume(); + + TracyFiberLeave; + co_yield {}; + TracyFiberEnter(dcb_task_name); + }; + break; + } case PM4ItOpcode::IncrementDeCounter: { 
++cblock.de_count; break; @@ -687,7 +724,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { } case PM4ItOpcode::DmaData: { const auto* dma_data = reinterpret_cast(header); - if (dma_data->dst_addr_lo == 0x3022C) { + if (dma_data->dst_addr_lo == 0x3022C || !rasterizer) { break; } if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) { @@ -702,7 +739,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - LOG_WARNING(Render_Vulkan, "GDS memory read"); + // LOG_WARNING(Render_Vulkan, "GDS memory read"); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(), @@ -717,6 +754,17 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { case PM4ItOpcode::AcquireMem: { break; } + case PM4ItOpcode::Rewind: { + const PM4CmdRewind* rewind = reinterpret_cast(header); + while (!rewind->Valid()) { + mapped_queues[vqid].cs_state = regs.cs_program; + TracyFiberLeave; + co_yield {}; + TracyFiberEnter(acb_task_name); + regs.cs_program = mapped_queues[vqid].cs_state; + } + break; + } case PM4ItOpcode::SetShReg: { const auto* set_data = reinterpret_cast(header); std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 2b2f2c00a..ca3b01612 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -1071,6 +1071,28 @@ struct Liverpool { BitField<27, 1, u32> enable_postz_overrasterization; }; + union PsInput { + u32 raw; + struct { + u32 persp_sample_ena : 1; + u32 persp_center_ena : 1; + u32 persp_centroid_ena : 1; + u32 persp_pull_model_ena : 1; + u32 linear_sample_ena : 1; + u32 linear_center_ena : 1; + u32 linear_centroid_ena : 1; + u32 line_stipple_tex_ena : 1; + u32 
pos_x_float_ena : 1; + u32 pos_y_float_ena : 1; + u32 pos_z_float_ena : 1; + u32 pos_w_float_ena : 1; + u32 front_face_ena : 1; + u32 ancillary_ena : 1; + u32 sample_coverage_ena : 1; + u32 pos_fixed_pt_ena : 1; + }; + }; + union Regs { struct { INSERT_PADDING_WORDS(0x2C08); @@ -1126,7 +1148,10 @@ struct Liverpool { INSERT_PADDING_WORDS(0xA191 - 0xA187); std::array ps_inputs; VsOutputConfig vs_output_config; - INSERT_PADDING_WORDS(4); + INSERT_PADDING_WORDS(1); + PsInput ps_input_ena; + PsInput ps_input_addr; + INSERT_PADDING_WORDS(1); BitField<0, 6, u32> num_interp; INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1); ShaderPosFormat shader_pos_format; @@ -1388,6 +1413,8 @@ static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F); static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F); static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191); static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1); +static_assert(GFX6_3D_REG_INDEX(ps_input_ena) == 0xA1B3); +static_assert(GFX6_3D_REG_INDEX(ps_input_addr) == 0xA1B4); static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6); static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3); static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4); diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index be6751285..238e09fad 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -418,6 +418,19 @@ struct PM4DmaData { } }; +struct PM4CmdRewind { + PM4Type3Header header; + union { + u32 raw; + BitField<24, 1, u32> offload_enable; ///< Enable offload polling valid bit to IQ + BitField<31, 1, u32> valid; ///< Set when subsequent packets are valid + }; + + bool Valid() const { + return valid; + } +}; + struct PM4CmdWaitRegMem { enum class Engine : u32 { Me = 0u, Pfp = 1u }; enum class MemSpace : u32 { Register = 0u, Memory = 1u }; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index a78a68391..ba87425f2 100644 --- 
a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -420,11 +420,11 @@ struct Sampler { } float MinLod() const noexcept { - return static_cast(min_lod); + return static_cast(min_lod.Value()) / 256.0f; } float MaxLod() const noexcept { - return static_cast(max_lod); + return static_cast(max_lod.Value()) / 256.0f; } }; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 1abdb230b..e9fc06493 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -360,7 +360,8 @@ std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b return {&buffer, buffer.Offset(device_addr)}; } -std::pair BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size) { +std::pair BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size, bool prefer_gpu) { + // Check if any buffer contains the full requested range. const u64 page = gpu_addr >> CACHING_PAGEBITS; const BufferId buffer_id = page_table[page]; if (buffer_id) { @@ -370,6 +371,13 @@ std::pair BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size) return {&buffer, buffer.Offset(gpu_addr)}; } } + // If no buffer contains the full requested range but some buffer within was GPU-modified, + // fall back to ObtainBuffer to create a full buffer and avoid losing GPU modifications. + // This is only done if the request prefers to use GPU memory, otherwise we can skip it. + if (prefer_gpu && memory_tracker.IsRegionGpuModified(gpu_addr, size)) { + return ObtainBuffer(gpu_addr, size, false, false); + } + // In all other cases, just do a CPU copy to the staging buffer. 
const u32 offset = staging_buffer.Copy(gpu_addr, size, 16); return {&staging_buffer, offset}; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b1bf77f8a..e62913413 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -42,7 +42,7 @@ public: struct Traits { using Entry = BufferId; - static constexpr size_t AddressSpaceBits = 39; + static constexpr size_t AddressSpaceBits = 40; static constexpr size_t FirstLevelBits = 14; static constexpr size_t PageBits = CACHING_PAGEBITS; }; @@ -96,7 +96,8 @@ public: BufferId buffer_id = {}); /// Attempts to obtain a buffer without modifying the cache contents. - [[nodiscard]] std::pair ObtainViewBuffer(VAddr gpu_addr, u32 size); + [[nodiscard]] std::pair ObtainViewBuffer(VAddr gpu_addr, u32 size, + bool prefer_gpu); /// Return true when a region is registered on the cache [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h index 375701c4c..a59bcfff5 100644 --- a/src/video_core/buffer_cache/memory_tracker_base.h +++ b/src/video_core/buffer_cache/memory_tracker_base.h @@ -14,7 +14,7 @@ namespace VideoCore { class MemoryTracker { public: - static constexpr size_t MAX_CPU_PAGE_BITS = 39; + static constexpr size_t MAX_CPU_PAGE_BITS = 40; static constexpr size_t HIGHER_PAGE_BITS = 22; static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index d26a7067a..556555c25 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -29,10 +29,10 @@ namespace VideoCore { constexpr size_t PAGESIZE = 4_KB; constexpr size_t PAGEBITS = 12; -#if ENABLE_USERFAULTFD +#ifdef ENABLE_USERFAULTFD struct PageManager::Impl { 
Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} { - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); ASSERT_MSG(uffd != -1, "{}", Common::GetLastErrorMsg()); // Request uffdio features from kernel. @@ -114,8 +114,7 @@ struct PageManager::Impl { // Notify rasterizer about the fault. const VAddr addr = msg.arg.pagefault.address; - const VAddr addr_page = GetPageAddr(addr); - rasterizer->InvalidateMemory(addr, addr_page, PAGESIZE); + rasterizer->InvalidateMemory(addr, 1); } } @@ -135,17 +134,14 @@ struct PageManager::Impl { } void OnMap(VAddr address, size_t size) { - owned_ranges += boost::icl::interval::right_open(address, address + size); + // No-op } void OnUnmap(VAddr address, size_t size) { - owned_ranges -= boost::icl::interval::right_open(address, address + size); + // No-op } void Protect(VAddr address, size_t size, bool allow_write) { - ASSERT_MSG(owned_ranges.find(address) != owned_ranges.end(), - "Attempted to track non-GPU memory at address {:#x}, size {:#x}.", address, - size); auto* memory = Core::Memory::Instance(); auto& impl = memory->GetAddressSpace(); impl.Protect(address, size, @@ -155,17 +151,13 @@ struct PageManager::Impl { static bool GuestFaultSignalHandler(void* context, void* fault_address) { const auto addr = reinterpret_cast(fault_address); - const bool is_write = Common::IsWriteError(context); - if (is_write && owned_ranges.find(addr) != owned_ranges.end()) { - const VAddr addr_aligned = GetPageAddr(addr); - rasterizer->InvalidateMemory(addr, addr_aligned, PAGESIZE); - return true; + if (Common::IsWriteError(context)) { + return rasterizer->InvalidateMemory(addr, 1); } return false; } inline static Vulkan::Rasterizer* rasterizer; - inline static boost::icl::interval_set owned_ranges; }; #endif @@ -210,6 +202,9 @@ void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) { const VAddr interval_start_addr = 
boost::icl::first(interval) << PageShift; const VAddr interval_end_addr = boost::icl::last_next(interval) << PageShift; const u32 interval_size = interval_end_addr - interval_start_addr; + ASSERT_MSG(rasterizer->IsMapped(interval_start_addr, interval_size), + "Attempted to track non-GPU memory at address {:#x}, size {:#x}.", + interval_start_addr, interval_size); if (delta > 0 && count == delta) { impl->Protect(interval_start_addr, interval_size, false); } else if (delta < 0 && count == -delta) { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 2262a429a..f0f7d352c 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -6,7 +6,7 @@ #include "video_core/amdgpu/pixel_format.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" -#include +#include #define INVALID_NUMBER_FORMAT_COMBO \ LOG_ERROR(Render_Vulkan, "Unsupported number type {} for format {}", number_type, format); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 09d4e4195..8d495ab06 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -13,7 +13,7 @@ namespace Vulkan { ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache, - u64 compute_key_, const Shader::Info& info_, + ComputePipelineKey compute_key_, const Shader::Info& info_, vk::ShaderModule module) : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} { auto& info = stages[int(Shader::Stage::Compute)]; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index ca429b58d..1c28e461c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ 
b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -17,15 +17,33 @@ class Instance; class Scheduler; class DescriptorHeap; +struct ComputePipelineKey { + size_t value; + + friend bool operator==(const ComputePipelineKey& lhs, const ComputePipelineKey& rhs) { + return lhs.value == rhs.value; + } + friend bool operator!=(const ComputePipelineKey& lhs, const ComputePipelineKey& rhs) { + return !(lhs == rhs); + } +}; + class ComputePipeline : public Pipeline { public: ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, - vk::PipelineCache pipeline_cache, u64 compute_key, const Shader::Info& info, - vk::ShaderModule module); + vk::PipelineCache pipeline_cache, ComputePipelineKey compute_key, + const Shader::Info& info, vk::ShaderModule module); ~ComputePipeline(); private: - u64 compute_key; + ComputePipelineKey compute_key; }; } // namespace Vulkan + +template <> +struct std::hash { + std::size_t operator()(const Vulkan::ComputePipelineKey& key) const noexcept { + return std::hash{}(key.value); + } +}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 91ffe4ea4..2834fceb7 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#pragma once + #include #include "common/types.h" diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 49e4987db..81784eb60 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -70,8 +70,9 @@ std::unordered_map GetFormatProperties( static constexpr std::array misc_formats = { vk::Format::eA2R10G10B10UnormPack32, vk::Format::eA8B8G8R8UnormPack32, vk::Format::eA8B8G8R8SrgbPack32, vk::Format::eB8G8R8A8Unorm, 
- vk::Format::eB8G8R8A8Srgb, vk::Format::eR5G6B5UnormPack16, - vk::Format::eD24UnormS8Uint, + vk::Format::eB8G8R8A8Snorm, vk::Format::eB8G8R8A8Uint, + vk::Format::eB8G8R8A8Sint, vk::Format::eB8G8R8A8Srgb, + vk::Format::eR5G6B5UnormPack16, vk::Format::eD24UnormS8Uint, }; for (const auto& format : misc_formats) { if (!format_properties.contains(format)) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 47713f0ff..b9f318f7a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -123,6 +123,8 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { } case Shader::Stage::Fragment: { BuildCommon(regs.ps_program); + info.fs_info.en_flags = regs.ps_input_ena; + info.fs_info.addr_flags = regs.ps_input_addr; const auto& ps_inputs = regs.ps_inputs; info.fs_info.num_inputs = regs.num_interp; for (u32 i = 0; i < regs.num_interp; i++) { @@ -187,10 +189,19 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { } const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); if (is_new) { - it.value() = graphics_pipeline_pool.Create(instance, scheduler, desc_heap, graphics_key, - *pipeline_cache, infos, fetch_shader, modules); + it.value() = + std::make_unique(instance, scheduler, desc_heap, graphics_key, + *pipeline_cache, infos, fetch_shader, modules); + if (Config::collectShadersForDebug()) { + for (auto stage = 0; stage < MaxShaderStages; ++stage) { + if (infos[stage]) { + auto& m = modules[stage]; + module_related_pipelines[m].emplace_back(graphics_key); + } + } + } } - return it->second; + return it->second.get(); } const ComputePipeline* PipelineCache::GetComputePipeline() { @@ -199,10 +210,14 @@ const ComputePipeline* PipelineCache::GetComputePipeline() { } const auto [it, is_new] = compute_pipelines.try_emplace(compute_key); if (is_new) { - it.value() = 
compute_pipeline_pool.Create(instance, scheduler, desc_heap, *pipeline_cache, - compute_key, *infos[0], modules[0]); + it.value() = std::make_unique( + instance, scheduler, desc_heap, *pipeline_cache, compute_key, *infos[0], modules[0]); + if (Config::collectShadersForDebug()) { + auto& m = modules[0]; + module_related_pipelines[m].emplace_back(compute_key); + } } - return it->second; + return it->second.get(); } bool PipelineCache::RefreshGraphicsKey() { @@ -264,7 +279,7 @@ bool PipelineCache::RefreshGraphicsKey() { // recompiler. for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; - if (skip_cb_binding || !col_buf) { + if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) { continue; } const auto base_format = @@ -279,6 +294,8 @@ bool PipelineCache::RefreshGraphicsKey() { ++remapped_cb; } + fetch_shader = std::nullopt; + Shader::Backend::Bindings binding{}; const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool { const auto stage_in_idx = static_cast(stage_in); @@ -368,7 +385,8 @@ bool PipelineCache::RefreshGraphicsKey() { // Second pass to fill remain CB pipeline key data for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; - if (skip_cb_binding || !col_buf || (key.mrt_mask & (1u << cb)) == 0) { + if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb) || + (key.mrt_mask & (1u << cb)) == 0) { key.color_formats[cb] = vk::Format::eUndefined; key.mrt_swizzles[cb] = Liverpool::ColorBuffer::SwapMode::Standard; continue; @@ -397,7 +415,7 @@ bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; const auto* cs_pgm = &liverpool->regs.cs_program; const auto cs_params = Liverpool::GetParams(*cs_pgm); - std::tie(infos[0], modules[0], fetch_shader, compute_key) = + std::tie(infos[0], modules[0], fetch_shader, compute_key.value) = 
GetProgram(Shader::Stage::Compute, cs_params, binding); return true; } @@ -413,17 +431,23 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile); auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding); DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); + + vk::ShaderModule module; + auto patch = GetShaderPatch(info.pgm_hash, info.stage, perm_idx, "spv"); - if (patch) { - spv = *patch; + const bool is_patched = patch && Config::patchShaders(); + if (is_patched) { LOG_INFO(Loader, "Loaded patch for {} shader {:#x}", info.stage, info.pgm_hash); + module = CompileSPV(*patch, instance.GetDevice()); + } else { + module = CompileSPV(spv, instance.GetDevice()); } - const auto module = CompileSPV(spv, instance.GetDevice()); - const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx); + const auto name = fmt::format("{}_{:#018x}_{}", info.stage, info.pgm_hash, perm_idx); Vulkan::SetObjectName(instance.GetDevice(), module, name); if (Config::collectShadersForDebug()) { - DebugState.CollectShader(name, spv, code); + DebugState.CollectShader(name, module, spv, code, patch ? 
*patch : std::span{}, + is_patched); } return module; } @@ -434,17 +458,17 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, const auto runtime_info = BuildRuntimeInfo(stage); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); if (new_program) { - Program* program = program_pool.Create(stage, params); + it_pgm.value() = std::make_unique(stage, params); + auto& program = it_pgm.value(); auto start = binding; const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start); program->AddPermut(module, std::move(spec)); - it_pgm.value() = program; return std::make_tuple(&program->info, module, spec.fetch_shader_data, HashCombine(params.hash, 0)); } - Program* program = it_pgm->second; + auto& program = it_pgm.value(); auto& info = program->info; info.RefreshFlatBuf(); const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding); @@ -465,6 +489,34 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, HashCombine(params.hash, perm_idx)); } +std::optional PipelineCache::ReplaceShader(vk::ShaderModule module, + std::span spv_code) { + std::optional new_module{}; + for (const auto& [_, program] : program_cache) { + for (auto& m : program->modules) { + if (m.module == module) { + const auto& d = instance.GetDevice(); + d.destroyShaderModule(m.module); + m.module = CompileSPV(spv_code, d); + new_module = m.module; + } + } + } + if (module_related_pipelines.contains(module)) { + auto& pipeline_keys = module_related_pipelines[module]; + for (auto& key : pipeline_keys) { + if (std::holds_alternative(key)) { + auto& graphics_key = std::get(key); + graphics_pipelines.erase(graphics_key); + } else if (std::holds_alternative(key)) { + auto& compute_key = std::get(key); + compute_pipelines.erase(compute_key); + } + } + } + return new_module; +} + void 
PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext) { if (!Config::dumpShaders()) { @@ -484,9 +536,6 @@ void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stag std::optional> PipelineCache::GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext) { - if (!Config::patchShaders()) { - return {}; - } using namespace Common::FS; const auto patch_dir = GetUserPath(PathType::ShaderDir) / "patch"; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e4a8abd4f..c5c2fc98e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include "shader_recompiler/profile.h" #include "shader_recompiler/recompiler.h" @@ -11,6 +12,13 @@ #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" +template <> +struct std::hash { + std::size_t operator()(const vk::ShaderModule& module) const noexcept { + return std::hash{}(reinterpret_cast((VkShaderModule)module)); + } +}; + namespace Shader { struct Info; } @@ -52,6 +60,9 @@ public: GetProgram(Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding); + std::optional ReplaceShader(vk::ShaderModule module, + std::span spv_code); + private: bool RefreshGraphicsKey(); bool RefreshComputeKey(); @@ -74,17 +85,19 @@ private: vk::UniquePipelineLayout pipeline_layout; Shader::Profile profile{}; Shader::Pools pools; - tsl::robin_map program_cache; - Common::ObjectPool program_pool; - Common::ObjectPool graphics_pipeline_pool; - Common::ObjectPool compute_pipeline_pool; - tsl::robin_map compute_pipelines; - tsl::robin_map graphics_pipelines; + tsl::robin_map> program_cache; + tsl::robin_map> compute_pipelines; + tsl::robin_map> graphics_pipelines; std::array infos{}; std::array 
modules{}; std::optional fetch_shader{}; GraphicsPipelineKey graphics_key{}; - u64 compute_key{}; + ComputePipelineKey compute_key{}; + + // Only if Config::collectShadersForDebug() + tsl::robin_map>> + module_related_pipelines; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index b2a50cd44..2e717397b 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -22,7 +22,7 @@ #include "video_core/renderer_vulkan/vk_platform.h" #if VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL -static vk::DynamicLoader dl; +static vk::detail::DynamicLoader dl; #else extern "C" { VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance, diff --git a/src/video_core/renderer_vulkan/vk_presenter.h b/src/video_core/renderer_vulkan/vk_presenter.h index 4d9226dec..4c29af0f0 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.h +++ b/src/video_core/renderer_vulkan/vk_presenter.h @@ -53,6 +53,10 @@ public: return pp_settings.gamma; } + Frontend::WindowSDL& GetWindow() const { + return window; + } + Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address, bool is_eop) { auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; @@ -90,6 +94,10 @@ public: draw_scheduler.Flush(info); } + Rasterizer& GetRasterizer() const { + return *rasterizer.get(); + } + private: void CreatePostProcessPipeline(); Frame* PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop = true); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 084b7c345..bfcdc9538 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -8,6 +8,7 @@ #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include 
"video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_hle.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/texture_cache.h" #include "vk_rasterizer.h" @@ -102,8 +103,13 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { continue; } - const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress()); - texture_cache.TouchMeta(col_buf.CmaskAddress(), false); + // If the color buffer is still bound but rendering to it is disabled by the target + // mask, we need to prevent the render area from being affected by unbound render target + // extents. + if (!regs.color_target_mask.GetMask(col_buf_id)) { + state.color_attachments[state.num_color_attachments++].imageView = VK_NULL_HANDLE; + continue; + } const auto& hint = liverpool->last_cb_extent[col_buf_id]; auto& [image_id, desc] = cb_descs.emplace_back(std::piecewise_construct, std::tuple{}, @@ -113,10 +119,13 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { auto& image = texture_cache.GetImage(image_id); image.binding.is_target = 1u; + const auto slice = image_view.info.range.base.layer; + const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress(), slice); + texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false); + const auto mip = image_view.info.range.base.level; state.width = std::min(state.width, std::max(image.info.size.width >> mip, 1u)); state.height = std::min(state.height, std::max(image.info.size.height >> mip, 1u)); - state.color_images[state.num_color_attachments] = image.image; state.color_attachments[state.num_color_attachments++] = { .imageView = *image_view.image_view, .imageLayout = vk::ImageLayout::eUndefined, @@ -134,8 +143,6 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { (regs.depth_control.stencil_enable && regs.depth_buffer.stencil_info.format != StencilFormat::Invalid))) { const auto htile_address = regs.depth_htile_data_base.GetAddress(); - const bool 
is_clear = regs.depth_render_control.depth_clear_enable || - texture_cache.IsMetaCleared(htile_address); const auto& hint = liverpool->last_db_extent; auto& [image_id, desc] = db_desc.emplace(std::piecewise_construct, std::tuple{}, @@ -146,9 +153,13 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { auto& image = texture_cache.GetImage(image_id); image.binding.is_target = 1u; + const auto slice = image_view.info.range.base.layer; + const bool is_clear = regs.depth_render_control.depth_clear_enable || + texture_cache.IsMetaCleared(htile_address, slice); + ASSERT(desc.view_info.range.extent.layers == 1); + state.width = std::min(state.width, image.info.size.width); state.height = std::min(state.height, image.info.size.height); - state.depth_image = image.image; state.depth_attachment = { .imageView = *image_view.image_view, .imageLayout = vk::ImageLayout::eUndefined, @@ -157,7 +168,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear, .stencil = regs.stencil_clear}}, }; - texture_cache.TouchMeta(htile_address, false); + texture_cache.TouchMeta(htile_address, slice, false); state.has_depth = regs.depth_buffer.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid; state.has_stencil = regs.depth_buffer.stencil_info.format != @@ -167,6 +178,22 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { return state; } +[[nodiscard]] std::pair GetDrawOffsets( + const AmdGpu::Liverpool::Regs& regs, const Shader::Info& info, + const std::optional& fetch_shader) { + u32 vertex_offset = regs.index_offset; + u32 instance_offset = 0; + if (fetch_shader) { + if (vertex_offset == 0 && fetch_shader->vertex_offset_sgpr != -1) { + vertex_offset = info.user_data[fetch_shader->vertex_offset_sgpr]; + } + if (fetch_shader->instance_offset_sgpr != -1) { + instance_offset = info.user_data[fetch_shader->instance_offset_sgpr]; + } + } + return {vertex_offset, instance_offset}; +} + 
void Rasterizer::Draw(bool is_indexed, u32 index_offset) { RENDERER_TRACE; @@ -194,7 +221,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { BeginRendering(*pipeline, state); UpdateDynamicState(*pipeline); - const auto [vertex_offset, instance_offset] = fetch_shader->GetDrawOffsets(regs, vs_info); + const auto [vertex_offset, instance_offset] = GetDrawOffsets(regs, vs_info, fetch_shader); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); @@ -292,18 +319,24 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 void Rasterizer::DispatchDirect() { RENDERER_TRACE; - const auto cmdbuf = scheduler.CommandBuffer(); const auto& cs_program = liverpool->regs.cs_program; const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); if (!pipeline) { return; } + const auto& cs = pipeline->GetStage(Shader::Stage::Compute); + if (ExecuteShaderHLE(cs, liverpool->regs, *this)) { + return; + } + if (!BindResources(pipeline)) { return; } scheduler.EndRendering(); + + const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle()); cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z); @@ -313,7 +346,6 @@ void Rasterizer::DispatchDirect() { void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { RENDERER_TRACE; - const auto cmdbuf = scheduler.CommandBuffer(); const auto& cs_program = liverpool->regs.cs_program; const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); if (!pipeline) { @@ -325,8 +357,11 @@ void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { } scheduler.EndRendering(); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle()); + const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false); + + const auto cmdbuf = scheduler.CommandBuffer(); + 
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle()); cmdbuf.dispatchIndirect(buffer->Handle(), base); ResetBindings(); @@ -359,9 +394,11 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { // will need its full emulation anyways. For cases of metadata read a warning will be // logged. const auto IsMetaUpdate = [&](const auto& desc) { - const VAddr address = desc.GetSharp(info).base_address; + const auto sharp = desc.GetSharp(info); + const VAddr address = sharp.base_address; if (desc.is_written) { - if (texture_cache.TouchMeta(address, true)) { + // Assume all slices were updates + if (texture_cache.ClearMeta(address)) { LOG_TRACE(Render_Vulkan, "Metadata update skipped"); return true; } @@ -373,17 +410,36 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { return false; }; + // Assume if a shader reads and writes metas at the same time, it is a copy shader. + bool meta_read = false; for (const auto& desc : info.buffers) { if (desc.is_gds_buffer) { continue; } - if (IsMetaUpdate(desc)) { - return false; + if (!desc.is_written) { + const VAddr address = desc.GetSharp(info).base_address; + meta_read = texture_cache.IsMeta(address); } } + for (const auto& desc : info.texture_buffers) { - if (IsMetaUpdate(desc)) { - return false; + if (!desc.is_written) { + const VAddr address = desc.GetSharp(info).base_address; + meta_read = texture_cache.IsMeta(address); + } + } + + if (!meta_read) { + for (const auto& desc : info.buffers) { + if (IsMetaUpdate(desc)) { + return false; + } + } + + for (const auto& desc : info.texture_buffers) { + if (IsMetaUpdate(desc)) { + return false; + } } } } @@ -507,12 +563,13 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer( vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id); const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; - ASSERT_MSG(fmt_stride == vsharp.GetStride(), + 
const u32 buf_stride = vsharp.GetStride(); + ASSERT_MSG(buf_stride % fmt_stride == 0, "Texel buffer stride must match format stride"); const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 adjust = offset - offset_aligned; ASSERT(adjust % fmt_stride == 0); - push_data.AddOffset(binding.buffer, adjust / fmt_stride); + push_data.AddTexelOffset(binding.buffer, buf_stride / fmt_stride, adjust / fmt_stride); buffer_view = vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written, vsharp.GetDataFmt(), vsharp.GetNumberFmt()); @@ -665,7 +722,6 @@ void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& s auto& image = texture_cache.GetImage(view.image_id); state.color_attachments[cb_index].imageView = *view.image_view; state.color_attachments[cb_index].imageLayout = image.last_state.layout; - state.color_images[cb_index] = image.image; const auto mip = view.info.range.base.level; state.width = std::min(state.width, std::max(image.info.size.width >> mip, 1u)); @@ -734,8 +790,10 @@ void Rasterizer::Resolve() { mrt0_hint}; VideoCore::TextureCache::RenderTargetDesc mrt1_desc{liverpool->regs.color_buffers[1], mrt1_hint}; - auto& mrt0_image = texture_cache.GetImage(texture_cache.FindImage(mrt0_desc)); - auto& mrt1_image = texture_cache.GetImage(texture_cache.FindImage(mrt1_desc)); + auto& mrt0_image = + texture_cache.GetImage(texture_cache.FindImage(mrt0_desc, VideoCore::FindFlags::ExactFmt)); + auto& mrt1_image = + texture_cache.GetImage(texture_cache.FindImage(mrt1_desc, VideoCore::FindFlags::ExactFmt)); VideoCore::SubresourceRange mrt0_range; mrt0_range.base.layer = liverpool->regs.color_buffers[0].view.slice_start; @@ -785,12 +843,27 @@ u32 Rasterizer::ReadDataFromGds(u32 gds_offset) { return value; } -void Rasterizer::InvalidateMemory(VAddr addr, VAddr addr_aligned, u64 size) { - buffer_cache.InvalidateMemory(addr_aligned, size); - texture_cache.InvalidateMemory(addr, addr_aligned, size); +bool 
Rasterizer::InvalidateMemory(VAddr addr, u64 size) { + if (!IsMapped(addr, size)) { + // Not GPU mapped memory, can skip invalidation logic entirely. + return false; + } + buffer_cache.InvalidateMemory(addr, size); + texture_cache.InvalidateMemory(addr, size); + return true; +} + +bool Rasterizer::IsMapped(VAddr addr, u64 size) { + if (size == 0) { + // There is no memory, so not mapped. + return false; + } + return mapped_ranges.find(boost::icl::interval::right_open(addr, addr + size)) != + mapped_ranges.end(); } void Rasterizer::MapMemory(VAddr addr, u64 size) { + mapped_ranges += boost::icl::interval::right_open(addr, addr + size); page_manager.OnGpuMap(addr, size); } @@ -798,6 +871,7 @@ void Rasterizer::UnmapMemory(VAddr addr, u64 size) { buffer_cache.InvalidateMemory(addr, size); texture_cache.UnmapMemory(addr, size); page_manager.OnGpuUnmap(addr, size); + mapped_ranges -= boost::icl::interval::right_open(addr, addr + size); } void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index fe8aceba7..ec1b5e134 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -28,6 +28,14 @@ public: AmdGpu::Liverpool* liverpool); ~Rasterizer(); + [[nodiscard]] Scheduler& GetScheduler() noexcept { + return scheduler; + } + + [[nodiscard]] VideoCore::BufferCache& GetBufferCache() noexcept { + return buffer_cache; + } + [[nodiscard]] VideoCore::TextureCache& GetTextureCache() noexcept { return texture_cache; } @@ -46,7 +54,8 @@ public: void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); u32 ReadDataFromGds(u32 gsd_offset); - void InvalidateMemory(VAddr addr, VAddr addr_aligned, u64 size); + bool InvalidateMemory(VAddr addr, u64 size); + bool IsMapped(VAddr addr, u64 size); void MapMemory(VAddr addr, u64 size); void UnmapMemory(VAddr addr, u64 size); @@ -54,6 +63,10 @@ 
public: u64 Flush(); void Finish(); + PipelineCache& GetPipelineCache() { + return pipeline_cache; + } + private: RenderState PrepareRenderState(u32 mrt_mask); void BeginRendering(const GraphicsPipeline& pipeline, RenderState& state); @@ -88,6 +101,7 @@ private: VideoCore::TextureCache texture_cache; AmdGpu::Liverpool* liverpool; Core::MemoryManager* memory; + boost::icl::interval_set mapped_ranges; PipelineCache pipeline_cache; boost::container::static_vector< diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 1140bfbc2..cdd33745a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -10,15 +10,17 @@ #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" +namespace tracy { +class VkCtxScope; +} + namespace Vulkan { class Instance; struct RenderState { std::array color_attachments{}; - std::array color_images{}; vk::RenderingAttachmentInfo depth_attachment{}; - vk::Image depth_image{}; u32 num_color_attachments{}; bool has_depth{}; bool has_stencil{}; diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.cpp b/src/video_core/renderer_vulkan/vk_shader_hle.cpp new file mode 100644 index 000000000..df9d40f07 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_hle.cpp @@ -0,0 +1,139 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/info.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_hle.h" + +#include "vk_rasterizer.h" + +namespace Vulkan { + +static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f; + +bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, + Rasterizer& rasterizer) { + auto& scheduler = rasterizer.GetScheduler(); + auto& buffer_cache = rasterizer.GetBufferCache(); + + // Copy 
shader defines three formatted buffers as inputs: control, source, and destination. + const auto ctl_buf_sharp = info.texture_buffers[0].GetSharp(info); + const auto src_buf_sharp = info.texture_buffers[1].GetSharp(info); + const auto dst_buf_sharp = info.texture_buffers[2].GetSharp(info); + const auto buf_stride = src_buf_sharp.GetStride(); + ASSERT(buf_stride == dst_buf_sharp.GetStride()); + + struct CopyShaderControl { + u32 dst_idx; + u32 src_idx; + u32 end; + }; + static_assert(sizeof(CopyShaderControl) == 12); + ASSERT(ctl_buf_sharp.GetStride() == sizeof(CopyShaderControl)); + const auto ctl_buf = reinterpret_cast(ctl_buf_sharp.base_address); + + static std::vector copies; + copies.clear(); + copies.reserve(regs.cs_program.dim_x); + + for (u32 i = 0; i < regs.cs_program.dim_x; i++) { + const auto& [dst_idx, src_idx, end] = ctl_buf[i]; + const u32 local_dst_offset = dst_idx * buf_stride; + const u32 local_src_offset = src_idx * buf_stride; + const u32 local_size = (end + 1) * buf_stride; + copies.emplace_back(local_src_offset, local_dst_offset, local_size); + } + + scheduler.EndRendering(); + + static constexpr vk::MemoryBarrier READ_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, + }; + static constexpr vk::MemoryBarrier WRITE_BARRIER{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + scheduler.CommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {}); + + static constexpr vk::DeviceSize MaxDistanceForMerge = 64_MB; + u32 batch_start = 0; + u32 batch_end = 1; + + while (batch_end < copies.size()) { + // Place first copy into the current batch + const auto& copy = copies[batch_start]; + auto src_offset_min = copy.srcOffset; + auto src_offset_max = 
copy.srcOffset + copy.size; + auto dst_offset_min = copy.dstOffset; + auto dst_offset_max = copy.dstOffset + copy.size; + + for (int i = batch_start + 1; i < copies.size(); i++) { + // Compute new src and dst bounds if we were to batch this copy + const auto [src_offset, dst_offset, size] = copies[i]; + auto new_src_offset_min = std::min(src_offset_min, src_offset); + auto new_src_offset_max = std::max(src_offset_max, src_offset + size); + if (new_src_offset_max - new_src_offset_min > MaxDistanceForMerge) { + continue; + } + + auto new_dst_offset_min = std::min(dst_offset_min, dst_offset); + auto new_dst_offset_max = std::max(dst_offset_max, dst_offset + size); + if (new_dst_offset_max - new_dst_offset_min > MaxDistanceForMerge) { + continue; + } + + // We can batch this copy + src_offset_min = new_src_offset_min; + src_offset_max = new_src_offset_max; + dst_offset_min = new_dst_offset_min; + dst_offset_max = new_dst_offset_max; + if (i != batch_end) { + std::swap(copies[i], copies[batch_end]); + } + ++batch_end; + } + + // Obtain buffers for the total source and destination ranges. + const auto [src_buf, src_buf_offset] = + buffer_cache.ObtainBuffer(src_buf_sharp.base_address + src_offset_min, + src_offset_max - src_offset_min, false, false); + const auto [dst_buf, dst_buf_offset] = + buffer_cache.ObtainBuffer(dst_buf_sharp.base_address + dst_offset_min, + dst_offset_max - dst_offset_min, true, false); + + // Apply found buffer base. + const auto vk_copies = std::span{copies}.subspan(batch_start, batch_end - batch_start); + for (auto& copy : vk_copies) { + copy.srcOffset = copy.srcOffset - src_offset_min + src_buf_offset; + copy.dstOffset = copy.dstOffset - dst_offset_min + dst_buf_offset; + } + + // Execute buffer copies. 
+ LOG_TRACE(Render_Vulkan, "HLE buffer copy: src_size = {}, dst_size = {}", + src_offset_max - src_offset_min, dst_offset_max - dst_offset_min); + scheduler.CommandBuffer().copyBuffer(src_buf->Handle(), dst_buf->Handle(), vk_copies); + batch_start = batch_end; + ++batch_end; + } + + scheduler.CommandBuffer().pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + + return true; +} + +bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, + Rasterizer& rasterizer) { + switch (info.pgm_hash) { + case COPY_SHADER_HASH: + return ExecuteCopyShaderHLE(info, regs, rasterizer); + default: + return false; + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.h b/src/video_core/renderer_vulkan/vk_shader_hle.h new file mode 100644 index 000000000..fda9b1735 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_hle.h @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/amdgpu/liverpool.h" + +namespace Shader { +struct Info; +} + +namespace Vulkan { + +class Rasterizer; + +/// Attempts to execute a shader using HLE if possible. 
+bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, + Rasterizer& rasterizer); + +} // namespace Vulkan diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 3d5202ad6..e7e1ce1da 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -151,9 +151,10 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, // the texture cache should re-create the resource with the usage requested vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | vk::ImageCreateFlagBits::eExtendedUsage}; - const bool can_be_cube = (info.type == vk::ImageType::e2D) && - (info.resources.layers % 6 == 0) && - (info.size.width == info.size.height); + const bool can_be_cube = + (info.type == vk::ImageType::e2D) && + ((info.props.is_pow2 ? (info.resources.layers % 8) : (info.resources.layers % 6)) == 0) && + (info.size.width == info.size.height); if (info.props.is_cube || can_be_cube) { flags |= vk::ImageCreateFlagBits::eCubeCompatible; } else if (info.props.is_volume) { diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 61cabdf11..61f1aaafe 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -58,11 +58,22 @@ bool IsIdentityMapping(u32 dst_sel, u32 num_components) { } vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) { - if (format == vk::Format::eR8G8B8A8Unorm && dst_sel == 0b111100101110) { - return vk::Format::eB8G8R8A8Unorm; - } - if (format == vk::Format::eR8G8B8A8Srgb && dst_sel == 0b111100101110) { - return vk::Format::eB8G8R8A8Srgb; + // BGRA + if (dst_sel == 0b111100101110) { + switch (format) { + case vk::Format::eR8G8B8A8Unorm: + return vk::Format::eB8G8R8A8Unorm; + case vk::Format::eR8G8B8A8Snorm: + return vk::Format::eB8G8R8A8Snorm; + case vk::Format::eR8G8B8A8Uint: + return vk::Format::eB8G8R8A8Uint; + 
case vk::Format::eR8G8B8A8Sint: + return vk::Format::eB8G8R8A8Sint; + case vk::Format::eR8G8B8A8Srgb: + return vk::Format::eB8G8R8A8Srgb; + default: + break; + } } return format; } @@ -150,11 +161,12 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info if (!info.is_storage) { usage_ci.usage &= ~vk::ImageUsageFlagBits::eStorage; } - // When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it. + // When sampling D32/D16 texture from shader, the T# specifies R32/R16 format so adjust it. vk::Format format = info.format; vk::ImageAspectFlags aspect = image.aspect_mask; if (image.aspect_mask & vk::ImageAspectFlagBits::eDepth && - (format == vk::Format::eR32Sfloat || format == vk::Format::eD32Sfloat)) { + (format == vk::Format::eR32Sfloat || format == vk::Format::eD32Sfloat || + format == vk::Format::eR16Unorm || format == vk::Format::eD16Unorm)) { format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eDepth; } diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 4373fdc52..153314d2b 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -56,24 +56,27 @@ void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { UntrackImage(image_id); } -void TextureCache::InvalidateMemory(VAddr addr, VAddr page_addr, size_t size) { +void TextureCache::InvalidateMemory(VAddr addr, size_t size) { std::scoped_lock lock{mutex}; - ForEachImageInRegion(page_addr, size, [&](ImageId image_id, Image& image) { + const auto end = addr + size; + const auto pages_start = PageManager::GetPageAddr(addr); + const auto pages_end = PageManager::GetNextPageAddr(addr + size - 1); + ForEachImageInRegion(pages_start, pages_end - pages_start, [&](ImageId image_id, Image& image) { const auto image_begin = image.info.guest_address; const auto image_end = image.info.guest_address + 
image.info.guest_size_bytes; - const auto page_end = page_addr + size; - if (image_begin <= addr && addr < image_end) { - // This image was definitely accessed by this page fault. - // Untrack image, so the range is unprotected and the guest can write freely + if (image_begin < end && addr < image_end) { + // Start or end of the modified region is in the image, or the image is entirely within + // the modified region, so the image was definitely accessed by this page fault. + // Untrack the image, so that the range is unprotected and the guest can write freely. image.flags |= ImageFlagBits::CpuDirty; UntrackImage(image_id); - } else if (page_end < image_end) { + } else if (pages_end < image_end) { // This page access may or may not modify the image. // We should not mark it as dirty now. If it really was modified // it will receive more invalidations on its other pages. // Remove tracking from this page only. UntrackImageHead(image_id); - } else if (image_begin < page_addr) { + } else if (image_begin < pages_start) { // This page access does not modify the image but the page should be untracked. // We should not mark this image as dirty now. If it really was modified // it will receive more invalidations on its other pages. 
@@ -321,6 +324,10 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) { !IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format)) { continue; } + if (True(flags & FindFlags::ExactFmt) && + info.pixel_format != cache_image.info.pixel_format) { + continue; + } ASSERT((cache_image.info.type == info.type || info.size == Extent3D{1, 1, 1} || True(flags & FindFlags::RelaxFmt))); image_id = cache_id; @@ -345,9 +352,12 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) { } if (image_id) { - Image& image_resoved = slot_images[image_id]; - - if (image_resoved.info.resources < info.resources) { + Image& image_resolved = slot_images[image_id]; + if (True(flags & FindFlags::ExactFmt) && + info.pixel_format != image_resolved.info.pixel_format) { + // Cannot reuse this image as we need the exact requested format. + image_id = {}; + } else if (image_resolved.info.resources < info.resources) { // The image was clearly picked up wrong. FreeImage(image_id); image_id = {}; @@ -398,17 +408,15 @@ ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) { // Register meta data for this color buffer if (!(image.flags & ImageFlagBits::MetaRegistered)) { if (desc.info.meta_info.cmask_addr) { - surface_metas.emplace( - desc.info.meta_info.cmask_addr, - MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true}); + surface_metas.emplace(desc.info.meta_info.cmask_addr, + MetaDataInfo{.type = MetaDataInfo::Type::CMask}); image.info.meta_info.cmask_addr = desc.info.meta_info.cmask_addr; image.flags |= ImageFlagBits::MetaRegistered; } if (desc.info.meta_info.fmask_addr) { - surface_metas.emplace( - desc.info.meta_info.fmask_addr, - MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true}); + surface_metas.emplace(desc.info.meta_info.fmask_addr, + MetaDataInfo{.type = MetaDataInfo::Type::FMask}); image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr; image.flags |= ImageFlagBits::MetaRegistered; } @@ -428,9 +436,8 
@@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { // Register meta data for this depth buffer if (!(image.flags & ImageFlagBits::MetaRegistered)) { if (desc.info.meta_info.htile_addr) { - surface_metas.emplace( - desc.info.meta_info.htile_addr, - MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true}); + surface_metas.emplace(desc.info.meta_info.htile_addr, + MetaDataInfo{.type = MetaDataInfo::Type::HTile}); image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr; image.flags |= ImageFlagBits::MetaRegistered; } @@ -469,6 +476,9 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule const auto& num_mips = image.info.resources.levels; ASSERT(num_mips == image.info.mips_layout.size()); + const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified); + const bool is_gpu_dirty = True(image.flags & ImageFlagBits::GpuDirty); + boost::container::small_vector image_copy{}; for (u32 m = 0; m < num_mips; m++) { const u32 width = std::max(image.info.size.width >> m, 1u); @@ -478,8 +488,6 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule const auto& mip = image.info.mips_layout[m]; // Protect GPU modified resources from accidental CPU reuploads. 
- const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified); - const bool is_gpu_dirty = True(image.flags & ImageFlagBits::GpuDirty); if (is_gpu_modified && !is_gpu_dirty) { const u8* addr = std::bit_cast(image.info.guest_address); const u64 hash = XXH3_64bits(addr + mip.offset, mip.size); @@ -518,7 +526,8 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule const VAddr image_addr = image.info.guest_address; const size_t image_size = image.info.guest_size_bytes; - const auto [vk_buffer, buf_offset] = buffer_cache.ObtainViewBuffer(image_addr, image_size); + const auto [vk_buffer, buf_offset] = + buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty); // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW // hazard if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fab4c832f..430415ed2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -28,6 +28,7 @@ enum class FindFlags { RelaxDim = 1 << 1, ///< Do not check the dimentions of image, only address. RelaxSize = 1 << 2, ///< Do not check that the size matches exactly. RelaxFmt = 1 << 3, ///< Do not check that format is compatible. + ExactFmt = 1 << 4, ///< Require the format to be exactly the same. }; DECLARE_ENUM_FLAG_OPERATORS(FindFlags) @@ -95,7 +96,7 @@ public: ~TextureCache(); /// Invalidates any image in the logical page range. - void InvalidateMemory(VAddr addr, VAddr page_addr, size_t size); + void InvalidateMemory(VAddr addr, size_t size); /// Marks an image as dirty if it exists at the provided address. 
void InvalidateMemoryFromGPU(VAddr address, size_t max_size); @@ -156,18 +157,31 @@ public: return surface_metas.contains(address); } - bool IsMetaCleared(VAddr address) const { + bool IsMetaCleared(VAddr address, u32 slice) const { const auto& it = surface_metas.find(address); if (it != surface_metas.end()) { - return it.value().is_cleared; + return it.value().clear_mask & (1u << slice); } return false; } - bool TouchMeta(VAddr address, bool is_clear) { + bool ClearMeta(VAddr address) { auto it = surface_metas.find(address); if (it != surface_metas.end()) { - it.value().is_cleared = is_clear; + it.value().clear_mask = u32(-1); + return true; + } + return false; + } + + bool TouchMeta(VAddr address, u32 slice, bool is_clear) { + auto it = surface_metas.find(address); + if (it != surface_metas.end()) { + if (is_clear) { + it.value().clear_mask |= 1u << slice; + } else { + it.value().clear_mask &= ~(1u << slice); + } return true; } return false; @@ -280,7 +294,7 @@ private: HTile, }; Type type; - bool is_cleared; + u32 clear_mask{u32(-1)}; }; tsl::robin_map surface_metas; }; diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 7430168d0..fc3d35e3e 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -14,7 +14,7 @@ #include "video_core/host_shaders/detile_m8x2_comp.h" #include -#include +#include #include namespace VideoCore { @@ -174,6 +174,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { switch (format) { case vk::Format::eR8Unorm: return vk::Format::eR8Uint; + case vk::Format::eR4G4B4A4UnormPack16: case vk::Format::eR8G8Unorm: case vk::Format::eR16Sfloat: case vk::Format::eR16Unorm: @@ -392,7 +393,8 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o const auto* detiler = GetDetiler(image); if (!detiler) { if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled && - image.info.tiling_mode != 
AmdGpu::TilingMode::Display_MacroTiled) { + image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled && + image.info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) { LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})", vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode)); }